- Added dfa-optimize-period patch (speedup for . patterns in UTF-8)
- Added glibc-matcher-fallback patch (speedup for [a-z] patterns in UTF-8) - Added mmap-option-fix patch - Added dfa-convert-to-wide-char patch (speedup for -m and remove quadratic complexity when going to glibc) - Added dfa-speedup-digit-xdigit patch (speedup for [[:digit:]] and [[:xdigit:]])
This commit is contained in:
parent
532439cc73
commit
6ccb1073e9
162
grep-2.6.3-dfa-convert-to-wide-char.patch
Normal file
162
grep-2.6.3-dfa-convert-to-wide-char.patch
Normal file
@ -0,0 +1,162 @@
|
||||
From ff191d4667709b52758fcc5bdc568726d1616be4 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <bonzini@gnu.org>
|
||||
Date: Tue, 4 May 2010 17:26:09 +0200
|
||||
Subject: [PATCH] dfa: convert to wide character line-by-line
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This provides a nice speedup for -m in general, but especially
|
||||
it avoids quadratic complexity in case we have to go to glibc.
|
||||
|
||||
Testcases:
|
||||
|
||||
# From upstream backref-multibyte-slow
|
||||
yes aba | sed 10000q > aba.txt
|
||||
time ./egrep -c '^([a-z]).\1$' aba.txt
|
||||
|
||||
# From rbiba
|
||||
time grep '^[a-f][h-j][l-ž]$' cestina-sorted.txt
|
||||
|
||||
* src/dfa.c (prepare_wc_buf): Extract out of dfaexec. Convert
|
||||
only up to the next newline.
|
||||
(dfaexec): Exit multibyte processing loop if past buf_end.
|
||||
Call prepare_wc_buf again after processing a newline.
|
||||
---
|
||||
src/dfa.c | 96 +++++++++++++++++++++++++++++++++++++-----------------------
|
||||
1 files changed, 59 insertions(+), 37 deletions(-)
|
||||
|
||||
diff --git a/src/dfa.c b/src/dfa.c
|
||||
index 523fe05..70aa5a8 100644
|
||||
--- a/src/dfa.c
|
||||
+++ b/src/dfa.c
|
||||
@@ -2824,6 +2824,53 @@ transit_state (struct dfa *d, int s, unsigned char const **pp)
|
||||
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
+/* Initialize mblen_buf and inputwcs with data from the next line. */
|
||||
+
|
||||
+static void
|
||||
+prepare_wc_buf (const char *begin, const char *end)
|
||||
+{
|
||||
+ unsigned char eol = eolbyte;
|
||||
+ size_t remain_bytes, i;
|
||||
+
|
||||
+ buf_begin = (unsigned char *) begin;
|
||||
+
|
||||
+ remain_bytes = 0;
|
||||
+ for (i = 0; i < end - begin + 1; i++)
|
||||
+ {
|
||||
+ if (remain_bytes == 0)
|
||||
+ {
|
||||
+ remain_bytes
|
||||
+ = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
|
||||
+ if (remain_bytes < 1
|
||||
+ || remain_bytes == (size_t) -1
|
||||
+ || remain_bytes == (size_t) -2
|
||||
+ || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
|
||||
+ {
|
||||
+ remain_bytes = 0;
|
||||
+ inputwcs[i] = (wchar_t)begin[i];
|
||||
+ mblen_buf[i] = 0;
|
||||
+ if (begin[i] == eol)
|
||||
+ break;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ mblen_buf[i] = remain_bytes;
|
||||
+ remain_bytes--;
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ mblen_buf[i] = remain_bytes;
|
||||
+ inputwcs[i] = 0;
|
||||
+ remain_bytes--;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ buf_end = (unsigned char *) (begin + i);
|
||||
+ mblen_buf[i] = 0;
|
||||
+ inputwcs[i] = 0; /* sentinel */
|
||||
+}
|
||||
+
|
||||
/* Search through a buffer looking for a match to the given struct dfa.
|
||||
Find the first occurrence of a string matching the regexp in the
|
||||
buffer, and the shortest possible version thereof. Return a pointer to
|
||||
@@ -2870,43 +2917,10 @@ dfaexec (struct dfa *d, char const *begin, char *end,
|
||||
#ifdef MBS_SUPPORT
|
||||
if (d->mb_cur_max > 1)
|
||||
{
|
||||
- int remain_bytes, i;
|
||||
- buf_begin = (unsigned char *) begin;
|
||||
- buf_end = (unsigned char *) end;
|
||||
-
|
||||
- /* initialize mblen_buf, and inputwcs. */
|
||||
MALLOC(mblen_buf, unsigned char, end - begin + 2);
|
||||
MALLOC(inputwcs, wchar_t, end - begin + 2);
|
||||
memset(&mbs, 0, sizeof(mbstate_t));
|
||||
- remain_bytes = 0;
|
||||
- for (i = 0; i < end - begin + 1; i++)
|
||||
- {
|
||||
- if (remain_bytes == 0)
|
||||
- {
|
||||
- remain_bytes
|
||||
- = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
|
||||
- if (remain_bytes < 1
|
||||
- || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
|
||||
- {
|
||||
- remain_bytes = 0;
|
||||
- inputwcs[i] = (wchar_t)begin[i];
|
||||
- mblen_buf[i] = 0;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- mblen_buf[i] = remain_bytes;
|
||||
- remain_bytes--;
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- mblen_buf[i] = remain_bytes;
|
||||
- inputwcs[i] = 0;
|
||||
- remain_bytes--;
|
||||
- }
|
||||
- }
|
||||
- mblen_buf[i] = 0;
|
||||
- inputwcs[i] = 0; /* sentinel */
|
||||
+ prepare_wc_buf (p, end);
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
@@ -2916,7 +2930,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
|
||||
if (d->mb_cur_max > 1)
|
||||
while ((t = trans[s]))
|
||||
{
|
||||
- if ((char *) p > end)
|
||||
+ if (p > buf_end)
|
||||
break;
|
||||
s1 = s;
|
||||
SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
|
||||
@@ -2985,8 +2999,16 @@ dfaexec (struct dfa *d, char const *begin, char *end,
|
||||
}
|
||||
|
||||
/* If the previous character was a newline, count it. */
|
||||
- if (count && (char *) p <= end && p[-1] == eol)
|
||||
- ++*count;
|
||||
+ if ((char *) p <= end && p[-1] == eol)
|
||||
+ {
|
||||
+ if (count)
|
||||
+ ++*count;
|
||||
+
|
||||
+#ifdef MBS_SUPPORT
|
||||
+ if (d->mb_cur_max > 1)
|
||||
+ prepare_wc_buf (p, end);
|
||||
+#endif
|
||||
+ }
|
||||
|
||||
/* Check if we've run off the end of the buffer. */
|
||||
if ((char *) p > end)
|
||||
--
|
||||
1.6.6.1
|
||||
|
165
grep-2.6.3-dfa-optimize-period.patch
Normal file
165
grep-2.6.3-dfa-optimize-period.patch
Normal file
@ -0,0 +1,165 @@
|
||||
From 01422220ebf40f829c1f00418a96873b82f206ff Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <bonzini@gnu.org>
|
||||
Date: Mon, 19 Apr 2010 14:50:23 +0200
|
||||
Subject: [PATCH 1/2] dfa: optimize UTF-8 period
|
||||
|
||||
Backport of upstream commits 7a0ad00 and 42ac56a.
|
||||
|
||||
* src/dfa.h (struct dfa): Add utf8_anychar_classes.
|
||||
* src/dfa.c (add_utf8_anychar): New.
|
||||
(atom): Simplify if/else nesting. Call add_utf8_anychar for ANYCHAR
|
||||
in UTF-8 locales.
|
||||
(dfaoptimize): Abort on ANYCHAR.
|
||||
---
|
||||
src/dfa.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------
|
||||
src/dfa.h | 1 +
|
||||
2 files changed, 82 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/src/dfa.c b/src/dfa.c
|
||||
index ba78b08..e13c361 100644
|
||||
--- a/src/dfa.c
|
||||
+++ b/src/dfa.c
|
||||
@@ -1191,6 +1191,55 @@ addtok_wc (wint_t wc)
|
||||
}
|
||||
#endif
|
||||
|
||||
+static void
|
||||
+add_utf8_anychar (void)
|
||||
+{
|
||||
+ static const charclass utf8_classes[5] = {
|
||||
+ { 0, 0, 0, 0, ~0, ~0, 0, 0 }, /* 80-bf: non-lead bytes */
|
||||
+ { ~0, ~0, ~0, ~0, ~0, ~0, 0, 0xff000000 }, /* 00-bf, f8-ff: 1-byte/invalid */
|
||||
+ { 0, 0, 0, 0, 0, 0, ~0, 0 }, /* c0-df: 2-byte sequence */
|
||||
+ { 0, 0, 0, 0, 0, 0, 0, 0xffff }, /* e0-ef: 3-byte sequence */
|
||||
+ { 0, 0, 0, 0, 0, 0, 0, 0xff0000 } /* f0-f7: 4-byte sequence */
|
||||
+ };
|
||||
+ const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
|
||||
+ unsigned int i;
|
||||
+
|
||||
+ /* Define the five character classes that are needed below. */
|
||||
+ if (dfa->utf8_anychar_classes[0] == 0)
|
||||
+ for (i = 0; i < n; i++)
|
||||
+ {
|
||||
+ charclass c;
|
||||
+ memcpy (c, utf8_classes[i], sizeof c);
|
||||
+ if (i == 1)
|
||||
+ {
|
||||
+ if (!(syntax_bits & RE_DOT_NEWLINE))
|
||||
+ clrbit (eolbyte, c);
|
||||
+ if (syntax_bits & RE_DOT_NOT_NULL)
|
||||
+ clrbit ('\0', c);
|
||||
+ }
|
||||
+ dfa->utf8_anychar_classes[i] = CSET + charclass_index(c);
|
||||
+ }
|
||||
+
|
||||
+ /* A valid UTF-8 character is
|
||||
+
|
||||
+ ([0x00-0x7f]
|
||||
+ |[0xc2-0xdf][0x80-0xbf]
|
||||
+ |[0xe0-0xef][0x80-0xbf][0x80-0xbf]
|
||||
+ |[0xf0-0xf7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
|
||||
+
|
||||
+ which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x80-0xbf]
|
||||
+ and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
|
||||
+ Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */
|
||||
+ for (i = 1; i < n; i++)
|
||||
+ addtok (dfa->utf8_anychar_classes[i]);
|
||||
+ while (--i > 1)
|
||||
+ {
|
||||
+ addtok (dfa->utf8_anychar_classes[0]);
|
||||
+ addtok (CAT);
|
||||
+ addtok (OR);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* The grammar understood by the parser is as follows.
|
||||
|
||||
regexp:
|
||||
@@ -1229,8 +1278,12 @@ addtok_wc (wint_t wc)
|
||||
static void
|
||||
atom (void)
|
||||
{
|
||||
+ if (0)
|
||||
+ {
|
||||
+ /* empty */
|
||||
+ }
|
||||
#ifdef MBS_SUPPORT
|
||||
- if (tok == WCHAR)
|
||||
+ else if (tok == WCHAR)
|
||||
{
|
||||
addtok_wc (case_fold ? towlower(wctok) : wctok);
|
||||
#ifndef GREP
|
||||
@@ -1242,16 +1295,28 @@ atom (void)
|
||||
#endif
|
||||
|
||||
tok = lex();
|
||||
- return;
|
||||
+ }
|
||||
+
|
||||
+ else if (tok == ANYCHAR && using_utf8())
|
||||
+ {
|
||||
+ /* For UTF-8 expand the period to a series of CSETs that define a valid
|
||||
+ UTF-8 character. This avoids using the slow multibyte path. I'm
|
||||
+ pretty sure it would be both profitable and correct to do it for
|
||||
+ any encoding; however, the optimization must be done manually as
|
||||
+ it is done above in add_utf8_anychar. So, let's start with
|
||||
+ UTF-8: it is the most used, and the structure of the encoding
|
||||
+ makes the correctness more obvious. */
|
||||
+ add_utf8_anychar();
|
||||
+ tok = lex();
|
||||
}
|
||||
#endif /* MBS_SUPPORT */
|
||||
|
||||
- if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|
||||
- || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
|
||||
+ else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|
||||
+ || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
|
||||
#ifdef MBS_SUPPORT
|
||||
- || tok == ANYCHAR || tok == MBCSET /* MB_CUR_MAX > 1 */
|
||||
+ || tok == ANYCHAR || tok == MBCSET
|
||||
#endif /* MBS_SUPPORT */
|
||||
- || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
|
||||
+ || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
|
||||
{
|
||||
addtok(tok);
|
||||
tok = lex();
|
||||
@@ -3027,14 +3092,16 @@ dfaoptimize (struct dfa *d)
|
||||
for (i = 0; i < d->tindex; ++i)
|
||||
{
|
||||
switch(d->tokens[i])
|
||||
- {
|
||||
- case ANYCHAR:
|
||||
- case MBCSET:
|
||||
- /* Requires multi-byte algorithm. */
|
||||
- return;
|
||||
- default:
|
||||
- break;
|
||||
- }
|
||||
+ {
|
||||
+ case ANYCHAR:
|
||||
+ /* Lowered. */
|
||||
+ abort ();
|
||||
+ case MBCSET:
|
||||
+ /* Requires multi-byte algorithm. */
|
||||
+ return;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
free_mbdata (d);
|
||||
diff --git a/src/dfa.h b/src/dfa.h
|
||||
index 1c85207..42c177a 100644
|
||||
--- a/src/dfa.h
|
||||
+++ b/src/dfa.h
|
||||
@@ -283,6 +283,7 @@ struct dfa
|
||||
with dfaparse(). */
|
||||
#ifdef MBS_SUPPORT
|
||||
unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
|
||||
+ int utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
|
||||
|
||||
/* The following are used only if MB_CUR_MAX > 1. */
|
||||
|
||||
--
|
||||
1.6.6.1
|
||||
|
114
grep-2.6.3-dfa-speedup-digit-xdigit.patch
Normal file
114
grep-2.6.3-dfa-speedup-digit-xdigit.patch
Normal file
@ -0,0 +1,114 @@
|
||||
From ebca24d6c9eb12f91eed3993de65945ee97dd467 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <bonzini@gnu.org>
|
||||
Date: Tue, 4 May 2010 18:07:28 +0200
|
||||
Subject: [PATCH] dfa: speed up [[:digit:]] and [[:xdigit:]]
|
||||
|
||||
There's no "multibyte pain" in these two classes, since POSIX
|
||||
and ISO C99 mandate their contents.
|
||||
|
||||
Time for "./grep -x '[[:digit:]]' /usr/share/dict/linux.words"
|
||||
Before: 1.5s, after: 0.07s. (sed manages only 0.5s).
|
||||
|
||||
* src/dfa.c (predicates): Declare struct dfa_ctype separately
|
||||
from definition. Add sb_only.
|
||||
(find_pred): Return const struct dfa_ctype *.
|
||||
(parse_bracket_exp): Return const struct dfa_ctype *. Do
|
||||
not fill MBCSET for sb_only character types.
|
||||
---
|
||||
src/dfa.c | 55 ++++++++++++++++++++++++++++---------------------------
|
||||
1 files changed, 28 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/src/dfa.c b/src/dfa.c
|
||||
index 4dd26c9..da5a306 100644
|
||||
--- a/src/dfa.c
|
||||
+++ b/src/dfa.c
|
||||
@@ -429,26 +429,29 @@ typedef int predicate (int);
|
||||
/* The following list maps the names of the Posix named character classes
|
||||
to predicate functions that determine whether a given character is in
|
||||
the class. The leading [ has already been eaten by the lexical analyzer. */
|
||||
-static struct {
|
||||
+struct dfa_ctype {
|
||||
const char *name;
|
||||
- predicate *pred;
|
||||
-} const prednames[] = {
|
||||
- { "alpha", is_alpha },
|
||||
- { "upper", is_upper },
|
||||
- { "lower", is_lower },
|
||||
- { "digit", is_digit },
|
||||
- { "xdigit", is_xdigit },
|
||||
- { "space", is_space },
|
||||
- { "punct", is_punct },
|
||||
- { "alnum", is_alnum },
|
||||
- { "print", is_print },
|
||||
- { "graph", is_graph },
|
||||
- { "cntrl", is_cntrl },
|
||||
- { "blank", is_blank },
|
||||
- { NULL, NULL }
|
||||
+ predicate *func;
|
||||
+ bool sb_only;
|
||||
};
|
||||
|
||||
-static predicate *
|
||||
+static const struct dfa_ctype prednames[] = {
|
||||
+ { "alpha", isalpha, false },
|
||||
+ { "upper", isupper, false },
|
||||
+ { "lower", islower, false },
|
||||
+ { "digit", isdigit, true },
|
||||
+ { "xdigit", isxdigit, true },
|
||||
+ { "space", isspace, false },
|
||||
+ { "punct", ispunct, false },
|
||||
+ { "alnum", isalnum, false },
|
||||
+ { "print", isprint, false },
|
||||
+ { "graph", isgraph, false },
|
||||
+ { "cntrl", iscntrl, false },
|
||||
+ { "blank", isblank, false },
|
||||
+ { NULL, NULL, false }
|
||||
+};
|
||||
+
|
||||
+static const struct dfa_ctype *
|
||||
find_pred (const char *str)
|
||||
{
|
||||
unsigned int i;
|
||||
@@ -456,7 +459,7 @@ find_pred (const char *str)
|
||||
if (!strcmp(str, prednames[i].name))
|
||||
break;
|
||||
|
||||
- return prednames[i].pred;
|
||||
+ return &prednames[i];
|
||||
}
|
||||
|
||||
/* Multibyte character handling sub-routine for lex.
|
||||
@@ -553,8 +556,11 @@ parse_bracket_exp (void)
|
||||
|| !strcmp (str, "lower"))
|
||||
? "alpha"
|
||||
: str);
|
||||
+ const struct dfa_ctype *pred = find_pred (class);
|
||||
+ if (!pred)
|
||||
+ dfaerror(_("invalid character class"));
|
||||
#ifdef MBS_SUPPORT
|
||||
- if (MB_CUR_MAX > 1)
|
||||
+ if (MB_CUR_MAX > 1 && !pred->sb_only)
|
||||
{
|
||||
/* Store the character class as wctype_t. */
|
||||
wctype_t wt = wctype (class);
|
||||
@@ -568,14 +574,9 @@ parse_bracket_exp (void)
|
||||
}
|
||||
#endif
|
||||
|
||||
- {
|
||||
- predicate *pred = find_pred (class);
|
||||
- if (!pred)
|
||||
- dfaerror(_("invalid character class"));
|
||||
- for (c2 = 0; c2 < NOTCHAR; ++c2)
|
||||
- if ((*pred)(c2))
|
||||
- setbit_case_fold (c2, ccl);
|
||||
- }
|
||||
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
|
||||
+ if (pred->func(c2))
|
||||
+ setbit_case_fold (c2, ccl);
|
||||
}
|
||||
|
||||
#ifdef MBS_SUPPORT
|
||||
--
|
||||
1.6.6.1
|
||||
|
38
grep-2.6.3-glibc-matcher-fallback.patch
Normal file
38
grep-2.6.3-glibc-matcher-fallback.patch
Normal file
@ -0,0 +1,38 @@
|
||||
From 3fca11d78cfa1fec6199936d57871b9db08226ab Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <bonzini@gnu.org>
|
||||
Date: Thu, 29 Apr 2010 17:13:32 +0200
|
||||
Subject: [PATCH 2/2] fall back to glibc matcher if a MBCSET is found
|
||||
|
||||
This patch works around the performance problems of multibyte grep
|
||||
upstream.
|
||||
|
||||
For UTF-8 it should trigger only in the presence of MBCSET, e.g. [a-z].
|
||||
|
||||
For other character sets all brackets and `.` as well will trigger it.
|
||||
---
|
||||
src/dfa.c | 9 +++++++++
|
||||
1 files changed, 9 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/src/dfa.c b/src/dfa.c
|
||||
index e13c361..523fe05 100644
|
||||
--- a/src/dfa.c
|
||||
+++ b/src/dfa.c
|
||||
@@ -2927,6 +2927,15 @@ dfaexec (struct dfa *d, char const *begin, char *end,
|
||||
continue;
|
||||
}
|
||||
|
||||
+ if (backref)
|
||||
+ {
|
||||
+ *backref = 1;
|
||||
+ free(mblen_buf);
|
||||
+ free(inputwcs);
|
||||
+ *end = saved_end;
|
||||
+ return (char *) p;
|
||||
+ }
|
||||
+
|
||||
/* Can match with a multibyte character (and multi character
|
||||
collating element). Transition table might be updated. */
|
||||
s = transit_state(d, s, &p);
|
||||
--
|
||||
1.6.6.1
|
||||
|
26
grep-2.6.3-mmap-option-fix.patch
Normal file
26
grep-2.6.3-mmap-option-fix.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From dfa2891e473abdb5507fff65002f946b60145f44 Mon Sep 17 00:00:00 2001
|
||||
From: Paolo Bonzini <bonzini@gnu.org>
|
||||
Date: Tue, 20 Apr 2010 12:32:22 +0200
|
||||
Subject: [PATCH] grep: fix --mmap not being ignored
|
||||
|
||||
* NEWS: Document bugfix.
|
||||
* main.c (main): Ignore MMAP_OPTION.
|
||||
---
|
||||
src/main.c | 1 +
|
||||
1 files changed, 1 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/src/main.c b/src/main.c
|
||||
index 1697c80..0ccf6eb 100644
|
||||
--- a/src/main.c
|
||||
+++ b/src/main.c
|
||||
@@ -2061,6 +2061,7 @@ main (int argc, char **argv)
|
||||
label = optarg;
|
||||
break;
|
||||
|
||||
+ case MMAP_OPTION:
|
||||
case 0:
|
||||
/* long options */
|
||||
break;
|
||||
--
|
||||
1.6.6.1
|
||||
|
32
grep.spec
32
grep.spec
@ -3,7 +3,7 @@
|
||||
Summary: Pattern matching utilities
|
||||
Name: grep
|
||||
Version: 2.6.3
|
||||
Release: 1%{?dist}
|
||||
Release: 2%{?dist}
|
||||
License: GPLv3+
|
||||
Group: Applications/Text
|
||||
Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz
|
||||
@ -13,6 +13,21 @@ Requires(preun): /sbin/install-info
|
||||
BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
|
||||
BuildRequires: pcre-devel >= 3.9-10, texinfo, gettext
|
||||
BuildRequires: autoconf automake
|
||||
# Speeds up DFA UTF-8 period patterns.
|
||||
# original name: 0001-dfa-optimize-UTF-8-period.patch
|
||||
Patch0: grep-2.6.3-dfa-optimize-period.patch
|
||||
# Speeds up MBCSET ([a-z]) UTF-8 patterns.
|
||||
# original name: 0002-fall-back-to-glibc-matcher-if-a-MBCSET-is-found.patch
|
||||
Patch1: grep-2.6.3-glibc-matcher-fallback.patch
|
||||
# Deprecated --mmap option is now properly ignored.
|
||||
# 0003-grep-fix-mmap-not-being-ignored.patch
|
||||
Patch2: grep-2.6.3-mmap-option-fix.patch
|
||||
# Speeds up -m and removes quadratic complexity when going to glibc.
|
||||
# 0004-dfa-convert-to-wide-character-line-by-line.patch
|
||||
Patch3: grep-2.6.3-dfa-convert-to-wide-char.patch
|
||||
# Speeds up DFA [[:digit:]] and [[:xdigit:]] patterns.
|
||||
# 0005-dfa-speed-up-digit-and-xdigit.patch
|
||||
Patch4: grep-2.6.3-dfa-speedup-digit-xdigit.patch
|
||||
|
||||
%description
|
||||
The GNU versions of commonly used grep utilities. Grep searches through
|
||||
@ -23,6 +38,11 @@ GNU grep is needed by many scripts, so it shall be installed on every system.
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
%patch0 -p1 -b .dfa-optimize-period
|
||||
%patch1 -p1 -b .glibc-matcher-fallback
|
||||
%patch2 -p1 -b .mmap-option-fix
|
||||
%patch3 -p1 -b .dfa-convert-to-wide-char
|
||||
%patch4 -p1 -b .speedup-digit-xdigit
|
||||
|
||||
%build
|
||||
%configure --without-included-regex CPPFLAGS="-I%{_includedir}/pcre"
|
||||
@ -59,6 +79,14 @@ fi
|
||||
%{_mandir}/*/*
|
||||
|
||||
%changelog
|
||||
* Tue May 06 2010 Jaroslav Škarvada <jskarvad@redhat.com> - 2.6.3-2
|
||||
- Added dfa-optimize-period patch (speedup for . patterns in UTF-8)
|
||||
- Added glibc-matcher-fallback patch (speedup for [a-z] patterns in UTF-8)
|
||||
- Added mmap-option-fix patch
|
||||
- Added dfa-convert-to-wide-char patch (speedup for -m and remove quadratic
|
||||
complexity when going to glibc)
|
||||
- Added dfa-speedup-digit-xdigit patch (speedup for [[:digit:]] and [[:xdigit:]])
|
||||
|
||||
* Sun Apr 04 2010 Jaroslav Škarvada <jskarvad@redhat.com> - 2.6.3-1
|
||||
- New version: grep-2.6.3
|
||||
- make check is not silent now
|
||||
@ -77,7 +105,7 @@ fi
|
||||
- Added w patch to fix -w switch behaviour broken by dfa-optional patch
|
||||
|
||||
* Wed Feb 10 2010 Jaroslav Škarvada <jskarvad@redhat.com> - 2.5.4-1
|
||||
- New version: grep-2.5.4 (#502931)
|
||||
- New version: grep-2.5.4
|
||||
- Fixed typos in %description
|
||||
- Updated utf-8 patch
|
||||
- Added dfa-optional patch (#538423)
|
||||
|
Loading…
Reference in New Issue
Block a user