diff --git a/.cvsignore b/.cvsignore index 36fd3fc..7f63c13 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -grep-2.5.4.tar.bz2 +grep-2.6.tar.xz diff --git a/grep-2.5.3-case.patch b/grep-2.5.3-case.patch deleted file mode 100644 index d69abc4..0000000 --- a/grep-2.5.3-case.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 68f09677b7aaedafa8c29280ccd76a034fe269f1 Mon Sep 17 00:00:00 2001 -From: Debian -Date: Sun, 23 Nov 2008 17:28:46 +0100 -Subject: [PATCH] Case-insensitive list matching fix - -This fixes case-insensitive matching of lists in multi-byte character sets. -Original comment: - -fix the following problem in multibyte locales. - % echo Y | egrep -i '[y]' - % - -derived from gawk's dfa.c. - -Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=123363 -Debian: 61-dfa.c-case_fold-charclass.patch ---- - src/dfa.c | 14 ++++++++++++++ - 1 files changed, 14 insertions(+), 0 deletions(-) - -diff --git a/src/dfa.c b/src/dfa.c -index 934be97..088c379 100644 ---- a/src/dfa.c -+++ b/src/dfa.c -@@ -689,6 +689,20 @@ parse_bracket_exp_mb () - REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, - work_mbc->nchars + 1); - work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; -+ if (case_fold && (iswlower((wint_t) wc) || iswupper((wint_t) wc))) -+ { -+ wint_t altcase; -+ -+ altcase = wc; /* keeps compiler happy */ -+ if (iswlower((wint_t) wc)) -+ altcase = towupper((wint_t) wc); -+ else if (iswupper((wint_t) wc)) -+ altcase = towlower((wint_t) wc); -+ -+ REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, -+ work_mbc->nchars + 1); -+ work_mbc->chars[work_mbc->nchars++] = (wchar_t) altcase; -+ } - } - } - while ((wc = wc1) != L']'); --- -1.5.5.1 - diff --git a/grep-2.5.3-egf-speedup.patch b/grep-2.5.3-egf-speedup.patch deleted file mode 100644 index 1e44894..0000000 --- a/grep-2.5.3-egf-speedup.patch +++ /dev/null @@ -1,840 +0,0 @@ -From aac37e1939632dbc7d2ade6f991af7ce103b0cba Mon Sep 17 00:00:00 2001 -From: Tim Waugh -Date: Sun, 23 Nov 2008 17:30:59 +0100 
-Subject: [PATCH] EGF Speedup - -The full story behind this patch is that grep-2.5.1a does not handle UTF-8 gracefully at all. The basic plan with handling UTF-8 in 2.5.1a is: - - * whenever a buffer is parsed, go through the entire buffer deciding how many bytes make up each character - * use this information when necessary - -This patch changes that to: - - * when information about how many bytes make up a character is needed, work it out on demand - -On the face of it, this is a small obvious improvement. In fact it is much better than that, because the original scheme would calculate character lengths several times for each buffer: in fact, one full pass for every single potential match! - -For a full discussion of this patch, as well as dfa-optional, including benchmarking results, see the mailing list. - -Upstream ticket: https://savannah.gnu.org/patch/?3803 -Debian: 64-egf-speedup.patch -Debian: 66-match_icase.patch ---- - lib/posix/regex.h | 4 + - src/search.c | 652 +++++++++++++++++++++++++++++++++++++++++++---------- - 2 files changed, 535 insertions(+), 121 deletions(-) - -diff --git a/lib/posix/regex.h b/lib/posix/regex.h -index f4c4150..98df2cb 100644 ---- a/lib/posix/regex.h -+++ b/lib/posix/regex.h -@@ -165,6 +165,10 @@ typedef unsigned long int reg_syntax_t; - treated as 'a\{1'. */ - #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) - -+/* If this bit is set, then ignore case when matching. -+ If not set, then case is significant. */ -+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) -+ - /* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect -diff --git a/src/search.c b/src/search.c -index 7f5f187..9691fb8 100644 ---- a/src/search.c -+++ b/src/search.c -@@ -18,10 +18,15 @@ - - /* Written August 1992 by Mike Haertel. 
*/ - -+#ifndef _GNU_SOURCE -+# define _GNU_SOURCE 1 -+#endif - #ifdef HAVE_CONFIG_H - # include - #endif - -+#include -+ - #include - - #include "mbsupport.h" -@@ -43,6 +48,9 @@ - #ifdef HAVE_LIBPCRE - # include - #endif -+#ifdef HAVE_LANGINFO_CODESET -+# include -+#endif - - #define NCHAR (UCHAR_MAX + 1) - -@@ -68,6 +76,19 @@ kwsinit (void) - error (2, 0, _("memory exhausted")); - } - -+/* UTF-8 encoding allows some optimizations that we can't otherwise -+ assume in a multibyte encoding. */ -+static int using_utf8; -+ -+void -+check_utf8 (void) -+{ -+#ifdef HAVE_LANGINFO_CODESET -+ if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) -+ using_utf8 = 1; -+#endif -+} -+ - #ifndef FGREP_PROGRAM - /* DFA compiled regexp. */ - static struct dfa dfa; -@@ -134,49 +155,6 @@ kwsmusts (void) - } - #endif /* !FGREP_PROGRAM */ - --#ifdef MBS_SUPPORT --/* This function allocate the array which correspond to "buf". -- Then this check multibyte string and mark on the positions which -- are not single byte character nor the first byte of a multibyte -- character. Caller must free the array. */ --static char* --check_multibyte_string(char const *buf, size_t size) --{ -- char *mb_properties = xmalloc(size); -- mbstate_t cur_state; -- wchar_t wc; -- int i; -- -- memset(&cur_state, 0, sizeof(mbstate_t)); -- memset(mb_properties, 0, sizeof(char)*size); -- -- for (i = 0; i < size ;) -- { -- size_t mbclen; -- mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); -- -- if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) -- { -- /* An invalid sequence, or a truncated multibyte character. -- We treat it as a single byte character. 
*/ -- mbclen = 1; -- } -- else if (match_icase) -- { -- if (iswupper((wint_t)wc)) -- { -- wc = towlower((wint_t)wc); -- wcrtomb(buf + i, wc, &cur_state); -- } -- } -- mb_properties[i] = mbclen; -- i += mbclen; -- } -- -- return mb_properties; --} --#endif /* MBS_SUPPORT */ -- - #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) - #ifdef EGREP_PROGRAM - COMPILE_FCT(Ecompile) -@@ -193,10 +171,9 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) - size_t total = size; - char const *motif = pattern; - --#if 0 -+ check_utf8 (); - if (match_icase) - syntax_bits |= RE_ICASE; --#endif - re_set_syntax (syntax_bits); - dfasyntax (syntax_bits, match_icase, eolbyte); - -@@ -303,20 +280,9 @@ EXECUTE_FCT(EGexecute) - struct kwsmatch kwsm; - size_t i, ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- if (start_ptr) -- start_ptr = case_buf + (start_ptr - buf); -- buf = case_buf; -- } -- if (kwset) -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - - buflim = buf + size; -@@ -329,21 +295,63 @@ EXECUTE_FCT(EGexecute) - if (kwset) - { - /* Find a possible match using the KWset matcher. */ -- size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ -+ size_t offset; -+#ifdef MBS_SUPPORT -+ /* kwsexec doesn't work with match_icase and multibyte input. 
*/ -+ if (match_icase && mb_cur_max > 1) -+ /* Avoid kwset */ -+ offset = 0; -+ else -+#endif /* MBS_SUPPORT */ -+ offset = kwsexec (kwset, beg, buflim - beg, &kwsm); - if (offset == (size_t) -1) -- goto failure; -+ return (size_t)-1; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - /* Narrow down to the line containing the candidate, and - run it through DFA. */ - end = memchr(beg, eol, buflim - beg); - end++; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) -+ if (mb_cur_max > 1 && bytes_left) - continue; - #endif - while (beg > buf && beg[-1] != eol) - --beg; -- if (kwsm.index < kwset_exact_matches) -+ if ( -+#ifdef MBS_SUPPORT -+ !(match_icase && mb_cur_max > 1) && -+#endif /* MBS_SUPPORT */ -+ (kwsm.index < kwset_exact_matches)) - goto success; - if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; -@@ -351,13 +359,47 @@ EXECUTE_FCT(EGexecute) - else - { - /* No good fixed strings; start with DFA. */ -+#ifdef MBS_SUPPORT -+ size_t bytes_left = 0; -+#endif /* MBS_SUPPORT */ - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. 
*/ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ - beg += offset; - end = memchr (beg, eol, buflim - beg); - end++; -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && bytes_left) -+ continue; -+#endif /* MBS_SUPPORT */ - while (beg > buf && beg[-1] != eol) - --beg; - } -@@ -475,24 +517,144 @@ EXECUTE_FCT(EGexecute) - *match_size = len; - ret_val = beg - buf; - out: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- free((char*)buf); -- if (mb_properties) -- free(mb_properties); -- } --#endif /* MBS_SUPPORT */ - return ret_val; - } - #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ - -+#ifdef MBS_SUPPORT -+static int f_i_multibyte; /* whether we're using the new -Fi MB method */ -+static struct -+{ -+ wchar_t **patterns; -+ size_t count, maxlen; -+ unsigned char *match; -+} Fimb; -+#endif -+ - #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) - COMPILE_FCT(Fcompile) - { -+ int mb_cur_max = MB_CUR_MAX; - char const *beg, *lim, *err; - -+ check_utf8 (); -+#ifdef MBS_SUPPORT -+ /* Support -F -i for UTF-8 input. 
*/ -+ if (match_icase && mb_cur_max > 1) -+ { -+ mbstate_t mbs; -+ wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); -+ const char *patternend = pattern; -+ size_t wcsize; -+ kwset_t fimb_kwset = NULL; -+ char *starts = NULL; -+ wchar_t *wcbeg, *wclim; -+ size_t allocated = 0; -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+# ifdef __GNU_LIBRARY__ -+ wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); -+ if (patternend != pattern + size) -+ wcsize = (size_t) -1; -+# else -+ { -+ char *patterncopy = xmalloc (size + 1); -+ -+ memcpy (patterncopy, pattern, size); -+ patterncopy[size] = '\0'; -+ patternend = patterncopy; -+ wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); -+ if (patternend != patterncopy + size) -+ wcsize = (size_t) -1; -+ free (patterncopy); -+ } -+# endif -+ if (wcsize + 2 <= 2) -+ { -+fimb_fail: -+ free (wcpattern); -+ free (starts); -+ if (fimb_kwset) -+ kwsfree (fimb_kwset); -+ free (Fimb.patterns); -+ Fimb.patterns = NULL; -+ } -+ else -+ { -+ if (!(fimb_kwset = kwsalloc (NULL))) -+ error (2, 0, _("memory exhausted")); -+ -+ starts = xmalloc (mb_cur_max * 3); -+ wcbeg = wcpattern; -+ do -+ { -+ int i; -+ size_t wclen; -+ -+ if (Fimb.count >= allocated) -+ { -+ if (allocated == 0) -+ allocated = 128; -+ else -+ allocated *= 2; -+ Fimb.patterns = xrealloc (Fimb.patterns, -+ sizeof (wchar_t *) * allocated); -+ } -+ Fimb.patterns[Fimb.count++] = wcbeg; -+ for (wclim = wcbeg; -+ wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) -+ *wclim = towlower (*wclim); -+ *wclim = L'\0'; -+ wclen = wclim - wcbeg; -+ if (wclen > Fimb.maxlen) -+ Fimb.maxlen = wclen; -+ if (wclen > 3) -+ wclen = 3; -+ if (wclen == 0) -+ { -+ if ((err = kwsincr (fimb_kwset, "", 0)) != 0) -+ error (2, 0, err); -+ } -+ else -+ for (i = 0; i < (1 << wclen); i++) -+ { -+ char *p = starts; -+ int j, k; -+ -+ for (j = 0; j < wclen; ++j) -+ { -+ wchar_t wc = wcbeg[j]; -+ if (i & (1 << j)) -+ { -+ wc = towupper (wc); -+ if (wc == wcbeg[j]) -+ continue; -+ } 
-+ k = wctomb (p, wc); -+ if (k <= 0) -+ goto fimb_fail; -+ p += k; -+ } -+ if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) -+ error (2, 0, err); -+ } -+ if (wclim < wcpattern + wcsize) -+ ++wclim; -+ wcbeg = wclim; -+ } -+ while (wcbeg < wcpattern + wcsize); -+ f_i_multibyte = 1; -+ kwset = fimb_kwset; -+ free (starts); -+ Fimb.match = xmalloc (Fimb.count); -+ if ((err = kwsprep (kwset)) != 0) -+ error (2, 0, err); -+ return; -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ -+ - kwsinit (); - beg = pattern; - do -@@ -511,6 +673,76 @@ COMPILE_FCT(Fcompile) - error (2, 0, err); - } - -+#ifdef MBS_SUPPORT -+static int -+Fimbexec (const char *buf, size_t size, size_t *plen, int exact) -+{ -+ size_t len, letter, i; -+ int ret = -1; -+ mbstate_t mbs; -+ wchar_t wc; -+ int patterns_left; -+ -+ assert (match_icase && f_i_multibyte == 1); -+ assert (MB_CUR_MAX > 1); -+ -+ memset (&mbs, '\0', sizeof (mbs)); -+ memset (Fimb.match, '\1', Fimb.count); -+ letter = len = 0; -+ patterns_left = 1; -+ while (patterns_left && len <= size) -+ { -+ size_t c; -+ -+ patterns_left = 0; -+ if (len < size) -+ { -+ c = mbrtowc (&wc, buf + len, size - len, &mbs); -+ if (c + 2 <= 2) -+ return ret; -+ -+ wc = towlower (wc); -+ } -+ else -+ { -+ c = 1; -+ wc = L'\0'; -+ } -+ -+ for (i = 0; i < Fimb.count; i++) -+ { -+ if (Fimb.match[i]) -+ { -+ if (Fimb.patterns[i][letter] == L'\0') -+ { -+ /* Found a match. */ -+ *plen = len; -+ if (!exact && !match_words) -+ return 0; -+ else -+ { -+ /* For -w or exact look for longest match. 
*/ -+ ret = 0; -+ Fimb.match[i] = '\0'; -+ continue; -+ } -+ } -+ -+ if (Fimb.patterns[i][letter] == wc) -+ patterns_left = 1; -+ else -+ Fimb.match[i] = '\0'; -+ } -+ } -+ -+ len += c; -+ letter++; -+ } -+ -+ return ret; -+} -+#endif /* MBS_SUPPORT */ -+ - EXECUTE_FCT(Fexecute) - { - register char const *beg, *try, *end; -@@ -519,69 +751,256 @@ EXECUTE_FCT(Fexecute) - struct kwsmatch kwsmatch; - size_t ret_val; - #ifdef MBS_SUPPORT -- char *mb_properties = NULL; -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- { -- char *case_buf = xmalloc(size); -- memcpy(case_buf, buf, size); -- if (start_ptr) -- start_ptr = case_buf + (start_ptr - buf); -- buf = case_buf; -- } -- mb_properties = check_multibyte_string(buf, size); -- } -+ int mb_cur_max = MB_CUR_MAX; -+ mbstate_t mbs; -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - - for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) - { - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); - if (offset == (size_t) -1) -- goto failure; -+ return offset; - #ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) -- continue; /* It is a part of multibyte character. */ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ /* Offset points inside multibyte character: no good. */ -+ break; -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ continue; -+ } -+ else - #endif /* MBS_SUPPORT */ - beg += offset; -+#ifdef MBS_SUPPORT -+ /* For f_i_multibyte, the string at beg now matches first 3 chars of -+ one of the search strings (less if there are shorter search strings). 
-+ See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ - len = kwsmatch.size[0]; - if (start_ptr && !match_words) - goto success_in_beg_and_len; - if (match_lines) - { - if (beg > buf && beg[-1] != eol) -- continue; -+ goto next_char; - if (beg + len < buf + size && beg[len] != eol) -- continue; -+ goto next_char; - goto success; - } - else if (match_words) -- for (try = beg; len; ) -- { -- if (try > buf && WCHAR((unsigned char) try[-1])) -- break; -- if (try + len < buf + size && WCHAR((unsigned char) try[len])) -- { -- offset = kwsexec (kwset, beg, --len, &kwsmatch); -- if (offset == (size_t) -1) -- break; -- try = beg + offset; -- len = kwsmatch.size[0]; -- } -- else if (!start_ptr) -- goto success; -- else -- goto success_in_beg_and_len; -- } /* for (try) */ -- else -+ { -+ while (len) -+ { -+ int word_match = 0; -+ if (beg > buf) -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ -+ if (using_utf8) -+ { -+ s = beg - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, beg - s); -+ if (mr <= 0) -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ else if ((iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (beg - s)) -+ goto next_char; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (WCHAR ((unsigned char) beg[-1])) -+ goto next_char; -+ } -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, beg + len, buf + size - beg - len); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ word_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ word_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) -+ word_match = 1; -+ if (word_match) -+ { -+ if (start_ptr == NULL) -+ 
/* Returns the whole line now we know there's a word match. */ -+ goto success; -+ else { -+ /* Returns just this word match. */ -+ *match_size = len; -+ return beg - buf; -+ } -+ } -+ if (len > 0) -+ { -+ /* Try a shorter length anchored at the same place. */ -+ --len; -+ offset = kwsexec (kwset, beg, len, &kwsmatch); -+ -+ if (offset == -1) -+ goto next_char; /* Try a different anchor. */ -+#ifdef MBS_SUPPORT -+ -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ size_t bytes_left = offset; -+ while (bytes_left) -+ { -+ size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; -+ if (mlen == (size_t) -1 || mlen == 0) -+ { -+ /* Incomplete character: treat as single-byte. */ -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ beg++; -+ bytes_left--; -+ continue; -+ } -+ -+ if (mlen == (size_t) -2) -+ { -+ /* Offset points inside multibyte character: -+ * no good. */ -+ break; -+ } -+ -+ beg += mlen; -+ bytes_left -= mlen; -+ } -+ -+ if (bytes_left) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ goto next_char; /* Try a different anchor. */ -+ } -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ beg += offset; -+#ifdef MBS_SUPPORT -+ /* The string at beg now matches first 3 chars of one of -+ the search strings (less if there are shorter search -+ strings). See if this is a real match. */ -+ if (f_i_multibyte -+ && Fimbexec (beg, len - offset, &kwsmatch.size[0], -+ start_ptr == NULL)) -+ goto next_char; -+#endif /* MBS_SUPPORT */ -+ len = kwsmatch.size[0]; -+ } -+ } -+ } -+ else - goto success; -- } /* for (beg in buf) */ -+next_char:; -+#ifdef MBS_SUPPORT -+ /* Advance to next character. For MB_CUR_MAX == 1 case this is handled -+ by ++beg above. 
*/ -+ if (mb_cur_max > 1) -+ { -+ if (using_utf8) -+ { -+ unsigned char c = *beg; -+ if (c >= 0xc2) -+ { -+ if (c < 0xe0) -+ ++beg; -+ else if (c < 0xf0) -+ beg += 2; -+ else if (c < 0xf8) -+ beg += 3; -+ else if (c < 0xfc) -+ beg += 4; -+ else if (c < 0xfe) -+ beg += 5; -+ } -+ } -+ else -+ { -+ size_t l = mbrlen (beg, buf + size - beg, &mbs); - -- failure: -- ret_val = -1; -- goto out; -+ last_char = beg; -+ if (l + 2 >= 2) -+ beg += l - 1; -+ else -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ } -+ } -+#endif /* MBS_SUPPORT */ -+ } -+ -+ return -1; - - success: -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1 && !using_utf8) -+ { -+ end = beg + len; -+ while (end < buf + size) -+ { -+ size_t mlen = mbrlen (end, buf + size - end, &mbs); -+ if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ mlen = 1; -+ } -+ if (mlen == 1 && *end == eol) -+ break; -+ -+ end += mlen; -+ } -+ } -+ else -+ #endif /* MBS_SUPPORT */ - end = memchr (beg + len, eol, (buf + size) - (beg + len)); - end++; - while (buf < beg && beg[-1] != eol) -@@ -591,15 +1010,6 @@ EXECUTE_FCT(Fexecute) - *match_size = len; - ret_val = beg - buf; - out: --#ifdef MBS_SUPPORT -- if (MB_CUR_MAX > 1) -- { -- if (match_icase) -- free((char*)buf); -- if (mb_properties) -- free(mb_properties); -- } --#endif /* MBS_SUPPORT */ - return ret_val; - } - #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ --- -1.5.5.1 - diff --git a/grep-2.5.3-fedora-tests.patch b/grep-2.5.3-fedora-tests.patch deleted file mode 100644 index d39f38d..0000000 --- a/grep-2.5.3-fedora-tests.patch +++ /dev/null @@ -1,146 +0,0 @@ -From aad5428ee639eb9c462097022a0954e8a2bcfb90 Mon Sep 17 00:00:00 2001 -From: Lubomir Rintel -Date: Sun, 23 Nov 2008 17:24:13 +0100 -Subject: [PATCH] Fedora Grep regression test suite - ---- - tests/Makefile.am | 2 +- - tests/fedora.sh | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 114 insertions(+), 1 deletions(-) - 
create mode 100644 tests/fedora.sh - -diff --git a/tests/Makefile.am b/tests/Makefile.am -index 6ac60e6..02f30ac 100644 ---- a/tests/Makefile.am -+++ b/tests/Makefile.am -@@ -4,7 +4,7 @@ AWK=@AWK@ - - TESTS = warning.sh khadafy.sh spencer1.sh bre.sh ere.sh pcre.sh \ - status.sh empty.sh options.sh backref.sh file.sh foad1.sh \ -- fmbtest.sh yesno.sh -+ fmbtest.sh yesno.sh fedora.sh - EXTRA_DIST = $(TESTS) \ - khadafy.lines khadafy.regexp \ - spencer1.awk spencer1.tests \ -diff --git a/tests/fedora.sh b/tests/fedora.sh -new file mode 100644 -index 0000000..9a140b7 ---- /dev/null -+++ b/tests/fedora.sh -@@ -0,0 +1,113 @@ -+#!/bin/bash -+ -+# GREP Regression test suite for Fedora bugs and fixes -+# (c) 2008 Lubomir Rintel -+# Licensed under the same terms as GNU Grep itself -+ -+if [ -t 1 ] -+then -+ # Colored output on terimal -+ G='\033[32m' -+ R='\033[31m' -+ D='\033[0m' -+fi -+ -+ok () { echo -e "${G}OK${D}"; } -+fail () { echo -e "${R}FAIL${D} (See ${U})"; failures=1; } -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=116909 -+echo -n "fgrep false negatives: " -+diff <(${GREP} -F -w -f \ -+<(cat <' | LANG=it_IT ${GREP} -iE '\Wform\W') \ -+<(cat < -+EOF -+) && ok || fail -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=189580 -+echo -n "grep -D skip opening a special file: " -+${GREP} -D skip foo /dev/zero & -+sleep 1 -+kill $! 2>/dev/null && fail || ok -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=169524 -+echo -n "grep -Fw looping infinitely: " -+echo foobar | ${GREP} -Fw "" & -+sleep 1 -+kill $! 2>/dev/null && fail || ok -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=140781 -+echo -n "fgrep hangs on binary files: " -+${GREP} -F grep $(which ${GREP}) >/dev/null & -+sleep 1 -+kill $! 
2>/dev/null && fail || ok -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=161700 -+echo -n "grep -Fw fails to match anything: " -+diff <(${GREP} -Fw test <(cat </dev/null || exit $failures -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=171379 -+echo -n "grep -P crashes on whitespace lines: " -+diff <(${GREP} -P '^\s+$' <(echo " ")) \ -+<(echo " ") && ok || fail -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=204255 -+echo -n "-e '' does not work if not a first parameter: " -+diff <(echo test | grep -e 'HighlightThis' -e '') \ -+ <(echo test | grep -e '' -e 'HighlightThis') && ok || fail -+ -+U=https://bugzilla.redhat.com/show_bug.cgi?id=324781 -+echo -n "bad handling of line breaks with grep -P #1: " -+echo -ne "a\na" | ${GREP} -P '[^a]' >/dev/null && fail || ok -+ -+# This is mostly a check that fix for above doesn't break -P further -+echo -n "bad handling of line breaks with grep -P #2: " -+echo -ne "a\na" | ${GREP} -P '[^b].[^b]' >/dev/null && fail || ok -+ -+exit $failures --- -1.5.5.1 - diff --git a/grep-2.5.3-pcrewrap.patch b/grep-2.5.3-pcrewrap.patch deleted file mode 100644 index 001023e..0000000 --- a/grep-2.5.3-pcrewrap.patch +++ /dev/null @@ -1,69 +0,0 @@ -From 0c56b664a73764ed01607f47731c8e4607f478d5 Mon Sep 17 00:00:00 2001 -From: Lubomir Rintel -Date: Sun, 23 Nov 2008 17:25:57 +0100 -Subject: [PATCH] Fix line wrapping in PCRE backend - -PCRE can't limit the matching to space between newlines (i.e -[^a] will allways match newline, see pcreposix(3) for details), -therefore whe have to split the buffer into lines and match each -line in the buffer separately. 
- -Original ticket: https://bugzilla.redhat.com/show_bug.cgi?id=324781 ---- - src/search.c | 33 ++++++++++++++++++++++++++++----- - 1 files changed, 28 insertions(+), 5 deletions(-) - -diff --git a/src/search.c b/src/search.c -index 0b3e0e8..7f5f187 100644 ---- a/src/search.c -+++ b/src/search.c -@@ -689,9 +689,32 @@ EXECUTE_FCT(Pexecute) - is just for performance improvement in pcre_exec. */ - int sub[300]; - -- int e = pcre_exec (cre, extra, buf, size, -- start_ptr ? (start_ptr - buf) : 0, 0, -- sub, sizeof sub / sizeof *sub); -+ char *line_buf = buf; -+ int line_size = 0; -+ int e = 0; -+ -+ /* PCRE can't limit the matching to space between newlines (i.e -+ [^a] will allways match newline, see pcreposix(3) for details), -+ therefore whe have to match each line in the buffer separately */ -+ do { -+ /* We're not at the of buffer or end of line, get another char */ -+ if (line_buf + line_size < buf + size && line_buf[line_size++] != eolbyte) { -+ continue; -+ } -+ -+ /* Match the part of buffer that constitutes a line */ -+ e = pcre_exec (cre, extra, line_buf, line_size - 1, -+ start_ptr ? (start_ptr - buf) : 0, 0, -+ sub, sizeof sub / sizeof *sub); -+ -+ /* Don't try other lines if this one matched or returned an error */ -+ if (e != PCRE_ERROR_NOMATCH) -+ break; -+ -+ /* Wrap up */ -+ line_buf += line_size; -+ line_size = 0; -+ } while (line_buf < buf + size); - - if (e <= 0) - { -@@ -710,8 +733,8 @@ EXECUTE_FCT(Pexecute) - else - { - /* Narrow down to the line we've found. 
*/ -- char const *beg = buf + sub[0]; -- char const *end = buf + sub[1]; -+ char const *beg = line_buf + sub[0]; -+ char const *end = line_buf + sub[1]; - char const *buflim = buf + size; - char eol = eolbyte; - if (!start_ptr) --- -1.5.5.1 - diff --git a/grep-2.5.4-dfa-optional.patch b/grep-2.5.4-dfa-optional.patch deleted file mode 100644 index a01a99a..0000000 --- a/grep-2.5.4-dfa-optional.patch +++ /dev/null @@ -1,68 +0,0 @@ ---- ./src/search.c.old 2010-02-12 14:29:01.000000000 +0100 -+++ ./src/search.c 2010-02-12 14:29:18.207410166 +0100 -@@ -278,6 +278,8 @@ - char eol = eolbyte; - int backref, start, len, best_len; - struct kwsmatch kwsm; -+ static int use_dfa; -+ static int use_dfa_checked = 0; - size_t i, ret_val; - #ifdef MBS_SUPPORT - int mb_cur_max = MB_CUR_MAX; -@@ -285,6 +287,26 @@ - memset (&mbs, '\0', sizeof (mbstate_t)); - #endif /* MBS_SUPPORT */ - -+ if (!use_dfa_checked) -+ { -+ char *grep_use_dfa = getenv ("GREP_USE_DFA"); -+ if (!grep_use_dfa) -+ { -+#ifdef MBS_SUPPORT -+ /* Turn off DFA when processing multibyte input. */ -+ use_dfa = (MB_CUR_MAX == 1); -+#else -+ use_dfa = 1; -+#endif /* MBS_SUPPORT */ -+ } -+ else -+ { -+ use_dfa = atoi (grep_use_dfa); -+ } -+ -+ use_dfa_checked = 1; -+ } -+ - buflim = buf + size; - - for (beg = end = buf; end < buflim; beg = end) -@@ -353,7 +375,8 @@ - #endif /* MBS_SUPPORT */ - (kwsm.index < kwset_exact_matches)) - goto success; -- if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) -+ if (use_dfa && -+ dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) - continue; - } - else -@@ -362,7 +385,9 @@ - #ifdef MBS_SUPPORT - size_t bytes_left = 0; - #endif /* MBS_SUPPORT */ -- size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); -+ size_t offset = 0; -+ if (use_dfa) -+ offset = dfaexec (&dfa, beg, buflim - beg, &backref); - if (offset == (size_t) -1) - break; - /* Narrow down to the line we've found. */ -@@ -404,7 +429,7 @@ - --beg; - } - /* Successful, no backreferences encountered! 
*/ -- if (!backref) -+ if (use_dfa && !backref) - goto success; - } - else diff --git a/grep-2.5.4-utf8.patch b/grep-2.5.4-utf8.patch deleted file mode 100644 index 6b31597..0000000 --- a/grep-2.5.4-utf8.patch +++ /dev/null @@ -1,14 +0,0 @@ -From latin1 to utf8 - -diff -up grep-2.5.4/AUTHORS.utf8 grep-2.5.4/AUTHORS ---- grep-2.5.1a/AUTHORS.utf8 2008-10-01 18:42:53.000000000 +0200 -+++ grep-2.5.1a/AUTHORS 2008-10-01 18:43:12.000000000 +0200 -@@ -40,7 +40,7 @@ send me email. - - Alain Magloire maintained GNU grep until version 2.5e. - --Bernhard "Bero" Rosenkränzer maintained GNU grep until -+Bernhard "Bero" Rosenkränzer maintained GNU grep until - version 2.5.1, ie. from Sep 2001 till 2003. - - Stepan Kasal maintained GNU grep since Feb 2004. diff --git a/grep-2.5.4-w.patch b/grep-2.5.4-w.patch deleted file mode 100644 index beb8407..0000000 --- a/grep-2.5.4-w.patch +++ /dev/null @@ -1,117 +0,0 @@ ---- a/src/search.c 2007-10-01 14:47:55.000000000 +0200 -+++ b/src/search.c 2007-09-30 23:38:45.000000000 +0200 -@@ -282,6 +282,7 @@ - static int use_dfa_checked = 0; - size_t i, ret_val; - #ifdef MBS_SUPPORT -+ const char *last_char = NULL; - int mb_cur_max = MB_CUR_MAX; - mbstate_t mbs; - memset (&mbs, '\0', sizeof (mbstate_t)); -@@ -338,6 +341,8 @@ - while (bytes_left) - { - size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; - if (mlen == (size_t) -1 || mlen == 0) - { - /* Incomplete character: treat as single-byte. */ -@@ -398,6 +403,8 @@ - while (bytes_left) - { - size_t mlen = mbrlen (beg, bytes_left, &mbs); -+ -+ last_char = beg; - if (mlen == (size_t) -1 || mlen == 0) - { - /* Incomplete character: treat as single-byte. 
*/ -@@ -475,10 +483,84 @@ - if (match_words) - while (match <= best_match) - { -+ int lword_match = 0; -+ if (match == buf) -+ lword_match = 1; -+ else -+ { -+ assert (start > 0); -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ const char *s; -+ int mr; -+ wchar_t pwc; -+ if (using_utf8) -+ { -+ s = match - 1; -+ while (s > buf -+ && (unsigned char) *s >= 0x80 -+ && (unsigned char) *s <= 0xbf) -+ --s; -+ } -+ else -+ s = last_char; -+ mr = mbtowc (&pwc, s, match - s); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ lword_match = 1; -+ } -+ else if (!(iswalnum (pwc) || pwc == L'_') -+ && mr == (int) (match - s)) -+ lword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) match[-1])) -+ lword_match = 1; -+ } -+ -+ if (lword_match) -+ { -+ int rword_match = 0; -+ if (start + len == end - beg - 1) -+ rword_match = 1; -+ else -+ { -+#ifdef MBS_SUPPORT -+ if (mb_cur_max > 1) -+ { -+ wchar_t nwc; -+ int mr; -+ -+ mr = mbtowc (&nwc, buf + start + len, -+ end - buf - start - len - 1); -+ if (mr <= 0) -+ { -+ memset (&mbs, '\0', sizeof (mbstate_t)); -+ rword_match = 1; -+ } -+ else if (!iswalnum (nwc) && nwc != L'_') -+ rword_match = 1; -+ } -+ else -+#endif /* MBS_SUPPORT */ -+ if (!WCHAR ((unsigned char) match[len])) -+ rword_match = 1; -+ } -+ -+ if (rword_match) -+ { -+ if (!start_ptr) -+ /* Returns the whole line. */ -+ goto success; -+ else -+ { -+ goto assess_pattern_match; -+ } -+ } -+ } -- if ((match == buf || !WCHAR ((unsigned char) match[-1])) -- && (len == end - beg - 1 -- || !WCHAR ((unsigned char) match[len]))) -- goto assess_pattern_match; - if (len > 0) - { - /* Try a shorter length anchored at the same place. 
*/ diff --git a/grep-2.6-sigsegv.patch b/grep-2.6-sigsegv.patch new file mode 100644 index 0000000..59ab50d --- /dev/null +++ b/grep-2.6-sigsegv.patch @@ -0,0 +1,20 @@ +--- a/src/dfa.c 2010-03-22 17:22:51.000000000 +0100 ++++ b/src/dfa.c 2010-03-23 16:55:43.974271709 +0100 +@@ -243,7 +243,7 @@ + For MB_CUR_MAX > 1, one or both of the two cases may not be set, + so the resulting charset may only be used as an optimization. */ + static void +-setbit_case_fold (unsigned int b, charclass c) ++setbit_case_fold (wint_t b, charclass c) + { + if (case_fold) + { +@@ -691,7 +691,7 @@ + continue; + } + +- setbit_case_fold (c, ccl); ++ setbit_case_fold (wc, ccl); + #ifdef MBS_SUPPORT + /* Build normal characters. */ + if (MB_CUR_MAX > 1) diff --git a/grep-bz460641.patch b/grep-bz460641.patch deleted file mode 100644 index 4f1565f..0000000 --- a/grep-bz460641.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up grep-2.5.3/src/search.c.bz479152 grep-2.5.3/src/search.c ---- grep-2.5.3/src/search.c.bz479152 2009-01-08 09:57:12.000000000 +0100 -+++ grep-2.5.3/src/search.c 2009-01-08 09:58:57.000000000 +0100 -@@ -757,7 +757,7 @@ EXECUTE_FCT(Fexecute) - const char *last_char = NULL; - #endif /* MBS_SUPPORT */ - -- for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) -+ for (beg = start_ptr ? start_ptr : buf; beg < buf + size; beg++) - { - size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); - if (offset == (size_t) -1) diff --git a/grep.spec b/grep.spec index e2c7dce..2ced74f 100644 --- a/grep.spec +++ b/grep.spec @@ -2,21 +2,12 @@ Summary: Pattern matching utilities Name: grep -Version: 2.5.4 -Release: 2%{?dist} +Version: 2.6 +Release: 1%{?dist} License: GPLv3+ Group: Applications/Text -Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.bz2 -Patch0: grep-2.5.3-fedora-tests.patch -Patch2: grep-2.5.3-pcrewrap.patch -Patch3: grep-2.5.3-case.patch -Patch4: grep-2.5.3-egf-speedup.patch -# bug #460641 (a.k.a. 
479152) -Patch40: grep-bz460641.patch -Patch5: grep-2.5.4-utf8.patch -Patch6: grep-2.5.4-dfa-optional.patch -# fix -w switch behaviour broken by dfa-optional.patch -Patch13: grep-2.5.4-w.patch +Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz +Patch0: grep-2.6-sigsegv.patch URL: http://www.gnu.org/software/grep/ Requires(post): /sbin/install-info Requires(preun): /sbin/install-info @@ -33,14 +24,7 @@ GNU grep is needed by many scripts, so it shall be installed on every system. %prep %setup -q -%patch0 -p1 -b .fedora-tests -%patch2 -p1 -b .pcrewrap -%patch3 -p1 -b .case -%patch4 -p1 -b .egf-speedup -%patch40 -p1 -b .bz460641 -%patch5 -p1 -b .utf8 -%patch6 -p1 -b .dfa-optional -%patch13 -p1 -b .w +%patch0 -p1 -b .sigsegv %build %configure --without-included-regex CPPFLAGS="-I%{_includedir}/pcre" @@ -62,11 +46,9 @@ make check ||: rm -rf ${RPM_BUILD_ROOT} %post -#exec >/dev/null 2>&1 /sbin/install-info --quiet --info-dir=%{_infodir} %{_infodir}/grep.info.gz || : %preun -#exec >/dev/null 2>&1 if [ $1 = 0 ]; then /sbin/install-info --quiet --info-dir=%{_infodir} --delete %{_infodir}/grep.info.gz || : fi @@ -80,6 +62,12 @@ fi %{_mandir}/*/* %changelog +* Tue Mar 23 2010 Jaroslav Škarvada - 2.6-1 +- New version: grep-2.6 +- Added sigsegv patch (after release patch from upstream) +- Dropped obsoleted patches: fedora-tests, pcrewrap, case, egf-speedup, + bz460641, utf8, dfa-optional, w + * Fri Mar 05 2010 Jaroslav Škarvada - 2.5.4-2 - Added w patch to fix -w switch behaviour broken by dfa-optional patch diff --git a/sources b/sources index bddeb32..63cd508 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -5650ee2ae6ea4b39e9459d7d0585b315 grep-2.5.4.tar.bz2 +a55bd2d80f7b76563f0f069c52aa730e grep-2.6.tar.xz