New version

Resolves: rhbz#1167657
- De-fuzzified patches
- Dropped pcre-backported-fixes patch (not needed)
This commit is contained in:
Jaroslav Škarvada 2014-11-25 11:09:35 +01:00
parent 23be49cbbf
commit 712f266053
5 changed files with 22 additions and 407 deletions

View File

@ -1,389 +0,0 @@
diff --git a/src/grep.h b/src/grep.h
index 4935872..729c906 100644
--- a/src/grep.h
+++ b/src/grep.h
@@ -27,4 +27,19 @@ extern int match_words; /* -w */
extern int match_lines; /* -x */
extern unsigned char eolbyte; /* -z */
+/* An enum textbin describes the file's type, inferred from data read
+ before the first line is selected for output. */
+enum textbin
+ {
+ /* Binary, as it contains null bytes and the -z option is not in effect,
+ or it contains encoding errors. */
+ TEXTBIN_BINARY = -1,
+
+ /* Not known yet. Only text has been seen so far. */
+ TEXTBIN_UNKNOWN = 0,
+
+ /* Text. */
+ TEXTBIN_TEXT = 1
+ };
+
#endif
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 820dd00..9938ffc 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -33,13 +33,19 @@ static pcre *cre;
/* Additional information about the pattern. */
static pcre_extra *extra;
-# ifdef PCRE_STUDY_JIT_COMPILE
-static pcre_jit_stack *jit_stack;
-# else
+# ifndef PCRE_STUDY_JIT_COMPILE
# define PCRE_STUDY_JIT_COMPILE 0
# endif
#endif
+/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
+ string matches when that flag is used. */
+static int empty_match[2];
+
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -52,13 +58,17 @@ Pcompile (char const *pattern, size_t size)
char const *ep;
char *re = xnmalloc (4, size + 7);
int flags = (PCRE_MULTILINE
- | (match_icase ? PCRE_CASELESS : 0)
- | (using_utf8 () ? PCRE_UTF8 : 0));
+ | (match_icase ? PCRE_CASELESS : 0));
char const *patlim = pattern + size;
char *n = re;
char const *p;
char const *pnul;
+ if (using_utf8 ())
+ flags |= PCRE_UTF8;
+ else if (MB_CUR_MAX != 1)
+ error (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
+
/* FIXME: Remove these restrictions. */
if (memchr (pattern, '\n', size))
error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
@@ -114,14 +124,20 @@ Pcompile (char const *pattern, size_t size)
/* A 32K stack is allocated for the machine code by default, which
can grow to 512K if necessary. Since JIT uses far less memory
than the interpreter, this should be enough in practice. */
- jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
+ pcre_jit_stack *jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
if (!jit_stack)
error (EXIT_TROUBLE, 0,
_("failed to allocate memory for the PCRE JIT stack"));
pcre_assign_jit_stack (extra, NULL, jit_stack);
}
+
# endif
free (re);
+
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -134,36 +150,110 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- int sub[300];
-
- const char *line_buf, *line_end, *line_next;
+ int sub[NSUB];
+ char const *p = start_ptr ? start_ptr : buf;
+ bool bol = p[-1] == eolbyte;
+ char const *line_start = buf;
int e = PCRE_ERROR_NOMATCH;
- ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;
+ char const *line_end;
- /* PCRE can't limit the matching to single lines, therefore we have to
- match each line in the buffer separately. */
- for (line_next = buf;
- e == PCRE_ERROR_NOMATCH && line_next < buf + size;
- start_ofs -= line_next - line_buf)
+ /* If the input type is unknown, the caller is still testing the
+ input, which means the current buffer cannot contain encoding
+ errors and a multiline search is typically more efficient.
+ Otherwise, a single-line search is typically faster, so that
+ pcre_exec doesn't waste time validating the entire input
+ buffer. */
+ bool multiline = TEXTBIN_UNKNOWN;
+
+ for (; p < buf + size; p = line_start = line_end + 1)
{
- line_buf = line_next;
- line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
- if (line_end == NULL)
- line_next = line_end = buf + size;
- else
- line_next = line_end + 1;
+ bool too_big;
- if (start_ptr && start_ptr >= line_end)
- continue;
+ if (multiline)
+ {
+ size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
+ size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
+ line_end = memrchr (p, eolbyte, scan_size);
+ too_big = ! line_end;
+ }
+ else
+ {
+ line_end = memchr (p, eolbyte, buf + size - p);
+ too_big = INT_MAX < line_end - p;
+ }
- if (INT_MAX < line_end - line_buf)
+ if (too_big)
error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
- e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
- start_ofs < 0 ? 0 : start_ofs, 0,
- sub, sizeof sub / sizeof *sub);
+ for (;;)
+ {
+ /* Skip past bytes that are easily determined to be encoding
+ errors, treating them as data that cannot match. This is
+ faster than having pcre_exec check them. */
+ while (mbclen_cache[to_uchar (*p)] == (size_t) -1)
+ {
+ p++;
+ bol = false;
+ }
+
+ /* Check for an empty match; this is faster than letting
+ pcre_exec do it. */
+ int search_bytes = line_end - p;
+ if (search_bytes == 0)
+ {
+ sub[0] = sub[1] = 0;
+ e = empty_match[bol];
+ break;
+ }
+
+ int options = 0;
+ if (!bol)
+ options |= PCRE_NOTBOL;
+ if (multiline)
+ options |= PCRE_NO_UTF8_CHECK;
+
+ e = pcre_exec (cre, extra, p, search_bytes, 0,
+ options, sub, NSUB);
+ if (e != PCRE_ERROR_BADUTF8)
+ {
+ if (0 < e && multiline && sub[1] - sub[0] != 0)
+ {
+ char const *nl = memchr (p + sub[0], eolbyte,
+ sub[1] - sub[0]);
+ if (nl)
+ {
+ /* This match crosses a line boundary; reject it. */
+ p += sub[0];
+ line_end = nl;
+ continue;
+ }
+ }
+ break;
+ }
+ int valid_bytes = sub[0];
+
+ /* Try to match the string before the encoding error.
+ Again, handle the empty-match case specially, for speed. */
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
+ if (e != PCRE_ERROR_NOMATCH)
+ break;
+
+ /* Treat the encoding error as data that cannot match. */
+ p += valid_bytes + 1;
+ bol = false;
+ }
+
+ if (e != PCRE_ERROR_NOMATCH)
+ break;
+ bol = true;
}
if (e <= 0)
@@ -171,7 +261,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
switch (e)
{
case PCRE_ERROR_NOMATCH:
- return -1;
+ break;
case PCRE_ERROR_NOMEMORY:
error (EXIT_TROUBLE, 0, _("memory exhausted"));
@@ -180,10 +270,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0,
_("exceeded PCRE's backtracking limit"));
- case PCRE_ERROR_BADUTF8:
- error (EXIT_TROUBLE, 0,
- _("invalid UTF-8 byte sequence in input"));
-
default:
/* For now, we lump all remaining PCRE failures into this basket.
If anyone cares to provide sample grep usage that can trigger
@@ -192,30 +278,33 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
}
- /* NOTREACHED */
return -1;
}
else
{
- /* Narrow down to the line we've found. */
- char const *beg = line_buf + sub[0];
- char const *end = line_buf + sub[1];
- char const *buflim = buf + size;
- char eol = eolbyte;
- if (!start_ptr)
+ char const *matchbeg = p + sub[0];
+ char const *matchend = p + sub[1];
+ char const *beg;
+ char const *end;
+ if (start_ptr)
{
- /* FIXME: The case when '\n' is not found indicates a bug:
- Since grep is line oriented, the match should never contain
- a newline, so there _must_ be a newline following.
- */
- if (!(end = memchr (end, eol, buflim - end)))
- end = buflim;
- else
- end++;
- while (buf < beg && beg[-1] != eol)
- --beg;
+ beg = matchbeg;
+ end = matchend;
+ }
+ else if (multiline)
+ {
+ char const *prev_nl = memrchr (line_start - 1, eolbyte,
+ matchbeg - (line_start - 1));
+ char const *next_nl = memchr (matchend, eolbyte,
+ line_end + 1 - matchend);
+ beg = prev_nl + 1;
+ end = next_nl + 1;
+ }
+ else
+ {
+ beg = line_start;
+ end = line_end + 1;
}
-
*match_size = end - beg;
return beg - buf;
}
diff --git a/src/search.h b/src/search.h
index 14877bc..e671bea 100644
--- a/src/search.h
+++ b/src/search.h
@@ -45,6 +45,7 @@ extern void kwsinit (kwset_t *);
extern char *mbtoupper (char const *, size_t *, mb_len_map_t **);
extern void build_mbclen_cache (void);
+extern size_t mbclen_cache[];
extern ptrdiff_t mb_goback (char const **, char const *, char const *);
extern wint_t mb_prev_wc (char const *, char const *, char const *);
extern wint_t mb_next_wc (char const *, char const *);
diff --git a/src/searchutils.c b/src/searchutils.c
index 5eb9a12..aba9335 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -22,7 +22,7 @@
#define NCHAR (UCHAR_MAX + 1)
-static size_t mbclen_cache[NCHAR];
+size_t mbclen_cache[NCHAR];
void
kwsinit (kwset_t *kwset)
diff --git a/tests/pcre-infloop b/tests/pcre-infloop
index 1b33e72..8054844 100755
--- a/tests/pcre-infloop
+++ b/tests/pcre-infloop
@@ -18,16 +18,16 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
. "${srcdir=.}/init.sh"; path_prepend_ ../src
-require_pcre_
require_timeout_
require_en_utf8_locale_
require_compiled_in_MB_support
+LC_ALL=en_US.UTF-8 require_pcre_
printf 'a\201b\r' > in || framework_failure_
fail=0
LC_ALL=en_US.UTF-8 timeout 3 grep -P 'a.?..b' in
-test $? = 2 || fail_ "libpcre's match function appears to infloop"
+test $? = 1 || fail_ "libpcre's match function appears to infloop"
Exit $fail
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 913e8ee..abcc7e8 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -8,14 +8,19 @@
# notice and this notice are preserved.
. "${srcdir=.}/init.sh"; path_prepend_ ../src
-require_pcre_
+require_timeout_
require_en_utf8_locale_
+require_compiled_in_MB_support
+LC_ALL=en_US.UTF-8 require_pcre_
fail=0
-printf 'j\202\nj\n' > in || framework_failure_
+printf 'j\202j\nj\nk\202\n' > in || framework_failure_
-LC_ALL=en_US.UTF-8 grep -P j in
-test $? -eq 2 || fail=1
+LC_ALL=en_US.UTF-8 timeout 3 grep -P j in
+test $? -eq 0 || fail=1
+
+LC_ALL=en_US.UTF-8 timeout 3 grep -P 'k$' in
+test $? -eq 1 || fail=1
Exit $fail
diff --git a/tests/pcre-utf8 b/tests/pcre-utf8
index 41676f4..2dda116 100755
--- a/tests/pcre-utf8
+++ b/tests/pcre-utf8
@@ -8,8 +8,8 @@
# notice and this notice are preserved.
. "${srcdir=.}/init.sh"; path_prepend_ ../src
-require_pcre_
require_en_utf8_locale_
+LC_ALL=en_US.UTF-8 require_pcre_
fail=0

View File

@ -1,8 +1,8 @@
diff --git a/src/grep.c b/src/grep.c
index 0fcc272..2208a4e 100644
index e3461a7..50a9868 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1579,16 +1579,19 @@ Output control:\n\
@@ -1757,17 +1757,20 @@ Output control:\n\
-D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
ACTION is 'read' or 'skip'\n\
-r, --recursive like --directories=recurse\n\
@ -12,11 +12,12 @@ index 0fcc272..2208a4e 100644
"));
printf (_("\
- --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
- --exclude=FILE_PATTERN skip files and directories matching FILE_PATTERN\n\
- --exclude=FILE_PATTERN skip files and directories matching\
+ --include=FILE_PATTERN\n\
+ search only files that match FILE_PATTERN\n\
+ --exclude=FILE_PATTERN\n\
+ skip files and directories matching FILE_PATTERN\n\
+ skip files and directories matching\
FILE_PATTERN\n\
--exclude-from=FILE skip files matching any file pattern from FILE\n\
- --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
+ --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\

View File

@ -1,8 +1,8 @@
diff --git a/doc/grep.in.1 b/doc/grep.in.1
index 58a6c0e..3e6a8cf 100644
index b6362ee..5a1e3ea 100644
--- a/doc/grep.in.1
+++ b/doc/grep.in.1
@@ -377,7 +377,7 @@ Print
@@ -314,7 +314,7 @@ Print
.I NUM
lines of trailing context after matching lines.
Places a line containing a group separator
@ -11,7 +11,7 @@ index 58a6c0e..3e6a8cf 100644
between contiguous groups of matches.
With the
.B \-o
@@ -390,7 +390,7 @@ Print
@@ -327,7 +327,7 @@ Print
.I NUM
lines of leading context before matching lines.
Places a line containing a group separator
@ -20,7 +20,7 @@ index 58a6c0e..3e6a8cf 100644
between contiguous groups of matches.
With the
.B \-o
@@ -403,13 +403,24 @@ Print
@@ -340,13 +340,24 @@ Print
.I NUM
lines of output context.
Places a line containing a group separator
@ -47,10 +47,10 @@ index 58a6c0e..3e6a8cf 100644
.TP
.BR \-a ", " \-\^\-text
diff --git a/src/grep.c b/src/grep.c
index 7c0f8a8..0fcc272 100644
index 8dbf86e..e3461a7 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1602,6 +1602,8 @@ Context control:\n\
@@ -1781,6 +1781,8 @@ Context control:\n\
"));
printf (_("\
-NUM same as --context=NUM\n\

View File

@ -2,8 +2,8 @@
Summary: Pattern matching utilities
Name: grep
Version: 2.20
Release: 7%{?dist}
Version: 2.21
Release: 1%{?dist}
License: GPLv3+
Group: Applications/Text
Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz
@ -12,11 +12,9 @@ Source2: colorgrep.csh
Source3: GREP_COLORS
Source4: grepconf.sh
# upstream ticket 39444
Patch0: grep-2.20-man-fix-gs.patch
Patch0: grep-2.21-man-fix-gs.patch
# upstream ticket 39445
Patch1: grep-2.20-help-align.patch
# backported from upstream
Patch2: grep-2.20-pcre-backported-fixes.patch
Patch1: grep-2.21-help-align.patch
URL: http://www.gnu.org/software/grep/
Requires(post): /sbin/install-info
Requires(preun): /sbin/install-info
@ -37,7 +35,6 @@ GNU grep is needed by many scripts, so it shall be installed on every system.
%setup -q
%patch0 -p1 -b .man-fix-gs
%patch1 -p1 -b .help-align
%patch2 -p1 -b .pcre-backported-fixes
%build
%global BUILD_FLAGS $RPM_OPT_FLAGS
@ -93,6 +90,12 @@ fi
%{_libexecdir}/grepconf.sh
%changelog
* Tue Nov 25 2014 Jaroslav Škarvada <jskarvad@redhat.com> - 2.21-1
- New version
Resolves: rhbz#1167657
- De-fuzzified patches
- Dropped pcre-backported-fixes patch (not needed)
* Fri Nov 14 2014 Jaroslav Škarvada <jskarvad@redhat.com> - 2.20-7
- Backported more PCRE fixes (by pcre-backported-fixes patch)
- Dropped pcre-invalid-utf8-fix patch, handled by pcre-backported-fixes patch

View File

@ -1 +1 @@
2cbea44a4f1548aee20b9ff2d3076908 grep-2.20.tar.xz
43c48064d6409862b8a850db83c8038a grep-2.21.tar.xz