Fixed invalid UTF-8 byte sequence error in PCRE mode
(by pcre-invalid-utf8-fix patch) Resolves: rhbz#1161832
This commit is contained in:
parent
4ca30e887d
commit
33ee7fe86e
136
grep-2.20-pcre-invalid-utf8-fix.patch
Normal file
136
grep-2.20-pcre-invalid-utf8-fix.patch
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
diff --git a/src/pcresearch.c b/src/pcresearch.c
|
||||||
|
index 820dd00..11df488 100644
|
||||||
|
--- a/src/pcresearch.c
|
||||||
|
+++ b/src/pcresearch.c
|
||||||
|
@@ -136,34 +136,42 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||||
|
#else
|
||||||
|
/* This array must have at least two elements; everything after that
|
||||||
|
is just for performance improvement in pcre_exec. */
|
||||||
|
- int sub[300];
|
||||||
|
+ enum { nsub = 300 };
|
||||||
|
+ int sub[nsub];
|
||||||
|
|
||||||
|
- const char *line_buf, *line_end, *line_next;
|
||||||
|
+ char const *p = start_ptr ? start_ptr : buf;
|
||||||
|
+ int options = p == buf || p[-1] == eolbyte ? 0 : PCRE_NOTBOL;
|
||||||
|
+ char const *line_start = buf;
|
||||||
|
int e = PCRE_ERROR_NOMATCH;
|
||||||
|
- ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;
|
||||||
|
+ char const *line_end;
|
||||||
|
|
||||||
|
/* PCRE can't limit the matching to single lines, therefore we have to
|
||||||
|
match each line in the buffer separately. */
|
||||||
|
- for (line_next = buf;
|
||||||
|
- e == PCRE_ERROR_NOMATCH && line_next < buf + size;
|
||||||
|
- start_ofs -= line_next - line_buf)
|
||||||
|
+ for (; p < buf + size; p = line_start = line_end + 1)
|
||||||
|
{
|
||||||
|
- line_buf = line_next;
|
||||||
|
- line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
|
||||||
|
- if (line_end == NULL)
|
||||||
|
- line_next = line_end = buf + size;
|
||||||
|
- else
|
||||||
|
- line_next = line_end + 1;
|
||||||
|
-
|
||||||
|
- if (start_ptr && start_ptr >= line_end)
|
||||||
|
- continue;
|
||||||
|
+ line_end = memchr (p, eolbyte, buf + size - p);
|
||||||
|
|
||||||
|
- if (INT_MAX < line_end - line_buf)
|
||||||
|
+ if (INT_MAX < line_end - p)
|
||||||
|
error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
|
||||||
|
|
||||||
|
- e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
|
||||||
|
- start_ofs < 0 ? 0 : start_ofs, 0,
|
||||||
|
- sub, sizeof sub / sizeof *sub);
|
||||||
|
+ /* Treat encoding-error bytes as data that cannot match. */
|
||||||
|
+ for (;;)
|
||||||
|
+ {
|
||||||
|
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
|
||||||
|
+ if (e != PCRE_ERROR_BADUTF8)
|
||||||
|
+ break;
|
||||||
|
+ e = pcre_exec (cre, extra, p, sub[0], 0,
|
||||||
|
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
|
||||||
|
+ sub, nsub);
|
||||||
|
+ if (e != PCRE_ERROR_NOMATCH)
|
||||||
|
+ break;
|
||||||
|
+ p += sub[0] + 1;
|
||||||
|
+ options = PCRE_NOTBOL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (e != PCRE_ERROR_NOMATCH)
|
||||||
|
+ break;
|
||||||
|
+ options = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (e <= 0)
|
||||||
|
@@ -180,10 +188,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||||
|
error (EXIT_TROUBLE, 0,
|
||||||
|
_("exceeded PCRE's backtracking limit"));
|
||||||
|
|
||||||
|
- case PCRE_ERROR_BADUTF8:
|
||||||
|
- error (EXIT_TROUBLE, 0,
|
||||||
|
- _("invalid UTF-8 byte sequence in input"));
|
||||||
|
-
|
||||||
|
default:
|
||||||
|
/* For now, we lump all remaining PCRE failures into this basket.
|
||||||
|
If anyone cares to provide sample grep usage that can trigger
|
||||||
|
@@ -197,25 +201,8 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
- /* Narrow down to the line we've found. */
|
||||||
|
- char const *beg = line_buf + sub[0];
|
||||||
|
- char const *end = line_buf + sub[1];
|
||||||
|
- char const *buflim = buf + size;
|
||||||
|
- char eol = eolbyte;
|
||||||
|
- if (!start_ptr)
|
||||||
|
- {
|
||||||
|
- /* FIXME: The case when '\n' is not found indicates a bug:
|
||||||
|
- Since grep is line oriented, the match should never contain
|
||||||
|
- a newline, so there _must_ be a newline following.
|
||||||
|
- */
|
||||||
|
- if (!(end = memchr (end, eol, buflim - end)))
|
||||||
|
- end = buflim;
|
||||||
|
- else
|
||||||
|
- end++;
|
||||||
|
- while (buf < beg && beg[-1] != eol)
|
||||||
|
- --beg;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
+ char const *beg = start_ptr ? p + sub[0] : line_start;
|
||||||
|
+ char const *end = start_ptr ? p + sub[1] : line_end + 1;
|
||||||
|
*match_size = end - beg;
|
||||||
|
return beg - buf;
|
||||||
|
}
|
||||||
|
diff --git a/tests/pcre-infloop b/tests/pcre-infloop
|
||||||
|
index 1b33e72..b92f8e1 100755
|
||||||
|
--- a/tests/pcre-infloop
|
||||||
|
+++ b/tests/pcre-infloop
|
||||||
|
@@ -28,6 +28,6 @@ printf 'a\201b\r' > in || framework_failure_
|
||||||
|
fail=0
|
||||||
|
|
||||||
|
LC_ALL=en_US.UTF-8 timeout 3 grep -P 'a.?..b' in
|
||||||
|
-test $? = 2 || fail_ "libpcre's match function appears to infloop"
|
||||||
|
+test $? = 1 || fail_ "libpcre's match function appears to infloop"
|
||||||
|
|
||||||
|
Exit $fail
|
||||||
|
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
|
||||||
|
index 913e8ee..9da4b18 100755
|
||||||
|
--- a/tests/pcre-invalid-utf8-input
|
||||||
|
+++ b/tests/pcre-invalid-utf8-input
|
||||||
|
@@ -13,9 +13,12 @@ require_en_utf8_locale_
|
||||||
|
|
||||||
|
fail=0
|
||||||
|
|
||||||
|
-printf 'j\202\nj\n' > in || framework_failure_
|
||||||
|
+printf 'j\202j\nj\nk\202\n' > in || framework_failure_
|
||||||
|
|
||||||
|
LC_ALL=en_US.UTF-8 grep -P j in
|
||||||
|
-test $? -eq 2 || fail=1
|
||||||
|
+test $? -eq 0 || fail=1
|
||||||
|
+
|
||||||
|
+LC_ALL=en_US.UTF-8 grep -P 'k$' in
|
||||||
|
+test $? -eq 1 || fail=1
|
||||||
|
|
||||||
|
Exit $fail
|
10
grep.spec
10
grep.spec
@@ -3,7 +3,7 @@
|
|||||||
Summary: Pattern matching utilities
|
Summary: Pattern matching utilities
|
||||||
Name: grep
|
Name: grep
|
||||||
Version: 2.20
|
Version: 2.20
|
||||||
Release: 5%{?dist}
|
Release: 6%{?dist}
|
||||||
License: GPLv3+
|
License: GPLv3+
|
||||||
Group: Applications/Text
|
Group: Applications/Text
|
||||||
Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz
|
Source: ftp://ftp.gnu.org/pub/gnu/grep/grep-%{version}.tar.xz
|
||||||
@@ -15,6 +15,8 @@ Source4: grepconf.sh
|
|||||||
Patch0: grep-2.20-man-fix-gs.patch
|
Patch0: grep-2.20-man-fix-gs.patch
|
||||||
# upstream ticket 39445
|
# upstream ticket 39445
|
||||||
Patch1: grep-2.20-help-align.patch
|
Patch1: grep-2.20-help-align.patch
|
||||||
|
# backported from upstream
|
||||||
|
Patch2: grep-2.20-pcre-invalid-utf8-fix.patch
|
||||||
URL: http://www.gnu.org/software/grep/
|
URL: http://www.gnu.org/software/grep/
|
||||||
Requires(post): /sbin/install-info
|
Requires(post): /sbin/install-info
|
||||||
Requires(preun): /sbin/install-info
|
Requires(preun): /sbin/install-info
|
||||||
@@ -35,6 +37,7 @@ GNU grep is needed by many scripts, so it shall be installed on every system.
|
|||||||
%setup -q
|
%setup -q
|
||||||
%patch0 -p1 -b .man-fix-gs
|
%patch0 -p1 -b .man-fix-gs
|
||||||
%patch1 -p1 -b .help-align
|
%patch1 -p1 -b .help-align
|
||||||
|
%patch2 -p1 -b .pcre-invalid-utf8-fix
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%global BUILD_FLAGS $RPM_OPT_FLAGS
|
%global BUILD_FLAGS $RPM_OPT_FLAGS
|
||||||
@@ -90,6 +93,11 @@ fi
|
|||||||
%{_libexecdir}/grepconf.sh
|
%{_libexecdir}/grepconf.sh
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Nov 11 2014 Jaroslav Škarvada <jskarvad@redhat.com> - 2.20-6
|
||||||
|
- Fixed invalid UTF-8 byte sequence error in PCRE mode
|
||||||
|
(by pcre-invalid-utf8-fix patch)
|
||||||
|
Resolves: rhbz#1161832
|
||||||
|
|
||||||
* Wed Aug 20 2014 Jaroslav Škarvada <jskarvad@redhat.com> - 2.20-5
|
* Wed Aug 20 2014 Jaroslav Škarvada <jskarvad@redhat.com> - 2.20-5
|
||||||
- Added script to check whether grep is coloured
|
- Added script to check whether grep is coloured
|
||||||
Resolves: rhbz#1034631
|
Resolves: rhbz#1034631
|
||||||
|
Loading…
Reference in New Issue
Block a user