New version
Resolves: rhbz#1305035 - Dropped disable-performance-related-tests, better-encoding-errors-handling, Pc-consistent-results, and test-pcre-count-fix patches (all upstreamed) - De-fuzzified man-fix-gs, and help-align patches
This commit is contained in:
parent
9cc608b66b
commit
08a2c56622
@ -1,81 +0,0 @@
|
||||
diff --git a/src/grep.c b/src/grep.c
|
||||
index eac540a..9fabeb8 100644
|
||||
--- a/src/grep.c
|
||||
+++ b/src/grep.c
|
||||
@@ -1386,7 +1386,8 @@ grep (int fd, struct stat const *st)
|
||||
has_nulls = true;
|
||||
if (binary_files == WITHOUT_MATCH_BINARY_FILES)
|
||||
return 0;
|
||||
- done_on_match = out_quiet = true;
|
||||
+ if (!count_matches)
|
||||
+ done_on_match = out_quiet = true;
|
||||
nul_zapper = eol;
|
||||
skip_nuls = skip_empty_lines;
|
||||
}
|
||||
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||
index 2ade5be..2079ae5 100644
|
||||
--- a/tests/Makefile.am
|
||||
+++ b/tests/Makefile.am
|
||||
@@ -106,6 +106,7 @@ TESTS = \
|
||||
pcre \
|
||||
pcre-abort \
|
||||
pcre-context \
|
||||
+ pcre-count \
|
||||
pcre-infloop \
|
||||
pcre-invalid-utf8-input \
|
||||
pcre-jitstack \
|
||||
diff --git a/tests/Makefile.in b/tests/Makefile.in
|
||||
index b5bd7b5..ee516da 100644
|
||||
--- a/tests/Makefile.in
|
||||
+++ b/tests/Makefile.in
|
||||
@@ -1442,6 +1442,7 @@ TESTS = \
|
||||
pcre \
|
||||
pcre-abort \
|
||||
pcre-context \
|
||||
+ pcre-count \
|
||||
pcre-infloop \
|
||||
pcre-invalid-utf8-input \
|
||||
pcre-jitstack \
|
||||
@@ -2250,6 +2251,13 @@ pcre-context.log: pcre-context
|
||||
--log-file $$b.log --trs-file $$b.trs \
|
||||
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
|
||||
"$$tst" $(AM_TESTS_FD_REDIRECT)
|
||||
+pcre-count.log: pcre-count
|
||||
+ @p='pcre-count'; \
|
||||
+ b='pcre-count'; \
|
||||
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
|
||||
+ --log-file $$b.log --trs-file $$b.trs \
|
||||
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
|
||||
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
|
||||
pcre-infloop.log: pcre-infloop
|
||||
@p='pcre-infloop'; \
|
||||
b='pcre-infloop'; \
|
||||
diff --git a/tests/pcre-count b/tests/pcre-count
|
||||
new file mode 100755
|
||||
index 0000000..78e1c7c
|
||||
--- /dev/null
|
||||
+++ b/tests/pcre-count
|
||||
@@ -0,0 +1,23 @@
|
||||
+#! /bin/sh
|
||||
+# grep -P / grep -Pc are inconsistent results
|
||||
+# This bug affected grep versions 2.21 through 2.22.
|
||||
+#
|
||||
+# Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
+#
|
||||
+# Copying and distribution of this file, with or without modification,
|
||||
+# are permitted in any medium without royalty provided the copyright
|
||||
+# notice and this notice are preserved.
|
||||
+
|
||||
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
+require_pcre_
|
||||
+
|
||||
+fail=0
|
||||
+
|
||||
+printf 'a\n%032768d\nb\x0\n%032768d\na\n' 0 0 > in
|
||||
+
|
||||
+LC_ALL=C grep -P 'a' in | wc -l > exp
|
||||
+
|
||||
+LC_ALL=C grep -Pc 'a' in > out || fail=1
|
||||
+compare exp out || fail=1
|
||||
+
|
||||
+Exit $fail
|
@ -1,703 +0,0 @@
|
||||
diff --git a/doc/grep.texi b/doc/grep.texi
|
||||
index e3495bb..41f4fa5 100644
|
||||
--- a/doc/grep.texi
|
||||
+++ b/doc/grep.texi
|
||||
@@ -596,13 +596,13 @@ If a file's allocation metadata,
|
||||
or if its data read before a line is selected for output,
|
||||
indicate that the file contains binary data,
|
||||
assume that the file is of type @var{type}.
|
||||
-Non-text bytes indicate binary data; these are either data bytes
|
||||
-improperly encoded for the current locale, or null bytes when the
|
||||
+Non-text bytes indicate binary data; these are either output bytes that are
|
||||
+improperly encoded for the current locale, or null input bytes when the
|
||||
@option{-z} (@option{--null-data}) option is not given (@pxref{Other
|
||||
Options}).
|
||||
|
||||
-By default, @var{type} is @samp{binary},
|
||||
-and @command{grep} normally outputs either
|
||||
+By default, @var{type} is @samp{binary}, and when @command{grep}
|
||||
+discovers that a file is binary it normally outputs either
|
||||
a one-line message saying that a binary file matches,
|
||||
or no message if there is no match.
|
||||
When processing binary data, @command{grep} may treat non-text bytes
|
||||
@@ -611,7 +611,8 @@ not match a null byte, as the null byte might be treated as a line
|
||||
terminator even without the @option{-z} (@option{--null-data}) option.
|
||||
|
||||
If @var{type} is @samp{without-match},
|
||||
-@command{grep} assumes that a binary file does not match;
|
||||
+when @command{grep} discovers that a file is binary
|
||||
+it assumes that the rest of the file does not match;
|
||||
this is equivalent to the @option{-I} option.
|
||||
|
||||
If @var{type} is @samp{text},
|
||||
diff --git a/src/grep.c b/src/grep.c
|
||||
index 2748fd3..eac540a 100644
|
||||
--- a/src/grep.c
|
||||
+++ b/src/grep.c
|
||||
@@ -377,7 +377,6 @@ bool match_icase;
|
||||
bool match_words;
|
||||
bool match_lines;
|
||||
char eolbyte;
|
||||
-enum textbin input_textbin;
|
||||
|
||||
static char const *matcher;
|
||||
|
||||
@@ -389,6 +388,10 @@ static bool omit_dot_slash;
|
||||
static bool errseen;
|
||||
static bool write_error_seen;
|
||||
|
||||
+/* True if output from the current input file has been suppressed
|
||||
+ because an output line had an encoding error. */
|
||||
+static bool encoding_error_output;
|
||||
+
|
||||
enum directories_type
|
||||
{
|
||||
READ_DIRECTORIES = 2,
|
||||
@@ -481,12 +484,6 @@ clean_up_stdout (void)
|
||||
close_stdout ();
|
||||
}
|
||||
|
||||
-static bool
|
||||
-textbin_is_binary (enum textbin textbin)
|
||||
-{
|
||||
- return textbin < TEXTBIN_UNKNOWN;
|
||||
-}
|
||||
-
|
||||
/* The high-order bit of a byte. */
|
||||
enum { HIBYTE = 0x80 };
|
||||
|
||||
@@ -551,58 +548,60 @@ skip_easy_bytes (char const *buf)
|
||||
return p;
|
||||
}
|
||||
|
||||
-/* Return the text type of data in BUF, of size SIZE.
|
||||
+/* Return true if BUF, of size SIZE, has an encoding error.
|
||||
BUF must be followed by at least sizeof (uword) bytes,
|
||||
- which may be arbitrarily written to or read from. */
|
||||
-static enum textbin
|
||||
-buffer_textbin (char *buf, size_t size)
|
||||
+ the first of which may be modified. */
|
||||
+static bool
|
||||
+buf_has_encoding_errors (char *buf, size_t size)
|
||||
{
|
||||
- if (eolbyte && memchr (buf, '\0', size))
|
||||
- return TEXTBIN_BINARY;
|
||||
+ if (MB_CUR_MAX <= 1)
|
||||
+ return false;
|
||||
|
||||
- if (1 < MB_CUR_MAX)
|
||||
- {
|
||||
- mbstate_t mbs = { 0 };
|
||||
- size_t clen;
|
||||
- char const *p;
|
||||
+ mbstate_t mbs = { 0 };
|
||||
+ size_t clen;
|
||||
|
||||
- buf[size] = -1;
|
||||
- for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
|
||||
- {
|
||||
- clen = mbrlen (p, buf + size - p, &mbs);
|
||||
- if ((size_t) -2 <= clen)
|
||||
- return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY;
|
||||
- }
|
||||
+ buf[size] = -1;
|
||||
+ for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
|
||||
+ {
|
||||
+ clen = mbrlen (p, buf + size - p, &mbs);
|
||||
+ if ((size_t) -2 <= clen)
|
||||
+ return true;
|
||||
}
|
||||
|
||||
- return TEXTBIN_TEXT;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
-/* Return the text type of a file. BUF, of size SIZE, is the initial
|
||||
- buffer read from the file with descriptor FD and status ST.
|
||||
- BUF must be followed by at least sizeof (uword) bytes,
|
||||
+
|
||||
+/* Return true if BUF, of size SIZE, has a null byte.
|
||||
+ BUF must be followed by at least one byte,
|
||||
which may be arbitrarily written to or read from. */
|
||||
-static enum textbin
|
||||
-file_textbin (char *buf, size_t size, int fd, struct stat const *st)
|
||||
+static bool
|
||||
+buf_has_nulls (char *buf, size_t size)
|
||||
{
|
||||
- enum textbin textbin = buffer_textbin (buf, size);
|
||||
- if (textbin_is_binary (textbin))
|
||||
- return textbin;
|
||||
+ buf[size] = 0;
|
||||
+ return strlen (buf) != size;
|
||||
+}
|
||||
|
||||
+/* Return true if a file is known to contain null bytes.
|
||||
+ SIZE bytes have already been read from the file
|
||||
+ with descriptor FD and status ST. */
|
||||
+static bool
|
||||
+file_must_have_nulls (size_t size, int fd, struct stat const *st)
|
||||
+{
|
||||
if (usable_st_size (st))
|
||||
{
|
||||
if (st->st_size <= size)
|
||||
- return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin;
|
||||
+ return false;
|
||||
|
||||
/* If the file has holes, it must contain a null byte somewhere. */
|
||||
- if (SEEK_HOLE != SEEK_SET && eolbyte)
|
||||
+ if (SEEK_HOLE != SEEK_SET)
|
||||
{
|
||||
off_t cur = size;
|
||||
if (O_BINARY || fd == STDIN_FILENO)
|
||||
{
|
||||
cur = lseek (fd, 0, SEEK_CUR);
|
||||
if (cur < 0)
|
||||
- return TEXTBIN_UNKNOWN;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Look for a hole after the current location. */
|
||||
@@ -612,12 +611,12 @@ file_textbin (char *buf, size_t size, int fd, struct stat const *st)
|
||||
if (lseek (fd, cur, SEEK_SET) < 0)
|
||||
suppressible_error (filename, errno);
|
||||
if (hole_start < st->st_size)
|
||||
- return TEXTBIN_BINARY;
|
||||
+ return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- return TEXTBIN_UNKNOWN;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Convert STR to a nonnegative integer, storing the result in *OUT.
|
||||
@@ -888,7 +887,7 @@ static char *label = NULL; /* Fake filename for stdin */
|
||||
/* Internal variables to keep track of byte count, context, etc. */
|
||||
static uintmax_t totalcc; /* Total character count before bufbeg. */
|
||||
static char const *lastnl; /* Pointer after last newline counted. */
|
||||
-static char const *lastout; /* Pointer after last character output;
|
||||
+static char *lastout; /* Pointer after last character output;
|
||||
NULL if no character has been output
|
||||
or if it's conceptually before bufbeg. */
|
||||
static intmax_t outleft; /* Maximum number of lines to be output. */
|
||||
@@ -960,10 +959,31 @@ print_offset (uintmax_t pos, int min_width, const char *color)
|
||||
pr_sgr_end_if (color);
|
||||
}
|
||||
|
||||
-/* Print a whole line head (filename, line, byte). */
|
||||
-static void
|
||||
-print_line_head (char const *beg, char const *lim, char sep)
|
||||
+/* Print a whole line head (filename, line, byte). The output data
|
||||
+ starts at BEG and contains LEN bytes; it is followed by at least
|
||||
+ sizeof (uword) bytes, the first of which may be temporarily modified.
|
||||
+ The output data comes from what is perhaps a larger input line that
|
||||
+ goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
|
||||
+ the separator on output.
|
||||
+
|
||||
+ Return true unless the line was suppressed due to an encoding error. */
|
||||
+
|
||||
+static bool
|
||||
+print_line_head (char *beg, size_t len, char const *lim, char sep)
|
||||
{
|
||||
+ bool encoding_errors = false;
|
||||
+ if (binary_files != TEXT_BINARY_FILES)
|
||||
+ {
|
||||
+ char ch = beg[len];
|
||||
+ encoding_errors = buf_has_encoding_errors (beg, len);
|
||||
+ beg[len] = ch;
|
||||
+ }
|
||||
+ if (encoding_errors)
|
||||
+ {
|
||||
+ encoding_error_output = done_on_match = out_quiet = true;
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
bool pending_sep = false;
|
||||
|
||||
if (out_file)
|
||||
@@ -1010,22 +1030,27 @@ print_line_head (char const *beg, char const *lim, char sep)
|
||||
|
||||
print_sep (sep);
|
||||
}
|
||||
+
|
||||
+ return true;
|
||||
}
|
||||
|
||||
-static const char *
|
||||
-print_line_middle (const char *beg, const char *lim,
|
||||
+static char *
|
||||
+print_line_middle (char *beg, char *lim,
|
||||
const char *line_color, const char *match_color)
|
||||
{
|
||||
size_t match_size;
|
||||
size_t match_offset;
|
||||
- const char *cur = beg;
|
||||
- const char *mid = NULL;
|
||||
-
|
||||
- while (cur < lim
|
||||
- && ((match_offset = execute (beg, lim - beg, &match_size, cur))
|
||||
- != (size_t) -1))
|
||||
+ char *cur = beg;
|
||||
+ char *mid = NULL;
|
||||
+ char *b;
|
||||
+
|
||||
+ for (cur = beg;
|
||||
+ (cur < lim
|
||||
+ && ((match_offset = execute (beg, lim - beg, &match_size, cur))
|
||||
+ != (size_t) -1));
|
||||
+ cur = b + match_size)
|
||||
{
|
||||
- char const *b = beg + match_offset;
|
||||
+ b = beg + match_offset;
|
||||
|
||||
/* Avoid matching the empty line at the end of the buffer. */
|
||||
if (b == lim)
|
||||
@@ -1045,8 +1070,11 @@ print_line_middle (const char *beg, const char *lim,
|
||||
/* This function is called on a matching line only,
|
||||
but is it selected or rejected/context? */
|
||||
if (only_matching)
|
||||
- print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED
|
||||
- : SEP_CHAR_SELECTED));
|
||||
+ {
|
||||
+ char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
|
||||
+ if (! print_line_head (b, match_size, lim, sep))
|
||||
+ return NULL;
|
||||
+ }
|
||||
else
|
||||
{
|
||||
pr_sgr_start (line_color);
|
||||
@@ -1064,7 +1092,6 @@ print_line_middle (const char *beg, const char *lim,
|
||||
if (only_matching)
|
||||
fputs ("\n", stdout);
|
||||
}
|
||||
- cur = b + match_size;
|
||||
}
|
||||
|
||||
if (only_matching)
|
||||
@@ -1075,8 +1102,8 @@ print_line_middle (const char *beg, const char *lim,
|
||||
return cur;
|
||||
}
|
||||
|
||||
-static const char *
|
||||
-print_line_tail (const char *beg, const char *lim, const char *line_color)
|
||||
+static char *
|
||||
+print_line_tail (char *beg, const char *lim, const char *line_color)
|
||||
{
|
||||
size_t eol_size;
|
||||
size_t tail_size;
|
||||
@@ -1097,14 +1124,15 @@ print_line_tail (const char *beg, const char *lim, const char *line_color)
|
||||
}
|
||||
|
||||
static void
|
||||
-prline (char const *beg, char const *lim, char sep)
|
||||
+prline (char *beg, char *lim, char sep)
|
||||
{
|
||||
bool matching;
|
||||
const char *line_color;
|
||||
const char *match_color;
|
||||
|
||||
if (!only_matching)
|
||||
- print_line_head (beg, lim, sep);
|
||||
+ if (! print_line_head (beg, lim - beg - 1, lim, sep))
|
||||
+ return;
|
||||
|
||||
matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
|
||||
|
||||
@@ -1124,7 +1152,11 @@ prline (char const *beg, char const *lim, char sep)
|
||||
{
|
||||
/* We already know that non-matching lines have no match (to colorize). */
|
||||
if (matching && (only_matching || *match_color))
|
||||
- beg = print_line_middle (beg, lim, line_color, match_color);
|
||||
+ {
|
||||
+ beg = print_line_middle (beg, lim, line_color, match_color);
|
||||
+ if (! beg)
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
if (!only_matching && *line_color)
|
||||
{
|
||||
@@ -1158,7 +1190,7 @@ prpending (char const *lim)
|
||||
lastout = bufbeg;
|
||||
while (pending > 0 && lastout < lim)
|
||||
{
|
||||
- char const *nl = memchr (lastout, eolbyte, lim - lastout);
|
||||
+ char *nl = memchr (lastout, eolbyte, lim - lastout);
|
||||
size_t match_size;
|
||||
--pending;
|
||||
if (outleft
|
||||
@@ -1173,7 +1205,7 @@ prpending (char const *lim)
|
||||
|
||||
/* Output the lines between BEG and LIM. Deal with context. */
|
||||
static void
|
||||
-prtext (char const *beg, char const *lim)
|
||||
+prtext (char *beg, char *lim)
|
||||
{
|
||||
static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
|
||||
char eol = eolbyte;
|
||||
@@ -1181,7 +1213,7 @@ prtext (char const *beg, char const *lim)
|
||||
if (!out_quiet && pending > 0)
|
||||
prpending (beg);
|
||||
|
||||
- char const *p = beg;
|
||||
+ char *p = beg;
|
||||
|
||||
if (!out_quiet)
|
||||
{
|
||||
@@ -1207,7 +1239,7 @@ prtext (char const *beg, char const *lim)
|
||||
|
||||
while (p < beg)
|
||||
{
|
||||
- char const *nl = memchr (p, eol, beg - p);
|
||||
+ char *nl = memchr (p, eol, beg - p);
|
||||
nl++;
|
||||
prline (p, nl, SEP_CHAR_REJECTED);
|
||||
p = nl;
|
||||
@@ -1220,7 +1252,7 @@ prtext (char const *beg, char const *lim)
|
||||
/* One or more lines are output. */
|
||||
for (n = 0; p < lim && n < outleft; n++)
|
||||
{
|
||||
- char const *nl = memchr (p, eol, lim - p);
|
||||
+ char *nl = memchr (p, eol, lim - p);
|
||||
nl++;
|
||||
if (!out_quiet)
|
||||
prline (p, nl, SEP_CHAR_SELECTED);
|
||||
@@ -1267,13 +1299,12 @@ zap_nuls (char *p, char *lim, char eol)
|
||||
between matching lines if OUT_INVERT is true). Return a count of
|
||||
lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */
|
||||
static intmax_t
|
||||
-grepbuf (char const *beg, char const *lim)
|
||||
+grepbuf (char *beg, char const *lim)
|
||||
{
|
||||
intmax_t outleft0 = outleft;
|
||||
- char const *p;
|
||||
- char const *endp;
|
||||
+ char *endp;
|
||||
|
||||
- for (p = beg; p < lim; p = endp)
|
||||
+ for (char *p = beg; p < lim; p = endp)
|
||||
{
|
||||
size_t match_size;
|
||||
size_t match_offset = execute (p, lim - p, &match_size, NULL);
|
||||
@@ -1284,15 +1315,15 @@ grepbuf (char const *beg, char const *lim)
|
||||
match_offset = lim - p;
|
||||
match_size = 0;
|
||||
}
|
||||
- char const *b = p + match_offset;
|
||||
+ char *b = p + match_offset;
|
||||
endp = b + match_size;
|
||||
/* Avoid matching the empty line at the end of the buffer. */
|
||||
if (!out_invert && b == lim)
|
||||
break;
|
||||
if (!out_invert || p < b)
|
||||
{
|
||||
- char const *prbeg = out_invert ? p : b;
|
||||
- char const *prend = out_invert ? b : endp;
|
||||
+ char *prbeg = out_invert ? p : b;
|
||||
+ char *prend = out_invert ? b : endp;
|
||||
prtext (prbeg, prend);
|
||||
if (!outleft || done_on_match)
|
||||
{
|
||||
@@ -1313,7 +1344,6 @@ static intmax_t
|
||||
grep (int fd, struct stat const *st)
|
||||
{
|
||||
intmax_t nlines, i;
|
||||
- enum textbin textbin;
|
||||
size_t residue, save;
|
||||
char oldc;
|
||||
char *beg;
|
||||
@@ -1322,6 +1352,7 @@ grep (int fd, struct stat const *st)
|
||||
char nul_zapper = '\0';
|
||||
bool done_on_match_0 = done_on_match;
|
||||
bool out_quiet_0 = out_quiet;
|
||||
+ bool has_nulls = false;
|
||||
|
||||
if (! reset (fd, st))
|
||||
return 0;
|
||||
@@ -1333,6 +1364,7 @@ grep (int fd, struct stat const *st)
|
||||
after_last_match = 0;
|
||||
pending = 0;
|
||||
skip_nuls = skip_empty_lines && !eol;
|
||||
+ encoding_error_output = false;
|
||||
seek_data_failed = false;
|
||||
|
||||
nlines = 0;
|
||||
@@ -1345,26 +1377,20 @@ grep (int fd, struct stat const *st)
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (binary_files == TEXT_BINARY_FILES)
|
||||
- textbin = TEXTBIN_TEXT;
|
||||
- else
|
||||
+ for (bool firsttime = true; ; firsttime = false)
|
||||
{
|
||||
- textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st);
|
||||
- if (textbin_is_binary (textbin))
|
||||
+ if (!has_nulls && eol && binary_files != TEXT_BINARY_FILES
|
||||
+ && (buf_has_nulls (bufbeg, buflim - bufbeg)
|
||||
+ || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
|
||||
{
|
||||
+ has_nulls = true;
|
||||
if (binary_files == WITHOUT_MATCH_BINARY_FILES)
|
||||
return 0;
|
||||
done_on_match = out_quiet = true;
|
||||
nul_zapper = eol;
|
||||
skip_nuls = skip_empty_lines;
|
||||
}
|
||||
- else if (execute != Pexecute)
|
||||
- textbin = TEXTBIN_TEXT;
|
||||
- }
|
||||
|
||||
- for (;;)
|
||||
- {
|
||||
- input_textbin = textbin;
|
||||
lastnl = bufbeg;
|
||||
if (lastout)
|
||||
lastout = bufbeg;
|
||||
@@ -1415,13 +1441,8 @@ grep (int fd, struct stat const *st)
|
||||
}
|
||||
|
||||
/* Detect whether leading context is adjacent to previous output. */
|
||||
- if (lastout)
|
||||
- {
|
||||
- if (textbin == TEXTBIN_UNKNOWN)
|
||||
- textbin = TEXTBIN_TEXT;
|
||||
- if (beg != lastout)
|
||||
- lastout = 0;
|
||||
- }
|
||||
+ if (beg != lastout)
|
||||
+ lastout = 0;
|
||||
|
||||
/* Handle some details and read more data to scan. */
|
||||
save = residue + lim - beg;
|
||||
@@ -1434,22 +1455,6 @@ grep (int fd, struct stat const *st)
|
||||
suppressible_error (filename, errno);
|
||||
goto finish_grep;
|
||||
}
|
||||
-
|
||||
- /* If the file's textbin has not been determined yet, assume
|
||||
- it's binary if the next input buffer suggests so. */
|
||||
- if (textbin == TEXTBIN_UNKNOWN)
|
||||
- {
|
||||
- enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg);
|
||||
- if (textbin_is_binary (tb))
|
||||
- {
|
||||
- if (binary_files == WITHOUT_MATCH_BINARY_FILES)
|
||||
- return 0;
|
||||
- textbin = tb;
|
||||
- done_on_match = out_quiet = true;
|
||||
- nul_zapper = eol;
|
||||
- skip_nuls = skip_empty_lines;
|
||||
- }
|
||||
- }
|
||||
}
|
||||
if (residue)
|
||||
{
|
||||
@@ -1463,7 +1468,7 @@ grep (int fd, struct stat const *st)
|
||||
finish_grep:
|
||||
done_on_match = done_on_match_0;
|
||||
out_quiet = out_quiet_0;
|
||||
- if (textbin_is_binary (textbin) && !out_quiet && nlines != 0)
|
||||
+ if ((has_nulls || encoding_error_output) && !out_quiet && nlines != 0)
|
||||
printf (_("Binary file %s matches\n"), filename);
|
||||
return nlines;
|
||||
}
|
||||
diff --git a/src/grep.h b/src/grep.h
|
||||
index 580eb11..2e4527c 100644
|
||||
--- a/src/grep.h
|
||||
+++ b/src/grep.h
|
||||
@@ -29,22 +29,4 @@ extern bool match_words; /* -w */
|
||||
extern bool match_lines; /* -x */
|
||||
extern char eolbyte; /* -z */
|
||||
|
||||
-/* An enum textbin describes the file's type, inferred from data read
|
||||
- before the first line is selected for output. */
|
||||
-enum textbin
|
||||
- {
|
||||
- /* Binary, as it contains null bytes and the -z option is not in effect,
|
||||
- or it contains encoding errors. */
|
||||
- TEXTBIN_BINARY = -1,
|
||||
-
|
||||
- /* Not known yet. Only text has been seen so far. */
|
||||
- TEXTBIN_UNKNOWN = 0,
|
||||
-
|
||||
- /* Text. */
|
||||
- TEXTBIN_TEXT = 1
|
||||
- };
|
||||
-
|
||||
-/* Input file type. */
|
||||
-extern enum textbin input_textbin;
|
||||
-
|
||||
#endif
|
||||
diff --git a/src/pcresearch.c b/src/pcresearch.c
|
||||
index b1f8310..a446b2c 100644
|
||||
--- a/src/pcresearch.c
|
||||
+++ b/src/pcresearch.c
|
||||
@@ -194,32 +194,13 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||
error. */
|
||||
char const *subject = buf;
|
||||
|
||||
- /* If the input type is unknown, the caller is still testing the
|
||||
- input, which means the current buffer cannot contain encoding
|
||||
- errors and a multiline search is typically more efficient.
|
||||
- Otherwise, a single-line search is typically faster, so that
|
||||
- pcre_exec doesn't waste time validating the entire input
|
||||
- buffer. */
|
||||
- bool multiline = input_textbin == TEXTBIN_UNKNOWN;
|
||||
-
|
||||
for (; p < buf + size; p = line_start = line_end + 1)
|
||||
{
|
||||
- bool too_big;
|
||||
-
|
||||
- if (multiline)
|
||||
- {
|
||||
- size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
|
||||
- size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
|
||||
- line_end = memrchr (p, eolbyte, scan_size);
|
||||
- too_big = ! line_end;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- line_end = memchr (p, eolbyte, buf + size - p);
|
||||
- too_big = INT_MAX < line_end - p;
|
||||
- }
|
||||
-
|
||||
- if (too_big)
|
||||
+ /* A single-line search is typically faster, so that
|
||||
+ pcre_exec doesn't waste time validating the entire input
|
||||
+ buffer. */
|
||||
+ line_end = memchr (p, eolbyte, buf + size - p);
|
||||
+ if (INT_MAX < line_end - p)
|
||||
error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
|
||||
|
||||
for (;;)
|
||||
@@ -247,27 +228,11 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||
int options = 0;
|
||||
if (!bol)
|
||||
options |= PCRE_NOTBOL;
|
||||
- if (multiline)
|
||||
- options |= PCRE_NO_UTF8_CHECK;
|
||||
|
||||
e = jit_exec (subject, line_end - subject, search_offset,
|
||||
options, sub);
|
||||
if (e != PCRE_ERROR_BADUTF8)
|
||||
- {
|
||||
- if (0 < e && multiline && sub[1] - sub[0] != 0)
|
||||
- {
|
||||
- char const *nl = memchr (subject + sub[0], eolbyte,
|
||||
- sub[1] - sub[0]);
|
||||
- if (nl)
|
||||
- {
|
||||
- /* This match crosses a line boundary; reject it. */
|
||||
- p = subject + sub[0];
|
||||
- line_end = nl;
|
||||
- continue;
|
||||
- }
|
||||
- }
|
||||
- break;
|
||||
- }
|
||||
+ break;
|
||||
int valid_bytes = sub[0];
|
||||
|
||||
/* Try to match the string before the encoding error. */
|
||||
@@ -337,15 +302,6 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
|
||||
beg = matchbeg;
|
||||
end = matchend;
|
||||
}
|
||||
- else if (multiline)
|
||||
- {
|
||||
- char const *prev_nl = memrchr (line_start - 1, eolbyte,
|
||||
- matchbeg - (line_start - 1));
|
||||
- char const *next_nl = memchr (matchend, eolbyte,
|
||||
- line_end + 1 - matchend);
|
||||
- beg = prev_nl + 1;
|
||||
- end = next_nl + 1;
|
||||
- }
|
||||
else
|
||||
{
|
||||
beg = line_start;
|
||||
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||
index d379821..2ade5be 100644
|
||||
--- a/tests/Makefile.am
|
||||
+++ b/tests/Makefile.am
|
||||
@@ -70,6 +70,7 @@ TESTS = \
|
||||
empty \
|
||||
empty-line \
|
||||
empty-line-mb \
|
||||
+ encoding-error \
|
||||
epipe \
|
||||
equiv-classes \
|
||||
ere \
|
||||
diff --git a/tests/Makefile.in b/tests/Makefile.in
|
||||
index 6de6f49..b5bd7b5 100644
|
||||
--- a/tests/Makefile.in
|
||||
+++ b/tests/Makefile.in
|
||||
@@ -1406,6 +1406,7 @@ TESTS = \
|
||||
empty \
|
||||
empty-line \
|
||||
empty-line-mb \
|
||||
+ encoding-error \
|
||||
epipe \
|
||||
equiv-classes \
|
||||
ere \
|
||||
@@ -1997,6 +1998,13 @@ empty-line-mb.log: empty-line-mb
|
||||
--log-file $$b.log --trs-file $$b.trs \
|
||||
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
|
||||
"$$tst" $(AM_TESTS_FD_REDIRECT)
|
||||
+encoding-error.log: encoding-error
|
||||
+ @p='encoding-error'; \
|
||||
+ b='encoding-error'; \
|
||||
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
|
||||
+ --log-file $$b.log --trs-file $$b.trs \
|
||||
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
|
||||
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
|
||||
epipe.log: epipe
|
||||
@p='epipe'; \
|
||||
b='epipe'; \
|
||||
diff --git a/tests/encoding-error b/tests/encoding-error
|
||||
new file mode 100755
|
||||
index 0000000..fe52de2
|
||||
--- a/dev/null
|
||||
+++ b/tests/encoding-error
|
||||
@@ -0,0 +1,41 @@
|
||||
+#! /bin/sh
|
||||
+# Test grep's behavior on encoding errors.
|
||||
+#
|
||||
+# Copyright 2015 Free Software Foundation, Inc.
|
||||
+#
|
||||
+# Copying and distribution of this file, with or without modification,
|
||||
+# are permitted in any medium without royalty provided the copyright
|
||||
+# notice and this notice are preserved.
|
||||
+
|
||||
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
+
|
||||
+require_en_utf8_locale_
|
||||
+
|
||||
+LC_ALL=en_US.UTF-8
|
||||
+export LC_ALL
|
||||
+
|
||||
+printf 'Alfred Jones\n' > a || framework_failure_
|
||||
+printf 'John Smith\n' >j || framework_failure_
|
||||
+printf 'Pedro P\xe9rez\n' >p || framework_failure_
|
||||
+cat a p j >in || framework_failure_
|
||||
+
|
||||
+fail=0
|
||||
+
|
||||
+grep '^A' in >out || fail=1
|
||||
+compare a out || fail=1
|
||||
+
|
||||
+grep '^P' in >out || fail=1
|
||||
+printf 'Binary file in matches\n' >exp || framework_failure_
|
||||
+compare exp out || fail=1
|
||||
+
|
||||
+grep '^J' in >out || fail=1
|
||||
+compare j out || fail=1
|
||||
+
|
||||
+grep '^X' in >out
|
||||
+test $? = 1 || fail=1
|
||||
+compare /dev/null out || fail=1
|
||||
+
|
||||
+grep -a . in >out || fail=1
|
||||
+compare in out
|
||||
+
|
||||
+Exit $fail
|
@ -1,54 +0,0 @@
|
||||
From e938d22e2131972a6e9eaddbf850c6a04a4f750c Mon Sep 17 00:00:00 2001
|
||||
From: Jim Meyering <meyering@fb.com>
|
||||
Date: Wed, 4 Nov 2015 10:57:07 -0800
|
||||
Subject: [PATCH] tests: mark performance-related tests as expensive
|
||||
|
||||
These performance-related tests are slightly failure prone due to
|
||||
varying system load during the two runs.
|
||||
Marking these tests as "expensive" makes it so they are no longer run
|
||||
via "make check". You can still run them via make "check-expensive".
|
||||
This makes them less likely to be run by regular users.
|
||||
* tests/long-pattern-perf: Use expensive_.
|
||||
* tests/mb-non-UTF8-performance: Likewise.
|
||||
Reported by Jaroslav Skarvada in http://debbugs.gnu.org/21826
|
||||
and by Andreas Schwab in http://debbugs.gnu.org/21812.
|
||||
---
|
||||
tests/long-pattern-perf | 5 +++++
|
||||
tests/mb-non-UTF8-performance | 5 +++++
|
||||
2 files changed, 10 insertions(+)
|
||||
|
||||
diff --git a/tests/long-pattern-perf b/tests/long-pattern-perf
|
||||
index c222c02..2c9d080 100755
|
||||
--- a/tests/long-pattern-perf
|
||||
+++ b/tests/long-pattern-perf
|
||||
@@ -20,6 +20,11 @@
|
||||
|
||||
fail=0
|
||||
|
||||
+# This test is susceptible to failure due to differences in
|
||||
+# system load during the two test runs, so we'll mark it as
|
||||
+# "expensive", making it less likely to be run by regular users.
|
||||
+expensive_
|
||||
+
|
||||
echo x > in || framework_failure_
|
||||
# We could use seq -s '' (avoiding the tr filter), but I
|
||||
# suspect some version of seq does not honor that option.
|
||||
diff --git a/tests/mb-non-UTF8-performance b/tests/mb-non-UTF8-performance
|
||||
index 228361d..9bd5d39 100755
|
||||
--- a/tests/mb-non-UTF8-performance
|
||||
+++ b/tests/mb-non-UTF8-performance
|
||||
@@ -22,6 +22,11 @@
|
||||
|
||||
fail=0
|
||||
|
||||
+# This test is susceptible to failure due to differences in
|
||||
+# system load during the two test runs, so we'll mark it as
|
||||
+# "expensive", making it less likely to be run by regular users.
|
||||
+expensive_
|
||||
+
|
||||
# Make this large enough so that even on high-end systems
|
||||
# it incurs at least 5-10ms of user time.
|
||||
yes $(printf '%078d' 0) | head -400000 > in || framework_failure_
|
||||
--
|
||||
2.6.0
|
||||
|
@ -1,8 +1,8 @@
|
||||
diff --git a/src/grep.c b/src/grep.c
|
||||
index 7315bd0..2748fd3 100644
|
||||
index 7ffd122..6ef8a7c 100644
|
||||
--- a/src/grep.c
|
||||
+++ b/src/grep.c
|
||||
@@ -1813,17 +1813,20 @@ Output control:\n\
|
||||
@@ -1854,17 +1854,20 @@ Output control:\n\
|
||||
-D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
|
||||
ACTION is 'read' or 'skip'\n\
|
||||
-r, --recursive like --directories=recurse\n\
|
||||
|
@ -1,8 +1,8 @@
|
||||
diff --git a/doc/grep.in.1 b/doc/grep.in.1
|
||||
index 2b513a8..40ac40d 100644
|
||||
index ade11b4..6b91700 100644
|
||||
--- a/doc/grep.in.1
|
||||
+++ b/doc/grep.in.1
|
||||
@@ -314,7 +314,7 @@ Print
|
||||
@@ -320,7 +320,7 @@ Print
|
||||
.I NUM
|
||||
lines of trailing context after matching lines.
|
||||
Places a line containing a group separator
|
||||
@ -11,7 +11,7 @@ index 2b513a8..40ac40d 100644
|
||||
between contiguous groups of matches.
|
||||
With the
|
||||
.B \-o
|
||||
@@ -327,7 +327,7 @@ Print
|
||||
@@ -333,7 +333,7 @@ Print
|
||||
.I NUM
|
||||
lines of leading context before matching lines.
|
||||
Places a line containing a group separator
|
||||
@ -20,7 +20,7 @@ index 2b513a8..40ac40d 100644
|
||||
between contiguous groups of matches.
|
||||
With the
|
||||
.B \-o
|
||||
@@ -340,13 +340,24 @@ Print
|
||||
@@ -346,13 +346,24 @@ Print
|
||||
.I NUM
|
||||
lines of output context.
|
||||
Places a line containing a group separator
|
||||
@ -47,10 +47,10 @@ index 2b513a8..40ac40d 100644
|
||||
.TP
|
||||
.BR \-a ", " \-\^\-text
|
||||
diff --git a/src/grep.c b/src/grep.c
|
||||
index 2c5e09a..7315bd0 100644
|
||||
index 73c3651..7ffd122 100644
|
||||
--- a/src/grep.c
|
||||
+++ b/src/grep.c
|
||||
@@ -1837,6 +1837,8 @@ Context control:\n\
|
||||
@@ -1878,6 +1878,8 @@ Context control:\n\
|
||||
"));
|
||||
printf (_("\
|
||||
-NUM same as --context=NUM\n\
|
||||
|
@ -1,26 +0,0 @@
|
||||
diff --git a/tests/pcre-count b/tests/pcre-count
|
||||
--- a/tests/pcre-count
|
||||
+++ b/tests/pcre-count
|
||||
@@ -13,11 +13,17 @@ require_pcre_
|
||||
|
||||
fail=0
|
||||
|
||||
-printf 'a\n%032768d\nb\x0\n%032768d\na\n' 0 0 > in
|
||||
+printf 'a\n%032768d\nb\0\n%032768d\na\n' 0 0 > in || framework_failure_
|
||||
|
||||
-LC_ALL=C grep -P 'a' in | wc -l > exp
|
||||
+# grep will discover that the input is a binary file sooner if the
|
||||
+# page size is larger, so allow for either possible output.
|
||||
+printf 'a\nBinary file in matches\n' >exp1a || framework_failure_
|
||||
+printf 'Binary file in matches\n' >exp1b || framework_failure_
|
||||
+LC_ALL=C grep -P 'a' in >out || fail=1
|
||||
+compare exp1a out || compare exp1b out || fail=1
|
||||
|
||||
-LC_ALL=C grep -Pc 'a' in > out || fail=1
|
||||
-compare exp out || fail=1
|
||||
+printf '2\n' >exp2 || framework_failure_
|
||||
+LC_ALL=C grep -Pc 'a' in >out || fail=1
|
||||
+compare exp2 out || fail=1
|
||||
|
||||
Exit $fail
|
||||
|
29
grep.spec
29
grep.spec
@ -2,8 +2,8 @@
|
||||
|
||||
Summary: Pattern matching utilities
|
||||
Name: grep
|
||||
Version: 2.22
|
||||
Release: 7%{?dist}
|
||||
Version: 2.23
|
||||
Release: 1%{?dist}
|
||||
License: GPLv3+
|
||||
URL: http://www.gnu.org/software/grep/
|
||||
Group: Applications/Text
|
||||
@ -17,17 +17,6 @@ Source4: grepconf.sh
|
||||
Patch0: grep-2.22-man-fix-gs.patch
|
||||
# upstream ticket 39445
|
||||
Patch1: grep-2.22-help-align.patch
|
||||
# backported from upstream, mb-non-UTF8-performance and long-pattern-perf tests
|
||||
# marked as expensive and not run by default. The result is decided according
|
||||
# to measured runtime, which doesn't work reliable on the builders with
|
||||
# variable load.
|
||||
Patch2: grep-2.22-disable-performance-related-tests.patch
|
||||
# backported from upstream
|
||||
Patch3: grep-2.22-better-encoding-errors-handling.patch
|
||||
# backported from upstream, upstream bug#22028
|
||||
Patch4: grep-2.22-Pc-consistent-results.patch
|
||||
# backported from upstream, upstream bug#22350
|
||||
Patch5: grep-2.22-test-pcre-count-fix.patch
|
||||
Requires(post): /sbin/install-info
|
||||
Requires(preun): /sbin/install-info
|
||||
|
||||
@ -47,13 +36,6 @@ GNU grep is needed by many scripts, so it shall be installed on every system.
|
||||
%setup -q
|
||||
%patch0 -p1 -b .man-fix-gs
|
||||
%patch1 -p1 -b .help-align
|
||||
%patch2 -p1 -b .disable-performance-related-tests
|
||||
%patch3 -p1 -b .better-encoding-errors-handling
|
||||
%patch4 -p1 -b .Pc-consistent-results
|
||||
%patch5 -p1 -b .test-pcre-count-fix
|
||||
|
||||
chmod 755 tests/encoding-error
|
||||
chmod 755 tests/pcre-count
|
||||
|
||||
%build
|
||||
%global BUILD_FLAGS $RPM_OPT_FLAGS
|
||||
@ -104,6 +86,13 @@ fi
|
||||
%{_libexecdir}/grepconf.sh
|
||||
|
||||
%changelog
|
||||
* Fri Feb 5 2016 Jaroslav Škarvada <jskarvad@redhat.com> - 2.23-1
|
||||
- New version
|
||||
Resolves: rhbz#1305035
|
||||
- Dropped disable-performance-related-tests, better-encoding-errors-handling,
|
||||
Pc-consistent-results, and test-pcre-count-fix patches (all upstreamed)
|
||||
- De-fuzzified man-fix-gs, and help-align patches
|
||||
|
||||
* Wed Feb 03 2016 Fedora Release Engineering <releng@fedoraproject.org> - 2.22-7
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user