Fix bad handling of empty lines in pcregrep tool

This commit is contained in:
Petr Písař 2013-05-13 13:59:15 +02:00
parent fc76cf56db
commit 81e9d1cee3
2 changed files with 154 additions and 1 deletions

View File

@ -0,0 +1,147 @@
From 038a52f90a30d93c5688a882620bfd392f386076 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Fri, 10 May 2013 11:40:06 +0000
Subject: [PATCH] Fix pcregrep so that it can find empty lines.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1324 2f5784b3-3f2a-0410-8824-cb99058d5e15
Petr Pisar: Ported to 8.33-RC1.
diff --git a/RunGrepTest b/RunGrepTest
index 94fd808..daaf8af 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -486,6 +486,22 @@ echo "---------------------------- Test 101 ------------------------------" >>te
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
+echo "---------------------------- Test 102 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -n "^$" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 103 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --only-matching "^$" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 104 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep -n --only-matching "^$" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
+echo "---------------------------- Test 105 -----------------------------" >>testtry
+(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1
+echo "RC=$?" >>testtry
+
# Now compare the results.
diff --git a/pcregrep.c b/pcregrep.c
index 2e0dc03..1d20733 100644
--- a/pcregrep.c
+++ b/pcregrep.c
@@ -1378,6 +1378,7 @@ to find all possible matches.
Arguments:
matchptr the start of the subject
length the length of the subject to match
+ options options for pcre_exec
startoffset where to start matching
offsets the offets vector to fill in
mrc address of where to put the result of pcre_exec()
@@ -1388,8 +1389,8 @@ Returns: TRUE if there was a match
*/
static BOOL
-match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
- int *mrc)
+match_patterns(char *matchptr, size_t length, unsigned int options,
+ int startoffset, int *offsets, int *mrc)
{
int i;
size_t slen = length;
@@ -1404,7 +1405,7 @@ if (slen > 200)
for (i = 1; p != NULL; p = p->next, i++)
{
*mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
- startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
+ startoffset, options, offsets, OFFSET_SIZE);
if (*mrc >= 0) return TRUE;
if (*mrc == PCRE_ERROR_NOMATCH) continue;
fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
@@ -1539,6 +1540,7 @@ while (ptr < endptr)
int endlinelength;
int mrc = 0;
int startoffset = 0;
+ unsigned int options = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
@@ -1628,9 +1630,12 @@ while (ptr < endptr)
/* Run through all the patterns until one matches or there is an error other
than NOMATCH. This code is in a subroutine so that it can be re-used for
- finding subsequent matches when colouring matched lines. */
+ finding subsequent matches when colouring matched lines. After finding one
+ match, set PCRE_NOTEMPTY to disable any further matches of null strings in
+ this line. */
- match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
+ match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
+ options = PCRE_NOTEMPTY;
/* If it's a match or a not-match (as required), do what's wanted. */
@@ -1871,7 +1876,8 @@ while (ptr < endptr)
{
startoffset = offsets[1];
if (startoffset >= (int)linelength + endlinelength ||
- !match_patterns(matchptr, length, startoffset, offsets, &mrc))
+ !match_patterns(matchptr, length, options, startoffset, offsets,
+ &mrc))
break;
FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 733b9d6..cf04091 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -705,3 +705,38 @@ RC=0
./testdata/grepinput:zero|a
./testdata/grepinput:.|zero|the|.
RC=0
+---------------------------- Test 102 -----------------------------
+2:
+5:
+7:
+9:
+12:
+14:
+RC=0
+---------------------------- Test 103 -----------------------------
+RC=0
+---------------------------- Test 104 -----------------------------
+2:
+5:
+7:
+9:
+12:
+14:
+RC=0
+---------------------------- Test 105 -----------------------------
+triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t2_txt s1_tag s_txt p_tag p_txt o_tag
+Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+
+triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t5_txt s1_tag s_txt p_tag p_txt o_tag
+o_txt
+
+triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
+
+triple: t7_txt s1_tag s_txt p_tag p_txt o_tag o_txt
+RC=0
--
1.8.1.4

View File

@ -2,7 +2,7 @@
%global rcversion RC1
Name: pcre
Version: 8.33
Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
Group: System Environment/Libraries
@ -15,6 +15,8 @@ Patch0: pcre-8.21-multilib.patch
Patch1: pcre-8.32-refused_spelling_terminated.patch
# Fix big-endian issues, accepted by upstream after 8.33-RC1
Patch2: pcre-8.33-RC1-Fix-retrieving-PCRE_INFO_MATCHLIMIT-and-PCRE_INFO_RE.patch
# Fix pcregrep so that it can match empty lines (bug #961789), in upstream after 8.33-RC1
Patch3: pcre-8.33-RC1-Fix-pcregrep-so-that-it-can-find-empty-lines.patch
BuildRequires: readline-devel
# New libtool to get rid of rpath
BuildRequires: autoconf, automake, libtool
@ -57,6 +59,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
%patch0 -p1 -b .multilib
%patch1 -p1 -b .terminated_typos
%patch2 -p1 -b .fullinfo
%patch3 -p1 -b .pcregrep_empty_line
# Because of rpath patch
libtoolize --copy --force && autoreconf -vif
# One contributor's name is non-UTF-8
@ -120,6 +123,9 @@ make check
%{_mandir}/man1/pcretest.*
%changelog
* Mon May 13 2013 Petr Pisar <ppisar@redhat.com> - 8.33-0.2.RC1
- Fix bad handling of empty lines in pcregrep tool (bug #961789)
* Thu May 02 2013 Petr Pisar <ppisar@redhat.com> - 8.33-0.1.RC1
- 8.33-RC1 bump