From 1c9ad4b954e5888ff6acdcb8e7073b123641b536 Mon Sep 17 00:00:00 2001 From: ph10 Date: Sat, 6 Feb 2016 16:40:59 +0000 Subject: [PATCH] Fix pcre2test loop when a callout is in an initial lookbehind. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@488 6239d852-aaf2-0410-a92c-79f79f948069 Petr Písař: Ported to 10.21. Signed-off-by: Petr Písař --- doc/pcre2test.1 | 4 +++- src/pcre2test.c | 38 ++++++++++++++++++++++++++++---------- testdata/testinput2 | 5 +++++ testdata/testoutput2 | 15 +++++++++++++++ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index b8eef93..4845cc7 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1475,7 +1475,9 @@ item to be tested. For example: This output indicates that callout number 0 occurred for a match attempt starting at the fourth character of the subject string, when the pointer was at the seventh character, and when the next pattern item was \ed. Just -one circumflex is output if the start and current positions are the same. +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. .P Callouts numbered 255 are assumed to be automatic callouts, inserted as a result of the \fB/auto_callout\fP pattern modifier. In this case, instead of diff --git a/src/pcre2test.c b/src/pcre2test.c index 0a5879e..bc9099f 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -2546,12 +2546,13 @@ return (int)(pp - p); /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed. For printing *MARK strings, a negative length is given. If handed a NULL file, -just counts chars without printing. */ +just counts chars without printing (because pchar() does that). */ static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f) { uint32_t c = 0; int yield = 0; + if (length < 0) length = p[-1]; while (length-- > 0) { @@ -2569,6 +2570,7 @@ while (length-- > 0) c = *p++; yield += pchar(c, utf, f); } + return yield; } #endif @@ -4975,6 +4977,7 @@ static int callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr) { uint32_t i, pre_start, post_start, subject_length; +PCRE2_SIZE current_position; BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0; @@ -5025,21 +5028,36 @@ if (callout_capture) } } -/* Re-print the subject in canonical form, the first time or if giving full -datails. On subsequent calls in the same match, we use pchars just to find the -printed lengths of the substrings. */ +/* Re-print the subject in canonical form (with escapes for non-printing +characters), the first time, or if giving full details. On subsequent calls in +the same match, we use PCHARS() just to find the printed lengths of the +substrings. */ if (f != NULL) fprintf(f, "--->"); +/* The subject before the match start. */ + PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); +/* If a lookbehind is involved, the current position may be earlier than the +match start. If so, use the match start instead. */ + +current_position = (cb->current_position >= cb->start_match)? + cb->current_position : cb->start_match; + +/* The subject between the match start and the current position. */ + PCHARS(post_start, cb->subject, cb->start_match, - cb->current_position - cb->start_match, utf, f); + current_position - cb->start_match, utf, f); -PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); +/* Print from the current position to the end. */ + +PCHARSV(cb->subject, current_position, cb->subject_length - current_position, + utf, f); -PCHARSV(cb->subject, cb->current_position, - cb->subject_length - cb->current_position, utf, f); +/* Calculate the total subject printed length (no print). */ + +PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); if (f != NULL) fprintf(f, "\n"); @@ -7021,7 +7039,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) struct rlimit rlim; if (U32OVERFLOW(uli)) { - fprintf(stderr, "+++ Argument for -S is too big\n"); + fprintf(stderr, "** Argument for -S is too big\n"); exit(1); } stack_size = (uint32_t)uli; @@ -7073,7 +7091,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) { if (U32OVERFLOW(uli)) { - fprintf(stderr, "+++ Argument for %s is too big\n", arg); + fprintf(stderr, "** Argument for %s is too big\n", arg); exit(1); } timeitm = (int)uli; diff --git a/testdata/testinput2 b/testdata/testinput2 index 86c149d..67c40ba 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4802,4 +4802,9 @@ a)"xI /(?J)(?'a'))(?'a')/ +/(?<=((?C)0))/ + 9010 +\= Expect no match + abc + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 87c7204..32aa066 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15161,4 +15161,19 @@ MK: A\x00b /(?J)(?'a'))(?'a')/ Failed: error 122 at offset 10: unmatched closing parenthesis +/(?<=((?C)0))/ + 9010 +--->9010 + 0 ^ 0 + 0 ^ 0 + 0: + 1: 0 +\= Expect no match + abc +--->abc + 0 ^ 0 + 0 ^ 0 + 0 ^ 0 +No match + # End of testinput2 -- 2.5.0