175 lines
6.7 KiB
Diff
175 lines
6.7 KiB
Diff
From c26d49eda45dd8a26e1de65a4430e84116266227 Mon Sep 17 00:00:00 2001
|
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
|
Date: Sun, 24 Dec 2017 10:27:13 +0000
|
|
Subject: [PATCH 2/3] Documentation update.
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@895 6239d852-aaf2-0410-a92c-79f79f948069
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
doc/html/pcre2demo.html | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
|
|
doc/pcre2demo.3 | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 106 insertions(+)
|
|
|
|
diff --git a/doc/html/pcre2demo.html b/doc/html/pcre2demo.html
|
|
index d64e16b..72754d3 100644
|
|
--- a/doc/html/pcre2demo.html
|
|
+++ b/doc/html/pcre2demo.html
|
|
@@ -228,6 +228,21 @@ pcre2_match_data_create_from_pattern() above. */
|
|
if (rc == 0)
|
|
printf("ovector was not big enough for all the captured substrings\n");
|
|
|
|
+/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
|
|
+to set the start of a match later than its end. In this demonstration program,
|
|
+we just detect this case and give up. */
|
|
+
|
|
+if (ovector[0] > ovector[1])
|
|
+ {
|
|
+ printf("\\K was used in an assertion to set the match start after its end.\n"
|
|
+ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
|
+ (char *)(subject + ovector[1]));
|
|
+ printf("Run abandoned\n");
|
|
+ pcre2_match_data_free(match_data);
|
|
+ pcre2_code_free(re);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
/* Show substrings stored in the output vector by number. Obviously, in a real
|
|
application you might want to do things other than print them. */
|
|
|
|
@@ -355,6 +370,29 @@ for (;;)
|
|
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
|
}
|
|
|
|
+ /* If the previous match was not an empty string, there is one tricky case to
|
|
+ consider. If a pattern contains \K within a lookbehind assertion at the
|
|
+ start, the end of the matched string can be at the offset where the match
|
|
+ started. Without special action, this leads to a loop that keeps on matching
|
|
+ the same substring. We must detect this case and arrange to move the start on
|
|
+ by one character. The pcre2_get_startchar() function returns the offset of
|
|
+ the character at which the successful match actually started. */
|
|
+
|
|
+ else
|
|
+ {
|
|
+ PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
|
+ if (start_offset <= startchar)
|
|
+ {
|
|
+ if (startchar >= subject_length) break; /* Reached end of subject. */
|
|
+ start_offset = startchar + 1; /* Advance by one character. */
|
|
+ if (utf8) /* If UTF-8, it may be more */
|
|
+ { /* than one code unit. */
|
|
+ for (; start_offset < subject_length; start_offset++)
|
|
+ if ((subject[start_offset] & 0xc0) != 0x80) break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
/* Run the next matching operation */
|
|
|
|
rc = pcre2_match(
|
|
@@ -419,6 +457,21 @@ for (;;)
|
|
if (rc == 0)
|
|
printf("ovector was not big enough for all the captured substrings\n");
|
|
|
|
+ /* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
|
+ assertion to set the start of a match later than its end. In this
|
|
+ demonstration program, we just detect this case and give up. */
|
|
+
|
|
+ if (ovector[0] > ovector[1])
|
|
+ {
|
|
+ printf("\\K was used in an assertion to set the match start after its end.\n"
|
|
+ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
|
+ (char *)(subject + ovector[1]));
|
|
+ printf("Run abandoned\n");
|
|
+ pcre2_match_data_free(match_data);
|
|
+ pcre2_code_free(re);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
/* As before, show substrings stored in the output vector by number, and then
|
|
also any named substrings. */
|
|
|
|
diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3
|
|
index c02dcd9..a9e58e2 100644
|
|
--- a/doc/pcre2demo.3
|
|
+++ b/doc/pcre2demo.3
|
|
@@ -228,6 +228,21 @@ pcre2_match_data_create_from_pattern() above. */
|
|
if (rc == 0)
|
|
printf("ovector was not big enough for all the captured substrings\en");
|
|
|
|
+/* We must guard against patterns such as /(?=.\eK)/ that use \eK in an assertion
|
|
+to set the start of a match later than its end. In this demonstration program,
|
|
+we just detect this case and give up. */
|
|
+
|
|
+if (ovector[0] > ovector[1])
|
|
+ {
|
|
+ printf("\e\eK was used in an assertion to set the match start after its end.\en"
|
|
+ "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]),
|
|
+ (char *)(subject + ovector[1]));
|
|
+ printf("Run abandoned\en");
|
|
+ pcre2_match_data_free(match_data);
|
|
+ pcre2_code_free(re);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
/* Show substrings stored in the output vector by number. Obviously, in a real
|
|
application you might want to do things other than print them. */
|
|
|
|
@@ -355,6 +370,29 @@ for (;;)
|
|
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
|
}
|
|
|
|
+ /* If the previous match was not an empty string, there is one tricky case to
|
|
+ consider. If a pattern contains \eK within a lookbehind assertion at the
|
|
+ start, the end of the matched string can be at the offset where the match
|
|
+ started. Without special action, this leads to a loop that keeps on matching
|
|
+ the same substring. We must detect this case and arrange to move the start on
|
|
+ by one character. The pcre2_get_startchar() function returns the offset of
|
|
+ the character at which the successful match actually started. */
|
|
+
|
|
+ else
|
|
+ {
|
|
+ PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
|
+ if (start_offset <= startchar)
|
|
+ {
|
|
+ if (startchar >= subject_length) break; /* Reached end of subject. */
|
|
+ start_offset = startchar + 1; /* Advance by one character. */
|
|
+ if (utf8) /* If UTF-8, it may be more */
|
|
+ { /* than one code unit. */
|
|
+ for (; start_offset < subject_length; start_offset++)
|
|
+ if ((subject[start_offset] & 0xc0) != 0x80) break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
/* Run the next matching operation */
|
|
|
|
rc = pcre2_match(
|
|
@@ -419,6 +457,21 @@ for (;;)
|
|
if (rc == 0)
|
|
printf("ovector was not big enough for all the captured substrings\en");
|
|
|
|
+ /* We must guard against patterns such as /(?=.\eK)/ that use \eK in an
|
|
+ assertion to set the start of a match later than its end. In this
|
|
+ demonstration program, we just detect this case and give up. */
|
|
+
|
|
+ if (ovector[0] > ovector[1])
|
|
+ {
|
|
+ printf("\e\eK was used in an assertion to set the match start after its end.\en"
|
|
+ "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]),
|
|
+ (char *)(subject + ovector[1]));
|
|
+ printf("Run abandoned\en");
|
|
+ pcre2_match_data_free(match_data);
|
|
+ pcre2_code_free(re);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
/* As before, show substrings stored in the output vector by number, and then
|
|
also any named substrings. */
|
|
|
|
--
|
|
2.13.6
|
|
|