diff --git a/pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch b/pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch new file mode 100644 index 0000000..be2418a --- /dev/null +++ b/pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch @@ -0,0 +1,178 @@ +From 2ede5a4b4a98add3bbf982f5805e015e8c61c565 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 26 Jun 2018 16:51:43 +0000 +Subject: [PATCH] Fix two C++ wrapper bugs, unnoticed for years. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1735 2f5784b3-3f2a-0410-8824-cb99058d5e15 + +Petr Písař: Ported to 8.42. + +diff --git a/pcrecpp.cc b/pcrecpp.cc +index d09c9ab..77a2fed 100644 +--- a/pcrecpp.cc ++++ b/pcrecpp.cc +@@ -80,6 +80,24 @@ static const string empty_string; + // If the user doesn't ask for any options, we just use this one + static RE_Options default_options; + ++// Specials for the start of patterns. See comments where start_options is used ++// below. (PH June 2018) ++static const char *start_options[] = { ++ "(*UTF8)", ++ "(*UTF)", ++ "(*UCP)", ++ "(*NO_START_OPT)", ++ "(*NO_AUTO_POSSESS)", ++ "(*LIMIT_RECURSION=", ++ "(*LIMIT_MATCH=", ++ "(*CRLF)", ++ "(*CR)", ++ "(*BSR_UNICODE)", ++ "(*BSR_ANYCRLF)", ++ "(*ANYCRLF)", ++ "(*ANY)", ++ "" }; ++ + void RE::Init(const string& pat, const RE_Options* options) { + pattern_ = pat; + if (options == NULL) { +@@ -135,7 +153,49 @@ pcre* RE::Compile(Anchor anchor) { + } else { + // Tack a '\z' at the end of RE. Parenthesize it first so that + // the '\z' applies to all top-level alternatives in the regexp. +- string wrapped = "(?:"; // A non-counting grouping operator ++ ++ /* When this code was written (for PCRE 6.0) it was enough just to ++ parenthesize the entire pattern. Unfortunately, when the feature of ++ starting patterns with (*UTF8) or (*CR) etc. was added to PCRE patterns, ++ this code was never updated. This bug was not noticed till 2018, long after ++ PCRE became obsolescent and its maintainer no longer around. Since PCRE is ++ frozen, I have added a hack to check for all the existing "start of ++ pattern" specials - knowing that no new ones will ever be added. I am not a ++ C++ programmer, so the code style is no doubt crude. It is also ++ inefficient, but is only run when the pattern starts with "(*". ++ PH June 2018. */ ++ ++ string wrapped = ""; ++ ++ if (pattern_.c_str()[0] == '(' && pattern_.c_str()[1] == '*') { ++ int kk, klen, kmat; ++ for (;;) { // Loop for any number of leading items ++ ++ for (kk = 0; start_options[kk][0] != 0; kk++) { ++ klen = strlen(start_options[kk]); ++ kmat = strncmp(pattern_.c_str(), start_options[kk], klen); ++ if (kmat >= 0) break; ++ } ++ if (kmat != 0) break; // Not found ++ ++ // If the item ended in "=" we must copy digits up to ")". ++ ++ if (start_options[kk][klen-1] == '=') { ++ while (isdigit(pattern_.c_str()[klen])) klen++; ++ if (pattern_.c_str()[klen] != ')') break; // Syntax error ++ klen++; ++ } ++ ++ // Move the item from the pattern to the start of the wrapped string. ++ ++ wrapped += pattern_.substr(0, klen); ++ pattern_.erase(0, klen); ++ } ++ } ++ ++ // Wrap the rest of the pattern. ++ ++ wrapped += "(?:"; // A non-counting grouping operator + wrapped += pattern_; + wrapped += ")\\z"; + re = pcre_compile(wrapped.c_str(), pcre_options, +@@ -415,7 +475,7 @@ int RE::GlobalReplace(const StringPiece& rewrite, + matchend++; + } + // We also need to advance more than one char if we're in utf8 mode. +-#ifdef SUPPORT_UTF8 ++#ifdef SUPPORT_UTF + if (options_.utf8()) { + while (matchend < static_cast(str->length()) && + ((*str)[matchend] & 0xc0) == 0x80) +diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc +index 4b15fbe..255066f 100644 +--- a/pcrecpp_unittest.cc ++++ b/pcrecpp_unittest.cc +@@ -309,7 +309,7 @@ static void TestReplace() { + "@aa", + "@@@", + 3 }, +-#ifdef SUPPORT_UTF8 ++#ifdef SUPPORT_UTF + { "b*", + "bb", + "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 +@@ -327,7 +327,7 @@ static void TestReplace() { + { "", NULL, NULL, NULL, NULL, 0 } + }; + +-#ifdef SUPPORT_UTF8 ++#ifdef SUPPORT_UTF + const bool support_utf8 = true; + #else + const bool support_utf8 = false; +@@ -535,7 +535,7 @@ static void TestQuoteMetaLatin1() { + } + + static void TestQuoteMetaUtf8() { +-#ifdef SUPPORT_UTF8 ++#ifdef SUPPORT_UTF + TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8()); + TestQuoteMeta("xyz", pcrecpp::UTF8()); // No fancy utf8 + TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8()); // 2-byte utf8 (degree symbol) +@@ -1178,7 +1178,7 @@ int main(int argc, char** argv) { + CHECK(re.error().empty()); // Must have no error + } + +-#ifdef SUPPORT_UTF8 ++#ifdef SUPPORT_UTF + // Check UTF-8 handling + { + printf("Testing UTF-8 handling\n"); +@@ -1202,6 +1202,24 @@ int main(int argc, char** argv) { + CHECK(re_test1.FullMatch(utf8_string)); + RE re_test2("...", pcrecpp::UTF8()); + CHECK(re_test2.FullMatch(utf8_string)); ++ ++ // PH added these tests for leading option settings ++ ++ RE re_testZ1("(*UTF8)..."); ++ CHECK(re_testZ1.FullMatch(utf8_string)); ++ ++ RE re_testZ2("(*UTF)..."); ++ CHECK(re_testZ2.FullMatch(utf8_string)); ++ ++ RE re_testZ3("(*UCP)(*UTF)..."); ++ CHECK(re_testZ3.FullMatch(utf8_string)); ++ ++ RE re_testZ4("(*UCP)(*LIMIT_MATCH=1000)(*UTF)..."); ++ CHECK(re_testZ4.FullMatch(utf8_string)); ++ ++ RE re_testZ5("(*UCP)(*LIMIT_MATCH=1000)(*ANY)(*UTF)..."); ++ CHECK(re_testZ5.FullMatch(utf8_string)); ++ + + // Check that '.' matches one byte or UTF-8 character + // according to the mode. +@@ -1248,7 +1266,7 @@ int main(int argc, char** argv) { + CHECK(!match_sentence.FullMatch(target)); + CHECK(!match_sentence_re.FullMatch(target)); + } +-#endif /* def SUPPORT_UTF8 */ ++#endif /* def SUPPORT_UTF */ + + printf("Testing error reporting\n"); + +-- +2.14.4 + diff --git a/pcre.spec b/pcre.spec index 6355866..c14ca57 100644 --- a/pcre.spec +++ b/pcre.spec @@ -2,7 +2,7 @@ #%%global rcversion RC1 Name: pcre Version: 8.42 -Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library ## Source package only: @@ -35,6 +35,9 @@ Patch0: pcre-8.21-multilib.patch Patch1: pcre-8.32-refused_spelling_terminated.patch # Fix recursion stack estimator, upstream bug #2173, refused by upstream Patch2: pcre-8.41-fix_stack_estimator.patch +# Fix handling UTF and start-of-pattern options in C++ wrapper, +# upstream bug #2283, in upstream after 8.42 +Patch3: pcre-8.42-Fix-two-C-wrapper-bugs-unnoticed-for-years.patch BuildRequires: readline-devel BuildRequires: autoconf BuildRequires: automake @@ -119,6 +122,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. %patch0 -p1 %patch1 -p1 %patch2 -p2 +%patch3 -p1 # Because of rpath patch libtoolize --copy --force autoreconf -vif @@ -213,6 +217,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcretest.* %changelog +* Thu Jun 28 2018 Petr Pisar - 8.42-2 +- Fix handling UTF and start-of-pattern options in C++ wrapper + (upstream bug #2283) + * Tue Mar 20 2018 Petr Pisar - 8.42-1 - 8.42 bump