From 584e38d1fda565187f2df0997e7ac9f25f583ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Zaoral?= Date: Mon, 21 Aug 2023 08:54:34 +0200 Subject: [PATCH] port to PCRE 2 Resolves: rhbz#1938979 --- 0004-zsh-enable-PCRE-locale-switching.patch | 64 ++ 0005-zsh-port-to-pcre2.patch | 863 ++++++++++++++++++++ zsh.spec | 11 +- 3 files changed, 936 insertions(+), 2 deletions(-) create mode 100644 0004-zsh-enable-PCRE-locale-switching.patch create mode 100644 0005-zsh-port-to-pcre2.patch diff --git a/0004-zsh-enable-PCRE-locale-switching.patch b/0004-zsh-enable-PCRE-locale-switching.patch new file mode 100644 index 0000000..a6f03bf --- /dev/null +++ b/0004-zsh-enable-PCRE-locale-switching.patch @@ -0,0 +1,64 @@ +From 1b421e4978440234fb73117c8505dad1ccc68d46 Mon Sep 17 00:00:00 2001 +From: Jun-ichi Takimoto +Date: Mon, 26 Sep 2022 10:52:50 +0900 +Subject: [PATCH] 50658 + test: Enable to switch between C/UTF-8 locales in + PCRE + +--- + Src/Modules/pcre.c | 10 ++-------- + Test/V07pcre.ztst | 11 +++++++++++ + 2 files changed, 13 insertions(+), 8 deletions(-) + +diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c +index 6289e003e..46875a59b 100644 +--- a/Src/Modules/pcre.c ++++ b/Src/Modules/pcre.c +@@ -47,8 +47,6 @@ zpcre_utf8_enabled(void) + #if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET) + static int have_utf8_pcre = -1; + +- /* value can toggle based on MULTIBYTE, so don't +- * be too eager with caching */ + if (have_utf8_pcre < -1) + return 0; + +@@ -56,15 +54,11 @@ zpcre_utf8_enabled(void) + return 0; + + if ((have_utf8_pcre == -1) && +- (!strcmp(nl_langinfo(CODESET), "UTF-8"))) { +- +- if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre)) ++ (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) { + have_utf8_pcre = -2; /* erk, failed to ask */ + } + +- if (have_utf8_pcre < 0) +- return 0; +- return have_utf8_pcre; ++ return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8")); + + #else + return 0; +diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst +index ca13419e5..22a0b64c7 100644 +--- a/Test/V07pcre.ztst ++++ b/Test/V07pcre.ztst +@@ -162,3 +162,14 @@ + echo $match[2] ) + 0:regression for segmentation fault, workers/38307 + >test ++ ++ LANG_SAVE=$LANG ++ [[ é =~ '^.\z' ]]; echo $? ++ LANG=C ++ [[ é =~ '^..\z' ]]; echo $? ++ LANG=$LANG_SAVE ++ [[ é =~ '^.\z' ]]; echo $? ++0:swich between C/UTF-8 locales ++>0 ++>0 ++>0 +-- +2.41.0 + diff --git a/0005-zsh-port-to-pcre2.patch b/0005-zsh-port-to-pcre2.patch new file mode 100644 index 0000000..6c1a2c0 --- /dev/null +++ b/0005-zsh-port-to-pcre2.patch @@ -0,0 +1,863 @@ +From b62e911341c8ec7446378b477c47da4256053dc0 Mon Sep 17 00:00:00 2001 +From: Oliver Kiddle +Date: Sat, 13 May 2023 00:53:32 +0200 +Subject: [PATCH 1/3] 51723: migrate pcre module to pcre2 + +--- + Src/Modules/pcre.c | 223 ++++++++++++++++++--------------------------- + Test/V07pcre.ztst | 13 ++- + configure.ac | 20 ++-- + 3 files changed, 109 insertions(+), 149 deletions(-) + +diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c +index 46875a59b..079ecc2c5 100644 +--- a/Src/Modules/pcre.c ++++ b/Src/Modules/pcre.c +@@ -34,11 +34,11 @@ + #define CPCRE_PLAIN 0 + + /**/ +-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) +-#include ++#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) ++#define PCRE2_CODE_UNIT_WIDTH 8 ++#include + +-static pcre *pcre_pattern; +-static pcre_extra *pcre_hints; ++static pcre2_code *pcre_pattern; + + /**/ + static int +@@ -54,8 +54,8 @@ zpcre_utf8_enabled(void) + return 0; + + if ((have_utf8_pcre == -1) && +- (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))) { +- have_utf8_pcre = -2; /* erk, failed to ask */ ++ (pcre2_config(PCRE2_CONFIG_UNICODE, &have_utf8_pcre))) { ++ have_utf8_pcre = -2; /* erk, failed to ask */ + } + + return (have_utf8_pcre == 1) && (!strcmp(nl_langinfo(CODESET), "UTF-8")); +@@ -69,115 +69,87 @@ zpcre_utf8_enabled(void) + static int + bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func)) + { +- int pcre_opts = 0, pcre_errptr, target_len; +- const char *pcre_error; ++ uint32_t pcre_opts = 0; ++ int target_len; ++ int pcre_error; ++ PCRE2_SIZE pcre_offset; + char *target; + +- if(OPT_ISSET(ops,'a')) pcre_opts |= PCRE_ANCHORED; +- if(OPT_ISSET(ops,'i')) pcre_opts |= PCRE_CASELESS; +- if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE; +- if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED; +- if(OPT_ISSET(ops,'s')) pcre_opts |= PCRE_DOTALL; ++ if (OPT_ISSET(ops, 'a')) pcre_opts |= PCRE2_ANCHORED; ++ if (OPT_ISSET(ops, 'i')) pcre_opts |= PCRE2_CASELESS; ++ if (OPT_ISSET(ops, 'm')) pcre_opts |= PCRE2_MULTILINE; ++ if (OPT_ISSET(ops, 'x')) pcre_opts |= PCRE2_EXTENDED; ++ if (OPT_ISSET(ops, 's')) pcre_opts |= PCRE2_DOTALL; + + if (zpcre_utf8_enabled()) +- pcre_opts |= PCRE_UTF8; +- +-#ifdef HAVE_PCRE_STUDY +- if (pcre_hints) +-#ifdef PCRE_CONFIG_JIT +- pcre_free_study(pcre_hints); +-#else +- pcre_free(pcre_hints); +-#endif +- pcre_hints = NULL; +-#endif ++ pcre_opts |= PCRE2_UTF; + + if (pcre_pattern) +- pcre_free(pcre_pattern); ++ pcre2_code_free(pcre_pattern); + pcre_pattern = NULL; + + target = ztrdup(*args); + unmetafy(target, &target_len); + +- if ((int)strlen(target) != target_len) { +- zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern"); +- } +- +- pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL); ++ pcre_pattern = pcre2_compile((PCRE2_SPTR) target, (PCRE2_SIZE) target_len, ++ pcre_opts, &pcre_error, &pcre_offset, NULL); + + free(target); + + if (pcre_pattern == NULL) + { +- zwarnnam(nam, "error in regex: %s", pcre_error); ++ PCRE2_UCHAR buffer[256]; ++ pcre2_get_error_message(pcre_error, buffer, sizeof(buffer)); ++ zwarnnam(nam, "error in regex: %s", buffer); + return 1; + } + + return 0; + } + +-/**/ +-#ifdef HAVE_PCRE_STUDY +- + /**/ + static int + bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int func)) + { +- const char *pcre_error; +- + if (pcre_pattern == NULL) + { + zwarnnam(nam, "no pattern has been compiled for study"); + return 1; + } +- +- if (pcre_hints) +-#ifdef PCRE_CONFIG_JIT +- pcre_free_study(pcre_hints); +-#else +- pcre_free(pcre_hints); +-#endif +- pcre_hints = NULL; + +- pcre_hints = pcre_study(pcre_pattern, 0, &pcre_error); +- if (pcre_error != NULL) +- { +- zwarnnam(nam, "error while studying regex: %s", pcre_error); +- return 1; ++ int jit = 0; ++ if (!pcre2_config(PCRE2_CONFIG_JIT, &jit) && jit) { ++ if (pcre2_jit_compile(pcre_pattern, PCRE2_JIT_COMPLETE) < 0) { ++ zwarnnam(nam, "error while studying regex"); ++ return 1; ++ } + } + + return 0; + } + +-/**/ +-#else /* !HAVE_PCRE_STUDY */ +- +-# define bin_pcre_study bin_notavail +- +-/**/ +-#endif /* !HAVE_PCRE_STUDY */ +- +-/**/ + static int +-zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, +- char *substravar, int want_offset_pair, int matchedinarr, +- int want_begin_end) ++zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, ++ char *matchvar, char *substravar, int want_offset_pair, ++ int matchedinarr, int want_begin_end) + { +- char **captures, *match_all, **matches; ++ PCRE2_SIZE *ovec; ++ char *match_all, **matches; + char offset_all[50]; + int capture_start = 1; + + if (matchedinarr) { +- /* bash-style captures[0] entire-matched string in the array */ ++ /* bash-style ovec[0] entire-matched string in the array */ + capture_start = 0; + } + +- /* captures[0] will be entire matched string, [1] first substring */ +- if (!pcre_get_substring_list(arg, ovec, captured_count, (const char ***)&captures)) { +- int nelem = arrlen(captures)-1; ++ /* ovec[0] will be entire matched string, [1] first substring */ ++ ovec = pcre2_get_ovector_pointer(mdata); ++ if (ovec) { ++ int nelem = captured_count - 1; + /* Set to the offsets of the complete match */ + if (want_offset_pair) { +- sprintf(offset_all, "%d %d", ovec[0], ovec[1]); ++ sprintf(offset_all, "%ld %ld", ovec[0], ovec[1]); + setsparam("ZPCRE_OP", ztrdup(offset_all)); + } + /* +@@ -186,7 +158,7 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, + * ovec is length 2*(1+capture_list_length) + */ + if (matchvar) { +- match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP); ++ match_all = metafy(arg + ovec[0], ovec[1] - ovec[0], META_DUP); + setsparam(matchvar, match_all); + } + /* +@@ -201,16 +173,12 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, + */ + if (substravar && + (!want_begin_end || nelem)) { +- char **x, **y; ++ char **x; + int vec_off, i; +- y = &captures[capture_start]; + matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start)); +- for (i = capture_start; i < captured_count; i++, y++) { ++ for (i = capture_start; i < captured_count; i++) { + vec_off = 2*i; +- if (*y) +- *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP); +- else +- *x++ = NULL; ++ *x++ = metafy(arg + ovec[vec_off], ovec[vec_off+1]-ovec[vec_off], META_DUP); + } + *x = NULL; + setaparam(substravar, matches); +@@ -247,7 +215,8 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, + setiparam("MEND", offs + !isset(KSHARRAYS) - 1); + if (nelem) { + char **mbegin, **mend, **bptr, **eptr; +- int i, *ipair; ++ int i; ++ size_t *ipair; + + bptr = mbegin = zalloc(sizeof(char*)*(nelem+1)); + eptr = mend = zalloc(sizeof(char*)*(nelem+1)); +@@ -287,8 +256,6 @@ zpcre_get_substrings(char *arg, int *ovec, int captured_count, char *matchvar, + setaparam("mend", mend); + } + } +- +- pcre_free_substring_list((const char **)captures); + } + + return 0; +@@ -314,7 +281,8 @@ getposint(char *instr, char *nam) + static int + bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + { +- int ret, capcount, *ovec, ovecsize, c; ++ int ret, c; ++ pcre2_match_data *pcre_mdata = NULL; + char *matched_portion = NULL; + char *plaintext = NULL; + char *receptacle = NULL; +@@ -344,36 +312,30 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + /* For the entire match, 'Return' the offset byte positions instead of the matched string */ + if(OPT_ISSET(ops,'b')) want_offset_pair = 1; + +- if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount))) +- { +- zwarnnam(nam, "error %d in fullinfo", ret); +- return 1; +- } +- +- ovecsize = (capcount+1)*3; +- ovec = zalloc(ovecsize*sizeof(int)); +- + plaintext = ztrdup(*args); + unmetafy(plaintext, &subject_len); + + if (offset_start > 0 && offset_start >= subject_len) +- ret = PCRE_ERROR_NOMATCH; +- else +- ret = pcre_exec(pcre_pattern, pcre_hints, plaintext, subject_len, offset_start, 0, ovec, ovecsize); ++ ret = PCRE2_ERROR_NOMATCH; ++ else { ++ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); ++ ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, ++ offset_start, 0, pcre_mdata, NULL); ++ } + + if (ret==0) return_value = 0; +- else if (ret==PCRE_ERROR_NOMATCH) /* no match */; ++ else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; + else if (ret>0) { +- zpcre_get_substrings(plaintext, ovec, ret, matched_portion, receptacle, ++ zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle, + want_offset_pair, 0, 0); + return_value = 0; + } + else { +- zwarnnam(nam, "error in pcre_exec [%d]", ret); ++ zwarnnam(nam, "error in pcre2_match [%d]", ret); + } + +- if (ovec) +- zfree(ovec, ovecsize*sizeof(int)); ++ if (pcre_mdata) ++ pcre2_match_data_free(pcre_mdata); + zsfree(plaintext); + + return return_value; +@@ -383,17 +345,19 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + static int + cond_pcre_match(char **a, int id) + { +- pcre *pcre_pat; +- const char *pcre_err; ++ pcre2_code *pcre_pat = NULL; ++ int pcre_err; ++ PCRE2_SIZE pcre_erroff; + char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar, *svar; +- int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; ++ int r = 0, pcre_opts = 0; ++ pcre2_match_data *pcre_mdata = NULL; + int lhstr_plain_len, rhre_plain_len; + int return_value = 0; + + if (zpcre_utf8_enabled()) +- pcre_opts |= PCRE_UTF8; ++ pcre_opts |= PCRE2_UTF; + if (isset(REMATCHPCRE) && !isset(CASEMATCH)) +- pcre_opts |= PCRE_CASELESS; ++ pcre_opts |= PCRE2_CASELESS; + + lhstr = cond_str(a,0,0); + rhre = cond_str(a,1,0); +@@ -401,9 +365,6 @@ cond_pcre_match(char **a, int id) + rhre_plain = ztrdup(rhre); + unmetafy(lhstr_plain, &lhstr_plain_len); + unmetafy(rhre_plain, &rhre_plain_len); +- pcre_pat = NULL; +- ov = NULL; +- ovsize = 0; + + if (isset(BASHREMATCH)) { + svar = NULL; +@@ -415,27 +376,27 @@ cond_pcre_match(char **a, int id) + + switch(id) { + case CPCRE_PLAIN: +- if ((int)strlen(rhre_plain) != rhre_plain_len) { +- zwarn("embedded NULs in PCRE pattern terminate pattern"); +- } +- pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL); +- if (pcre_pat == NULL) { +- zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err); ++ if (!(pcre_pat = pcre2_compile((PCRE2_SPTR) rhre_plain, ++ (PCRE2_SIZE) rhre_plain_len, pcre_opts, ++ &pcre_err, &pcre_erroff, NULL))) ++ { ++ PCRE2_UCHAR buffer[256]; ++ pcre2_get_error_message(pcre_err, buffer, sizeof(buffer)); ++ zwarn("failed to compile regexp /%s/: %s", rhre, buffer); + break; + } +- pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); +- ovsize = (capcnt+1)*3; +- ov = zalloc(ovsize*sizeof(int)); +- r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize); +- /* r < 0 => error; r==0 match but not enough size in ov ++ pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pat, NULL); ++ r = pcre2_match(pcre_pat, (PCRE2_SPTR8) lhstr_plain, lhstr_plain_len, ++ 0, 0, pcre_mdata, NULL); ++ /* r < 0 => error; r==0 match but not enough size in match data + * r > 0 => (r-1) substrings found; r==1 => no substrings + */ + if (r==0) { +- zwarn("reportable zsh problem: pcre_exec() returned 0"); ++ zwarn("reportable zsh problem: pcre2_match() returned 0"); + return_value = 1; + break; + } +- else if (r==PCRE_ERROR_NOMATCH) { ++ else if (r == PCRE2_ERROR_NOMATCH) { + return_value = 0; /* no match */ + break; + } +@@ -444,7 +405,7 @@ cond_pcre_match(char **a, int id) + break; + } + else if (r>0) { +- zpcre_get_substrings(lhstr_plain, ov, r, svar, avar, 0, ++ zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0, + isset(BASHREMATCH), + !isset(BASHREMATCH)); + return_value = 1; +@@ -457,10 +418,10 @@ cond_pcre_match(char **a, int id) + free(lhstr_plain); + if(rhre_plain) + free(rhre_plain); ++ if (pcre_mdata) ++ pcre2_match_data_free(pcre_mdata); + if (pcre_pat) +- pcre_free(pcre_pat); +- if (ov) +- zfree(ov, ovsize*sizeof(int)); ++ pcre2_code_free(pcre_pat); + + return return_value; + } +@@ -489,11 +450,11 @@ static struct builtin bintab[] = { + + static struct features module_features = { + bintab, sizeof(bintab)/sizeof(*bintab), +-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) ++#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) + cotab, sizeof(cotab)/sizeof(*cotab), +-#else /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ ++#else /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ + NULL, 0, +-#endif /* !(HAVE_PCRE_COMPILE && HAVE_PCRE_EXEC) */ ++#endif /* !(HAVE_PCRE2_COMPILE_8 && HAVE_PCRE2_H) */ + NULL, 0, + NULL, 0, + 0 +@@ -540,19 +501,9 @@ cleanup_(Module m) + int + finish_(UNUSED(Module m)) + { +-#if defined(HAVE_PCRE_COMPILE) && defined(HAVE_PCRE_EXEC) +-#ifdef HAVE_PCRE_STUDY +- if (pcre_hints) +-#ifdef PCRE_CONFIG_JIT +- pcre_free_study(pcre_hints); +-#else +- pcre_free(pcre_hints); +-#endif +- pcre_hints = NULL; +-#endif +- ++#if defined(HAVE_PCRE2_COMPILE_8) && defined(HAVE_PCRE2_H) + if (pcre_pattern) +- pcre_free(pcre_pattern); ++ pcre2_code_free(pcre_pattern); + pcre_pattern = NULL; + #endif + +diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst +index 22a0b64c7..6eb366964 100644 +--- a/Test/V07pcre.ztst ++++ b/Test/V07pcre.ztst +@@ -117,12 +117,17 @@ + >78884; ZPCRE_OP: 25 30 + >90210; ZPCRE_OP: 31 36 + +-# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed) ++# Embedded NULs allowed in plaintext, in RE, pcre supports \0 as two-chars + [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]] + print "${#MATCH}; ${#match[1]}; ${#match[2]}" + 0:ensure ASCII NUL passes in and out of matched plaintext + >6; 3; 3 + ++# PCRE2 supports NULs also in the RE ++ [[ $'a\0b\0c' =~ $'^(.\0)+' ]] && print "${#MATCH}; ${#match[1]}" ++0:ensure ASCII NUL works also in the regex ++>4; 2 ++ + # Ensure the long-form infix operator works + [[ foo -pcre-match ^f..$ ]] + print $? +@@ -169,7 +174,11 @@ + [[ é =~ '^..\z' ]]; echo $? + LANG=$LANG_SAVE + [[ é =~ '^.\z' ]]; echo $? +-0:swich between C/UTF-8 locales ++0:switch between C/UTF-8 locales + >0 + >0 + >0 ++ ++ [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}" ++0:empty capture ++>3; 1; 0 +diff --git a/configure.ac b/configure.ac +index d8a17791a..4710d1659 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -438,7 +438,7 @@ fi], + + dnl Do you want to look for pcre support? + AC_ARG_ENABLE(pcre, +-AS_HELP_STRING([--enable-pcre],[enable the search for the pcre library (may create run-time library dependencies)])) ++AS_HELP_STRING([--enable-pcre],[enable the search for the pcre2 library (may create run-time library dependencies)])) + + dnl Do you want to look for capability support? + AC_ARG_ENABLE(cap, +@@ -652,13 +652,12 @@ AC_HEADER_SYS_WAIT + + oldcflags="$CFLAGS" + if test x$enable_pcre = xyes; then +-AC_CHECK_PROG([PCRECONF], pcre-config, pcre-config) +-dnl Typically (meaning on this single RedHat 9 box in front of me) +-dnl pcre-config --cflags produces a -I output which needs to go into ++AC_CHECK_PROG([PCRECONF], pcre2-config, pcre2-config) ++dnl pcre2-config --cflags may produce a -I output which needs to go into + dnl CPPFLAGS else configure's preprocessor tests don't pick it up, + dnl producing a warning. +-if test "x$ac_cv_prog_PCRECONF" = xpcre-config; then +- CPPFLAGS="$CPPFLAGS `pcre-config --cflags`" ++if test "x$ac_cv_prog_PCRECONF" = xpcre2-config; then ++ CPPFLAGS="$CPPFLAGS `pcre2-config --cflags`" + fi + fi + +@@ -668,9 +667,10 @@ AC_CHECK_HEADERS(sys/time.h sys/times.h sys/select.h termcap.h termio.h \ + locale.h errno.h stdio.h stdarg.h varargs.h stdlib.h \ + unistd.h sys/capability.h \ + utmp.h utmpx.h sys/types.h pwd.h grp.h poll.h sys/mman.h \ +- netinet/in_systm.h pcre.h langinfo.h wchar.h stddef.h \ ++ netinet/in_systm.h langinfo.h wchar.h stddef.h \ + sys/stropts.h iconv.h ncurses.h ncursesw/ncurses.h \ + ncurses/ncurses.h) ++AC_CHECK_HEADERS([pcre2.h],,,[#define PCRE2_CODE_UNIT_WIDTH 8]) + if test x$dynamic = xyes; then + AC_CHECK_HEADERS(dlfcn.h) + AC_CHECK_HEADERS(dl.h) +@@ -948,9 +948,7 @@ if test "x$ac_found_iconv" = "xyes"; then + fi + + if test x$enable_pcre = xyes; then +-dnl pcre-config should probably be employed here +-dnl AC_SEARCH_LIBS(pcre_compile, pcre) +- LIBS="`$ac_cv_prog_PCRECONF --libs` $LIBS" ++ LIBS="`$ac_cv_prog_PCRECONF --libs8` $LIBS" + fi + + dnl --------------------- +@@ -1313,7 +1311,7 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \ + pathconf sysconf \ + tgetent tigetflag tigetnum tigetstr setupterm initscr resize_term \ + getcchar setcchar waddwstr wget_wch win_wch use_default_colors \ +- pcre_compile pcre_study pcre_exec \ ++ pcre2_compile_8 \ + nl_langinfo \ + erand48 open_memstream \ + posix_openpt \ +-- +2.41.0 + + +From f3f371deb376478176866fd770fbcf9bc0d0609f Mon Sep 17 00:00:00 2001 +From: Oliver Kiddle +Date: Sat, 13 May 2023 00:56:48 +0200 +Subject: [PATCH 2/3] 51728: assign pcre named capture groups to a hash + +--- + Doc/Zsh/mod_pcre.yo | 10 ++++++---- + Src/Modules/pcre.c | 43 +++++++++++++++++++++++++++++++++---------- + Test/V07pcre.ztst | 14 ++++++++++++++ + 3 files changed, 53 insertions(+), 14 deletions(-) + +diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo +index c2817f519..6d073985d 100644 +--- a/Doc/Zsh/mod_pcre.yo ++++ b/Doc/Zsh/mod_pcre.yo +@@ -20,12 +20,12 @@ including those that indicate newline. + ) + findex(pcre_study) + item(tt(pcre_study))( +-Studies the previously-compiled PCRE which may result in faster +-matching. ++Requests JIT compilation for the previously-compiled PCRE which ++may result in faster matching. + ) + findex(pcre_match) + item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \ +-[ tt(-n) var(offset) ] [ tt(-b) ] var(string))( ++[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))( + Returns successfully if tt(string) matches the previously-compiled + PCRE. + +@@ -36,7 +36,9 @@ substrings, unless the tt(-a) option is given, in which + case it will set the array var(arr). Similarly, the variable + tt(MATCH) will be set to the entire matched portion of the + string, unless the tt(-v) option is given, in which case the variable +-var(var) will be set. ++var(var) will be set. Furthermore, any named captures will ++be stored in the associative array tt(.pcre.match) unless an ++alternative is given with tt(-A). + No variables are altered if there is no successful match. + A tt(-n) option starts searching for a match from the + byte var(offset) position in var(string). If the tt(-b) option is given, +diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c +index 079ecc2c5..6be1f76e2 100644 +--- a/Src/Modules/pcre.c ++++ b/Src/Modules/pcre.c +@@ -129,14 +129,17 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f + } + + static int +-zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, +- char *matchvar, char *substravar, int want_offset_pair, +- int matchedinarr, int want_begin_end) ++zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata, ++ int captured_count, char *matchvar, char *substravar, char *namedassoc, ++ int want_offset_pair, int matchedinarr, int want_begin_end) + { + PCRE2_SIZE *ovec; + char *match_all, **matches; + char offset_all[50]; + int capture_start = 1; ++ int vec_off; ++ PCRE2_SPTR ntable; /* table of named captures */ ++ uint32_t ncount, nsize; + + if (matchedinarr) { + /* bash-style ovec[0] entire-matched string in the array */ +@@ -174,7 +177,7 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, + if (substravar && + (!want_begin_end || nelem)) { + char **x; +- int vec_off, i; ++ int i; + matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start)); + for (i = capture_start; i < captured_count; i++) { + vec_off = 2*i; +@@ -184,6 +187,23 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count, + setaparam(substravar, matches); + } + ++ if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount ++ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize) ++ && !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable)) ++ { ++ char **hash, **hashptr; ++ uint32_t nidx; ++ hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *)); ++ for (nidx = 0; nidx < ncount; nidx++) { ++ vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1]; ++ /* would metafy the key but pcre limits characters in the name */ ++ *hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2); ++ *hashptr++ = metafy(arg + ovec[vec_off], ++ ovec[vec_off+1]-ovec[vec_off], META_DUP); ++ } ++ sethparam(namedassoc, hash); ++ } ++ + if (want_begin_end) { + /* + * cond-infix rather than builtin; also not bash; so we set a bunch +@@ -286,6 +306,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + char *matched_portion = NULL; + char *plaintext = NULL; + char *receptacle = NULL; ++ char *named = ".pcre.match"; + int return_value = 1; + /* The subject length and offset start are both int values in pcre_exec */ + int subject_len; +@@ -305,6 +326,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + if(OPT_HASARG(ops,c='v')) { + matched_portion = OPT_ARG(ops,c); + } ++ if (OPT_HASARG(ops, c='A')) { ++ named = OPT_ARG(ops, c); ++ } + if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */ + if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0) + return 1; +@@ -326,8 +350,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + if (ret==0) return_value = 0; + else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; + else if (ret>0) { +- zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle, +- want_offset_pair, 0, 0); ++ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion, ++ receptacle, named, want_offset_pair, 0, 0); + return_value = 0; + } + else { +@@ -405,9 +429,8 @@ cond_pcre_match(char **a, int id) + break; + } + else if (r>0) { +- zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0, +- isset(BASHREMATCH), +- !isset(BASHREMATCH)); ++ zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar, ++ ".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH)); + return_value = 1; + break; + } +@@ -443,7 +466,7 @@ static struct conddef cotab[] = { + + static struct builtin bintab[] = { + BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL), +- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL), ++ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL), + BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL) + }; + +diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst +index 6eb366964..027fea3aa 100644 +--- a/Test/V07pcre.ztst ++++ b/Test/V07pcre.ztst +@@ -182,3 +182,17 @@ + [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}" + 0:empty capture + >3; 1; 0 ++ ++ [[ category/name-12345 =~ '(?x)^ ++ (? [^/]* ) / ++ (? ++ (? \w+ ) - ++ (? \d+ ))$' ]] ++ typeset -p1 .pcre.match ++0:named captures ++>typeset -g -A .pcre.match=( ++> [category]=category ++> [name]=name ++> [package]=name-12345 ++> [version]=12345 ++>) +-- +2.41.0 + + +From b4d1c756f50909b4a13e5c8fe5f26f71e9d54f63 Mon Sep 17 00:00:00 2001 +From: Oliver Kiddle +Date: Sat, 13 May 2023 00:59:00 +0200 +Subject: [PATCH 3/3] 51738: support pcre's alternative DFA matching algorithm + +--- + Doc/Zsh/mod_pcre.yo | 6 ++++- + Src/Modules/pcre.c | 53 ++++++++++++++++++++++++++++++--------------- + Test/V07pcre.ztst | 5 +++++ + 3 files changed, 46 insertions(+), 18 deletions(-) + +diff --git a/Doc/Zsh/mod_pcre.yo b/Doc/Zsh/mod_pcre.yo +index 6d073985d..da73ac85a 100644 +--- a/Doc/Zsh/mod_pcre.yo ++++ b/Doc/Zsh/mod_pcre.yo +@@ -25,7 +25,7 @@ may result in faster matching. + ) + findex(pcre_match) + item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \ +-[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))( ++[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-bd) ] var(string))( + Returns successfully if tt(string) matches the previously-compiled + PCRE. + +@@ -69,6 +69,10 @@ print -l $accum) + ) + enditem() + ++The option tt(-d) uses the alternative breadth-first DFA search algorithm of ++pcre. This sets tt(match), or the array given with tt(-a), to all the matches ++found from the same start point in the subject. ++ + The tt(zsh/pcre) module makes available the following test condition: + + startitem() +diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c +index 6be1f76e2..96f3c6e65 100644 +--- a/Src/Modules/pcre.c ++++ b/Src/Modules/pcre.c +@@ -305,30 +305,29 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + pcre2_match_data *pcre_mdata = NULL; + char *matched_portion = NULL; + char *plaintext = NULL; +- char *receptacle = NULL; +- char *named = ".pcre.match"; ++ char *receptacle; ++ char *named = NULL; + int return_value = 1; + /* The subject length and offset start are both int values in pcre_exec */ + int subject_len; + int offset_start = 0; + int want_offset_pair = 0; ++ int use_dfa = 0; + + if (pcre_pattern == NULL) { + zwarnnam(nam, "no pattern has been compiled"); + return 1; + } + +- matched_portion = "MATCH"; +- receptacle = "match"; +- if(OPT_HASARG(ops,c='a')) { +- receptacle = OPT_ARG(ops,c); +- } +- if(OPT_HASARG(ops,c='v')) { +- matched_portion = OPT_ARG(ops,c); +- } +- if (OPT_HASARG(ops, c='A')) { +- named = OPT_ARG(ops, c); ++ if (!(use_dfa = OPT_ISSET(ops, 'd'))) { ++ matched_portion = OPT_HASARG(ops, c='v') ? OPT_ARG(ops, c) : "MATCH"; ++ named = OPT_HASARG(ops, c='A') ? OPT_ARG(ops, c) : ".pcre.match"; ++ } else if (OPT_HASARG(ops, c='v') || OPT_HASARG(ops, c='A')) { ++ zwarnnam(nam, "-d cannot be combined with -%c", c); ++ return 1; + } ++ receptacle = OPT_HASARG(ops, 'a') ? OPT_ARG(ops, 'a') : "match"; ++ + if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */ + if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0) + return 1; +@@ -341,7 +340,25 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + + if (offset_start > 0 && offset_start >= subject_len) + ret = PCRE2_ERROR_NOMATCH; +- else { ++ else if (use_dfa) { ++ PCRE2_SIZE old, wscount = 128, capcount = 128; ++ void *workspace = zhalloc(sizeof(int) * wscount); ++ pcre_mdata = pcre2_match_data_create(capcount, NULL); ++ do { ++ ret = pcre2_dfa_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, ++ offset_start, 0, pcre_mdata, NULL, (int *) workspace, wscount); ++ if (ret == PCRE2_ERROR_DFA_WSSIZE) { ++ old = wscount; ++ wscount += wscount / 2; ++ workspace = hrealloc(workspace, sizeof(int) * old, sizeof(int) * wscount); ++ } else if (ret == 0) { ++ capcount += capcount / 2; ++ pcre2_match_data_free(pcre_mdata); ++ pcre_mdata = pcre2_match_data_create(capcount, NULL); ++ } else ++ break; ++ } while(1); ++ } else { + pcre_mdata = pcre2_match_data_create_from_pattern(pcre_pattern, NULL); + ret = pcre2_match(pcre_pattern, (PCRE2_SPTR) plaintext, subject_len, + offset_start, 0, pcre_mdata, NULL); +@@ -350,12 +367,14 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func)) + if (ret==0) return_value = 0; + else if (ret == PCRE2_ERROR_NOMATCH) /* no match */; + else if (ret>0) { +- zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion, +- receptacle, named, want_offset_pair, 0, 0); ++ zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, ++ matched_portion, receptacle, named, want_offset_pair, use_dfa, 0); + return_value = 0; + } + else { +- zwarnnam(nam, "error in pcre2_match [%d]", ret); ++ PCRE2_UCHAR buffer[256]; ++ pcre2_get_error_message(ret, buffer, sizeof(buffer)); ++ zwarnnam(nam, "error in pcre matching for /%s/: %s", plaintext, buffer); + } + + if (pcre_mdata) +@@ -466,7 +485,7 @@ static struct conddef cotab[] = { + + static struct builtin bintab[] = { + BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL), +- BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL), ++ BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:bd", NULL), + BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL) + }; + +diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst +index 027fea3aa..585698d05 100644 +--- a/Test/V07pcre.ztst ++++ b/Test/V07pcre.ztst +@@ -196,3 +196,8 @@ + > [package]=name-12345 + > [version]=12345 + >) ++ ++ pcre_compile 'cat(er(pillar)?)?' ++ pcre_match -d 'the caterpillar catchment' && print $match ++0:pcre_match -d ++>caterpillar cater cat +-- +2.41.0 + diff --git a/zsh.spec b/zsh.spec index 1e031c0..ebb19d9 100644 --- a/zsh.spec +++ b/zsh.spec @@ -1,7 +1,7 @@ Summary: Powerful interactive shell Name: zsh Version: 5.9 -Release: 8%{?dist} +Release: 9%{?dist} License: MIT-Modern-Variant AND ISC AND GPL-2.0-only URL: http://zsh.sourceforge.net/ Source0: https://downloads.sourceforge.net/%{name}/%{name}-%{version}.tar.xz @@ -19,6 +19,10 @@ Patch1: 0001-zsh-5.9-do-not-use-egrep-in-tests.patch Patch2: 0002-zsh-Use-int-main-in-test-c-codes.patch # upstream commit a84fdd7c8f77935ecce99ff2b0bdba738821ed79 Patch3: 0003-zsh-fix-module-loading-problem-with-full-RELRO.patch +# upstream commit 1b421e4978440234fb73117c8505dad1ccc68d46 +Patch4: 0004-zsh-enable-PCRE-locale-switching.patch +# upstream commits b62e911341c8ec7446378b477c47da4256053dc0...b4d1c756f50909b4a13e5c8fe5f26f71e9d54f63 +Patch5: 0005-zsh-port-to-pcre2.patch BuildRequires: autoconf BuildRequires: coreutils @@ -29,7 +33,7 @@ BuildRequires: glibc-langpack-ja BuildRequires: libcap-devel BuildRequires: make BuildRequires: ncurses-devel -BuildRequires: pcre-devel +BuildRequires: pcre2-devel BuildRequires: sed BuildRequires: texi2html BuildRequires: texinfo @@ -164,6 +168,9 @@ fi %doc Doc/*.html %changelog +* Mon Aug 21 2023 Lukáš Zaoral - 5.9-9 +- port to PCRE 2 (rhbz#1938979) + * Sat Jul 22 2023 Fedora Release Engineering - 5.9-8 - Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild