Port pcre dependency to pcre2

resolves: rhbz#2128320
This commit is contained in:
alakatos 2023-06-08 08:53:53 +02:00
parent a12998ad96
commit c32581d3c9
2 changed files with 180 additions and 3 deletions

View File

@ -0,0 +1,168 @@
diff --git a/configure.ac b/configure.ac
index b6f92f1..752b7d9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -80,7 +80,7 @@ AC_ARG_ENABLE(regexp,
)
AM_CONDITIONAL(ENABLE_REGEXP, test x$enable_regexp = xyes)
if test "$enable_regexp" = "yes"; then
- PKG_CHECK_MODULES(PCRE, libpcre)
+ PKG_CHECK_MODULES(PCRE, [libpcre2-8 >= 10.00])
AC_DEFINE(FEATURE_REGEXP, 1, [Regular expressions support enabled.])
FEATURE_REGEXP=1
else
@@ -194,5 +194,4 @@ echo "Testbench enabled: $enable_testbench"
echo "Valgrind enabled: $enable_valgrind"
echo "Debug mode enabled: $enable_debug"
echo "Tools enabled: $enable_tools"
-echo "Docs enabled: $enable_docs"
-
+echo "Docs enabled: $enable_docs"
\ No newline at end of file
diff --git a/src/parser.c b/src/parser.c
index 2d70424..dcd5b4e 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -42,12 +42,6 @@
#include "samp.h"
#include "helpers.h"
-#ifdef FEATURE_REGEXP
-#include <pcre.h>
-#include <errno.h>
-#endif
-
-
/* how should output values be formatted? */
enum FMT_MODE {
FMT_AS_STRING = 0,
diff --git a/src/v1_parser.c b/src/v1_parser.c
index 323ada0..9fb3ccb 100644
--- a/src/v1_parser.c
+++ b/src/v1_parser.c
@@ -39,7 +39,8 @@
#include "v1_samp.h"
#ifdef FEATURE_REGEXP
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#include <errno.h>
#endif
@@ -1266,7 +1267,7 @@ void* tokenized_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
* significantly slower than other field-types.
*/
struct regex_parser_data_s {
- pcre *re;
+ pcre2_code *re;
int consume_group;
int return_group;
int max_groups;
@@ -1276,17 +1277,33 @@ PARSER(Regex)
assert(str != NULL);
assert(offs != NULL);
assert(parsed != NULL);
- unsigned int* ovector = NULL;
+ PCRE2_SIZE *ovector;
+ pcre2_match_data *match_data = NULL;
struct regex_parser_data_s *pData = (struct regex_parser_data_s*) node->parser_data;
if (pData != NULL) {
- ovector = calloc(pData->max_groups, sizeof(unsigned int) * 3);
- if (ovector == NULL) FAIL(LN_NOMEM);
+ match_data = pcre2_match_data_create_from_pattern(pData->re, NULL);
+ if (match_data == NULL) FAIL(LN_NOMEM);
+
+ int result = pcre2_match(
+ pData->re, /* the compiled pattern */
+ (PCRE2_SPTR)str, /* the subject string */
+ (PCRE2_SIZE)strLen, /* the length of the subject */
+ (PCRE2_SIZE)*offs, /* start matching at the current parse offset */
+ 0, /* default options */
+ match_data, /* block for storing the result */
+ NULL); /* use default match context */
- int result = pcre_exec(pData->re, NULL, str, strLen, *offs, 0, (int*) ovector, pData->max_groups * 3);
if (result == 0) result = pData->max_groups;
if (result > pData->consume_group) {
- /*please check 'man 3 pcreapi' for cryptic '2 * n' and '2 * n + 1' magic*/
+ ovector = pcre2_get_ovector_pointer(match_data);
+ printf("Match succeeded at offset %d\n", (int)ovector[0]);
+
+ /* Please check 'man 3 pcre2api' for the cryptic '2 * n' and '2 * n + 1' magic.
+ * In a nutshell: within the ovector, the first value of each pair is the
+ * offset of the first code unit of a substring, and the second is the
+ * offset of the first code unit after the end of that substring.
+ */
if (ovector[2 * pData->consume_group] == *offs) {
*parsed = ovector[2 * pData->consume_group + 1] - ovector[2 * pData->consume_group];
if (pData->consume_group != pData->return_group) {
@@ -1294,22 +1311,20 @@ PARSER(Regex)
if((val = strndup(str + ovector[2 * pData->return_group],
ovector[2 * pData->return_group + 1] -
ovector[2 * pData->return_group])) == NULL) {
- free(ovector);
FAIL(LN_NOMEM);
}
*value = json_object_new_string(val);
free(val);
if (*value == NULL) {
- free(ovector);
FAIL(LN_NOMEM);
}
}
}
}
- free(ovector);
}
r = 0; /* success */
done:
+ pcre2_match_data_free(match_data);
return r;
}
@@ -1346,8 +1361,8 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
char* name = NULL;
struct regex_parser_data_s *pData = NULL;
const char *unescaped_exp = NULL;
- const char *error = NULL;
- int erroffset = 0;
+ PCRE2_SIZE erroffset = 0;
+ int errcode = 0;
CHKN(name = es_str2cstr(node->name, NULL));
@@ -1365,7 +1380,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
if ((grp_parse_err = regex_parser_configure_consume_and_return_group(args, pData)) != NULL)
FAIL(LN_BADCONFIG);
- CHKN(pData->re = pcre_compile(exp, 0, &error, &erroffset, NULL));
+ CHKN(pData->re = pcre2_compile((PCRE2_SPTR)exp, PCRE2_ZERO_TERMINATED, 0, &errcode, &erroffset, NULL));
pData->max_groups = ((pData->consume_group > pData->return_group) ? pData->consume_group :
pData->return_group) + 1;
@@ -1387,9 +1402,12 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
ln_dbgprintf(ctx, "couldn't allocate memory for regex-string for field: '%s'", name);
else if (grp_parse_err != NULL)
ln_dbgprintf(ctx, "%s for: '%s'", grp_parse_err, name);
- else if (pData->re == NULL)
+ else if (pData->re == NULL) {
+ PCRE2_UCHAR errbuffer[256];
+ pcre2_get_error_message(errcode, errbuffer, sizeof(errbuffer));
ln_dbgprintf(ctx, "couldn't compile regex(encountered error '%s' at char '%d' in pattern) "
- "for regex-matched field: '%s'", error, erroffset, name);
+ "for regex-matched field: '%s'", errbuffer, (int)erroffset, name);
+ }
regex_parser_data_destructor((void**)&pData);
}
if (exp != NULL) free(exp);
@@ -1401,7 +1419,7 @@ void* regex_parser_data_constructor(ln_fieldList_t *node, ln_ctx ctx) {
void regex_parser_data_destructor(void** dataPtr) {
if ((*dataPtr) != NULL) {
struct regex_parser_data_s *pData = (struct regex_parser_data_s*) *dataPtr;
- if (pData->re != NULL) pcre_free(pData->re);
+ if (pData->re != NULL) pcre2_code_free(pData->re);
free(pData);
*dataPtr = NULL;
}

View File

@ -2,7 +2,7 @@
Name: liblognorm
Version: 2.0.6
Release: 8%{?dist}
Release: 9%{?dist}
Summary: Fast samples-based log normalization library
License: LGPL-2.1-or-later AND Apache-2.0
URL: http://www.liblognorm.com
@ -12,10 +12,14 @@ BuildRequires: gcc
BuildRequires: chrpath
BuildRequires: libfastjson-devel
BuildRequires: libestr-devel
BuildRequires: pcre-devel
BuildRequires: pcre2-devel
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: libtool
Patch0: liblognorm-2.0.6-rhbz2105934-sphinx5.patch
Patch1: liblognorm-configure-glitch.patch
Patch2: liblognorm-2.0.6-rhbz2128320.patch
%description
Briefly described, liblognorm is a tool to normalize log data.
@ -59,11 +63,12 @@ log files.
%patch -P 0 -p1 -b .sphinx5
%patch -P 1 -p1 -b .configure-glitch
%patch -P 2 -p1 -b .pcre2
%build
# Prevent rebuild of the configure script.
touch configure aclocal.m4 Makefile.in config.h.in
autoreconf --verbose --force --install
%configure --enable-regexp --enable-docs --docdir=%{htmldir} --includedir=%{_includedir}/%{name}/
@ -97,6 +102,10 @@ rm %{buildroot}%{htmldir}/{objects.inv,.buildinfo}
%changelog
* Thu Jun 08 2023 Attila Lakatos <alakatos@redhat.com> - 2.0.6-9
- Port pcre dependency to pcre2
resolves: rhbz#2128320
* Wed May 31 2023 Attila Lakatos <alakatos@redhat.com> - 2.0.6-8
- Update License tag for SPDX
- Apache 2.0 was missing according to upstream sources