Make File::Glob more resistant against degenerative matching
This commit is contained in:
parent
1ad8b2225f
commit
034407d342
@ -0,0 +1,258 @@
|
|||||||
|
From 0db967b2e6a4093a6a5f649190159767e5d005e0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yves Orton <demerphq@gmail.com>
|
||||||
|
Date: Tue, 25 Apr 2017 15:17:06 +0200
|
||||||
|
Subject: [PATCH] [perl #131211] fixup File::Glob degenerate matching
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The old code would go quadratic with recursion and backtracking
|
||||||
|
when doing patterns like "a*a*a*a*a*a*a*x" on a file like
|
||||||
|
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".
|
||||||
|
|
||||||
|
This patch changes the code to not recurse, and to not backtrack,
|
||||||
|
as per this article from Russ Cox: https://research.swtch.com/glob
|
||||||
|
|
||||||
|
It also adds a micro-optimisation for M_ONE and M_SET under the new code.
|
||||||
|
|
||||||
|
Thanks to Avar and Russ Cox for helping with this patch, along with
|
||||||
|
Jilles Tjoelker and the rest of the FreeBSD community.
|
||||||
|
|
||||||
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||||
|
---
|
||||||
|
MANIFEST | 1 +
|
||||||
|
ext/File-Glob/bsd_glob.c | 64 +++++++++++++++++++++++--------
|
||||||
|
ext/File-Glob/t/rt131211.t | 94 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
3 files changed, 144 insertions(+), 15 deletions(-)
|
||||||
|
create mode 100644 ext/File-Glob/t/rt131211.t
|
||||||
|
|
||||||
|
diff --git a/MANIFEST b/MANIFEST
|
||||||
|
index b7b6e74..af0da6c 100644
|
||||||
|
--- a/MANIFEST
|
||||||
|
+++ b/MANIFEST
|
||||||
|
@@ -3948,6 +3948,7 @@ ext/File-Glob/t/basic.t See if File::Glob works
|
||||||
|
ext/File-Glob/t/case.t See if File::Glob works
|
||||||
|
ext/File-Glob/t/global.t See if File::Glob works
|
||||||
|
ext/File-Glob/t/rt114984.t See if File::Glob works
|
||||||
|
+ext/File-Glob/t/rt131211.t See if File::Glob works
|
||||||
|
ext/File-Glob/t/taint.t See if File::Glob works
|
||||||
|
ext/File-Glob/t/threads.t See if File::Glob + threads works
|
||||||
|
ext/File-Glob/TODO File::Glob extension todo list
|
||||||
|
diff --git a/ext/File-Glob/bsd_glob.c b/ext/File-Glob/bsd_glob.c
|
||||||
|
index 821ef20..e96fb73 100644
|
||||||
|
--- a/ext/File-Glob/bsd_glob.c
|
||||||
|
+++ b/ext/File-Glob/bsd_glob.c
|
||||||
|
@@ -563,8 +563,12 @@ glob0(const Char *pattern, glob_t *pglob)
|
||||||
|
break;
|
||||||
|
case BG_STAR:
|
||||||
|
pglob->gl_flags |= GLOB_MAGCHAR;
|
||||||
|
- /* collapse adjacent stars to one,
|
||||||
|
- * to avoid exponential behavior
|
||||||
|
+ /* Collapse adjacent stars to one.
|
||||||
|
+ * This is required to ensure that a pattern like
|
||||||
|
+ * "a**" matches a name like "a", as without this
|
||||||
|
+ * check when the first star matched everything it would
|
||||||
|
+ * cause the second star to return a match fail.
|
||||||
|
+ * As long ** is folded here this does not happen.
|
||||||
|
*/
|
||||||
|
if (bufnext == patbuf || bufnext[-1] != M_ALL)
|
||||||
|
*bufnext++ = M_ALL;
|
||||||
|
@@ -909,35 +913,56 @@ globextend(const Char *path, glob_t *pglob, size_t *limitp)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * pattern matching function for filenames. Each occurrence of the *
|
||||||
|
- * pattern causes a recursion level.
|
||||||
|
+ * pattern matching function for filenames using state machine to avoid
|
||||||
|
+ * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
|
||||||
|
+ * without additional callframes, and to do cleanly prune the backtracking
|
||||||
|
+ * state when multiple '*' (start) matches are included in the patter.
|
||||||
|
+ *
|
||||||
|
+ * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
|
||||||
|
+ * matching on failure.
|
||||||
|
+ *
|
||||||
|
+ * https://research.swtch.com/glob
|
||||||
|
+ *
|
||||||
|
+ * An example would be a pattern
|
||||||
|
+ * ("a*" x 100) . "y"
|
||||||
|
+ * against a file name like
|
||||||
|
+ * ("a" x 100) . "x"
|
||||||
|
+ *
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
match(Char *name, Char *pat, Char *patend, int nocase)
|
||||||
|
{
|
||||||
|
int ok, negate_range;
|
||||||
|
Char c, k;
|
||||||
|
+ Char *nextp = NULL;
|
||||||
|
+ Char *nextn = NULL;
|
||||||
|
|
||||||
|
+ loop:
|
||||||
|
while (pat < patend) {
|
||||||
|
c = *pat++;
|
||||||
|
switch (c & M_MASK) {
|
||||||
|
case M_ALL:
|
||||||
|
if (pat == patend)
|
||||||
|
return(1);
|
||||||
|
- do
|
||||||
|
- if (match(name, pat, patend, nocase))
|
||||||
|
- return(1);
|
||||||
|
- while (*name++ != BG_EOS)
|
||||||
|
- ;
|
||||||
|
- return(0);
|
||||||
|
+ if (*name == BG_EOS)
|
||||||
|
+ return 0;
|
||||||
|
+ nextn = name + 1;
|
||||||
|
+ nextp = pat - 1;
|
||||||
|
+ break;
|
||||||
|
case M_ONE:
|
||||||
|
+ /* since * matches leftmost-shortest first *
|
||||||
|
+ * if we encounter the EOS then backtracking *
|
||||||
|
+ * will not help, so we can exit early here. */
|
||||||
|
if (*name++ == BG_EOS)
|
||||||
|
- return(0);
|
||||||
|
+ return 0;
|
||||||
|
break;
|
||||||
|
case M_SET:
|
||||||
|
ok = 0;
|
||||||
|
+ /* since * matches leftmost-shortest first *
|
||||||
|
+ * if we encounter the EOS then backtracking *
|
||||||
|
+ * will not help, so we can exit early here. */
|
||||||
|
if ((k = *name++) == BG_EOS)
|
||||||
|
- return(0);
|
||||||
|
+ return 0;
|
||||||
|
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
|
||||||
|
++pat;
|
||||||
|
while (((c = *pat++) & M_MASK) != M_END)
|
||||||
|
@@ -953,16 +978,25 @@ match(Char *name, Char *pat, Char *patend, int nocase)
|
||||||
|
} else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
|
||||||
|
ok = 1;
|
||||||
|
if (ok == negate_range)
|
||||||
|
- return(0);
|
||||||
|
+ goto fail;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
k = *name++;
|
||||||
|
if (nocase ? (tolower(k) != tolower(c)) : (k != c))
|
||||||
|
- return(0);
|
||||||
|
+ goto fail;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- return(*name == BG_EOS);
|
||||||
|
+ if (*name == BG_EOS)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ fail:
|
||||||
|
+ if (nextn) {
|
||||||
|
+ pat = nextp;
|
||||||
|
+ name = nextn;
|
||||||
|
+ goto loop;
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Free allocated data belonging to a glob_t structure. */
|
||||||
|
diff --git a/ext/File-Glob/t/rt131211.t b/ext/File-Glob/t/rt131211.t
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..c1bcbe0
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/ext/File-Glob/t/rt131211.t
|
||||||
|
@@ -0,0 +1,94 @@
|
||||||
|
+use strict;
|
||||||
|
+use warnings;
|
||||||
|
+use v5.16.0;
|
||||||
|
+use File::Temp 'tempdir';
|
||||||
|
+use File::Spec::Functions;
|
||||||
|
+use Test::More;
|
||||||
|
+use Time::HiRes qw(time);
|
||||||
|
+
|
||||||
|
+plan tests => 13;
|
||||||
|
+
|
||||||
|
+my $path = tempdir uc cleanup => 1;
|
||||||
|
+my @files= (
|
||||||
|
+ "x".("a" x 50)."b", # 0
|
||||||
|
+ "abbbbbbbbbbbbc", # 1
|
||||||
|
+ "abbbbbbbbbbbbd", # 2
|
||||||
|
+ "aaabaaaabaaaabc", # 3
|
||||||
|
+ "pq", # 4
|
||||||
|
+ "r", # 5
|
||||||
|
+ "rttiiiiiii", # 6
|
||||||
|
+ "wewewewewewe", # 7
|
||||||
|
+ "weeeweeeweee", # 8
|
||||||
|
+ "weewweewweew", # 9
|
||||||
|
+ "wewewewewewewewewewewewewewewewewq", # 10
|
||||||
|
+ "wtttttttetttttttwr", # 11
|
||||||
|
+);
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+foreach (@files) {
|
||||||
|
+ open(my $f, ">", catfile $path, $_);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+my $elapsed_fail= 0;
|
||||||
|
+my $elapsed_match= 0;
|
||||||
|
+my @got_files;
|
||||||
|
+my @no_files;
|
||||||
|
+my $count = 0;
|
||||||
|
+
|
||||||
|
+while (++$count < 10) {
|
||||||
|
+ $elapsed_match -= time;
|
||||||
|
+ @got_files= glob catfile $path, "x".("a*" x $count) . "b";
|
||||||
|
+ $elapsed_match += time;
|
||||||
|
+
|
||||||
|
+ $elapsed_fail -= time;
|
||||||
|
+ @no_files= glob catfile $path, "x".("a*" x $count) . "c";
|
||||||
|
+ $elapsed_fail += time;
|
||||||
|
+ last if $elapsed_fail > $elapsed_match * 100;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+is $count,10,
|
||||||
|
+ "tried all the patterns without bailing out";
|
||||||
|
+
|
||||||
|
+cmp_ok $elapsed_fail/$elapsed_match,"<",2,
|
||||||
|
+ "time to fail less than twice the time to match";
|
||||||
|
+is "@got_files", catfile($path, $files[0]),
|
||||||
|
+ "only got the expected file for xa*..b";
|
||||||
|
+is "@no_files", "", "shouldnt have files for xa*..c";
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+@got_files= glob catfile $path, "a*b*b*b*bc";
|
||||||
|
+is "@got_files", catfile($path, $files[1]),
|
||||||
|
+ "only got the expected file for a*b*b*b*bc";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "a*b*b*bc";
|
||||||
|
+is "@got_files", catfile($path, $files[3])." ".catfile($path,$files[1]),
|
||||||
|
+ "got the expected two files for a*b*b*bc";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "p*";
|
||||||
|
+is "@got_files", catfile($path, $files[4]),
|
||||||
|
+ "p* matches pq";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "r*???????";
|
||||||
|
+is "@got_files", catfile($path, $files[6]),
|
||||||
|
+ "r*??????? works as expected";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "w*e*w??e";
|
||||||
|
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8)),
|
||||||
|
+ "w*e*w??e works as expected";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "w*e*we??";
|
||||||
|
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
|
||||||
|
+ "w*e*we?? works as expected";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "w**e**w";
|
||||||
|
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (9)),
|
||||||
|
+ "w**e**w works as expected";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "*wee*";
|
||||||
|
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (8,9)),
|
||||||
|
+ "*wee* works as expected";
|
||||||
|
+
|
||||||
|
+@got_files= sort glob catfile $path, "we*";
|
||||||
|
+is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8,9,10)),
|
||||||
|
+ "we* works as expected";
|
||||||
|
+
|
||||||
|
--
|
||||||
|
2.9.4
|
||||||
|
|
11
perl.spec
11
perl.spec
@ -37,7 +37,7 @@
|
|||||||
Name: perl
|
Name: perl
|
||||||
Version: %{perl_version}
|
Version: %{perl_version}
|
||||||
# release number must be even higher, because dual-lived modules will be broken otherwise
|
# release number must be even higher, because dual-lived modules will be broken otherwise
|
||||||
Release: 393%{?dist}
|
Release: 394%{?dist}
|
||||||
Epoch: %{perl_epoch}
|
Epoch: %{perl_epoch}
|
||||||
Summary: Practical Extraction and Report Language
|
Summary: Practical Extraction and Report Language
|
||||||
Group: Development/Languages
|
Group: Development/Languages
|
||||||
@ -137,6 +137,10 @@ Patch26: perl-5.18.2-Destroy-GDBM-NDBM-ODBM-SDBM-_File-objects-only-from-
|
|||||||
# This allows not to require perl-devel. Bug #1129443
|
# This allows not to require perl-devel. Bug #1129443
|
||||||
Patch30: perl-5.22.1-Replace-EU-MM-dependnecy-with-EU-MM-Utils-in-IPC-Cmd.patch
|
Patch30: perl-5.22.1-Replace-EU-MM-dependnecy-with-EU-MM-Utils-in-IPC-Cmd.patch
|
||||||
|
|
||||||
|
# Make File::Glob more resistant against degenerative matching, RT#131211,
|
||||||
|
# in upstream after 5.27.0
|
||||||
|
Patch31: perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch
|
||||||
|
|
||||||
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
||||||
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
||||||
|
|
||||||
@ -2785,6 +2789,7 @@ Perl extension for Version Objects
|
|||||||
%patch22 -p1
|
%patch22 -p1
|
||||||
%patch26 -p1
|
%patch26 -p1
|
||||||
%patch30 -p1
|
%patch30 -p1
|
||||||
|
%patch31 -p1
|
||||||
%patch200 -p1
|
%patch200 -p1
|
||||||
%patch201 -p1
|
%patch201 -p1
|
||||||
|
|
||||||
@ -2805,6 +2810,7 @@ perl -x patchlevel.h \
|
|||||||
'Fedora Patch26: Make *DBM_File desctructors thread-safe (RT#61912)' \
|
'Fedora Patch26: Make *DBM_File desctructors thread-safe (RT#61912)' \
|
||||||
'Fedora Patch27: Make PadlistNAMES() lvalue again (CPAN RT#101063)' \
|
'Fedora Patch27: Make PadlistNAMES() lvalue again (CPAN RT#101063)' \
|
||||||
'Fedora Patch30: Replace EU::MakeMaker dependency with EU::MM::Utils in IPC::Cmd (bug #1129443)' \
|
'Fedora Patch30: Replace EU::MakeMaker dependency with EU::MM::Utils in IPC::Cmd (bug #1129443)' \
|
||||||
|
'Fedora Patch31: Make File::Glob more resistant against degenerative matching (RT#131211)' \
|
||||||
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
||||||
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
||||||
%{nil}
|
%{nil}
|
||||||
@ -5087,6 +5093,9 @@ popd
|
|||||||
|
|
||||||
# Old changelog entries are preserved in CVS.
|
# Old changelog entries are preserved in CVS.
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Jun 16 2017 Petr Pisar <ppisar@redhat.com> - 4:5.26.0-394
|
||||||
|
- Make File::Glob more resistant against degenerative matching (RT#131211)
|
||||||
|
|
||||||
* Tue Jun 06 2017 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.26.0-393
|
* Tue Jun 06 2017 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.26.0-393
|
||||||
- Stop providing old perl(MODULE_COMPAT_5.24.*)
|
- Stop providing old perl(MODULE_COMPAT_5.24.*)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user