perl/perl-5.26.2-RC1-Parse-caret-vars-with-subscripts-the-same-as-normal-.patch

From 07ebe9c4fb1028d17e61caabe8c15abd0cd48983 Mon Sep 17 00:00:00 2001
From: Yves Orton <demerphq@gmail.com>
Date: Thu, 29 Jun 2017 11:31:14 +0200
Subject: [PATCH] Parse caret vars with subscripts the same as normal vars
 inside of ${..} escaping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This behavior is discussed in perl #131664, which complains that
"${^CAPTURE}[0]" does not work as expected. Abigail explains the
behavior is by design and Eirik Berg Hanssen expands on that explanation
pointing out that what /should/ work, "${^CAPTURE[0]}" does not,
which Sawyer then ruled was a bug.

So this patch makes "${^CAPTURE[0]}" (and "${^CAPTURE [0]}" [hi
abigial]) work the same as they would if the var was called @foo.

Petr Písař: Ported to 5.26.2-RC1.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 t/base/lex.t | 28 +++++++++++++++++++++++++++-
 toke.c       | 46 +++++++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/t/base/lex.t b/t/base/lex.t
index 99fd3bb..ae17bbd 100644
--- a/t/base/lex.t
+++ b/t/base/lex.t
@@ -1,6 +1,6 @@
 #!./perl
 
-print "1..112\n";
+print "1..119\n";
 
 $x = 'x';
 
@@ -154,6 +154,32 @@ my $test = 31;
   print "not " unless index ($@, 'Can\'t use global $^XYZ in "my"') > -1;
   print "ok $test\n"; $test++;
 #  print "($@)\n" if $@;
+#
+  ${^TEST}= "splat";
+  @{^TEST}= ("foo", "bar");
+  %{^TEST}= ("foo" => "FOO", "bar" => "BAR" );
+
+  print "not " if "${^TEST}" ne "splat";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${^TEST}[0]" ne "splat[0]";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${^TEST[0]}" ne "foo";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${ ^TEST [1] }" ne "bar";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${^TEST}{foo}" ne "splat{foo}";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${^TEST{foo}}" ne "FOO";
+  print "ok $test\n"; $test++;
+
+  print "not " if "${ ^TEST {bar} }" ne "BAR";
+  print "ok $test\n"; $test++;
+
 
 # Now let's make sure that caret variables are all forced into the main package.
   package Someother;
diff --git a/toke.c b/toke.c
index ee9c464..aff785b 100644
--- a/toke.c
+++ b/toke.c
@@ -9416,19 +9416,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
         bool skip;
         char *s2;
         /* If we were processing {...} notation then...  */
-        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
-            /* if it starts as a valid identifier, assume that it is one.
-               (the later check for } being at the expected point will trap
-               cases where this doesn't pan out.)  */
-            d += is_utf8 ? UTF8SKIP(d) : 1;
-            parse_ident(&s, &d, e, 1, is_utf8, TRUE);
-	    *d = '\0';
+        if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
+            || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+                 && isWORDCHAR(*s))
+        ) {
+            /* note we have to check for a normal identifier first,
+             * as it handles utf8 symbols, and only after that has
+             * been ruled out can we look at the caret words */
+            if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
+                /* if it starts as a valid identifier, assume that it is one.
+                   (the later check for } being at the expected point will trap
+                   cases where this doesn't pan out.)  */
+                d += is_utf8 ? UTF8SKIP(d) : 1;
+                parse_ident(&s, &d, e, 1, is_utf8, TRUE);
+                *d = '\0';
+            }
+            else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
+                d++;
+                while (isWORDCHAR(*s) && d < e) {
+                    *d++ = *s++;
+                }
+                if (d >= e)
+                    Perl_croak(aTHX_ "%s", ident_too_long);
+                *d = '\0';
+            }
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
                 s = skipspace(s);
             }
 	    if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
-                /* ${foo[0]} and ${foo{bar}} notation.  */
+                /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
 		if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
 		    const char * const brack =
 			(const char *)
@@ -9447,19 +9464,6 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
 		return s;
 	    }
 	}
-	/* Handle extended ${^Foo} variables
-	 * 1999-02-27 mjd-perl-patch@plover.com */
-	else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
-		 && isWORDCHAR(*s))
-	{
-	    d++;
-	    while (isWORDCHAR(*s) && d < e) {
-		*d++ = *s++;
-	    }
-	    if (d >= e)
-		Perl_croak(aTHX_ "%s", ident_too_long);
-	    *d = '\0';
-	}
 
         if ( !tmp_copline )
             tmp_copline = CopLINE(PL_curcop);
-- 
2.14.3
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`From 07ebe9c4fb1028d17e61caabe8c15abd0cd48983 Mon Sep 17 00:00:00 2001`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`From: Yves Orton <demerphq@gmail.com>`
			`Date: Thu, 29 Jun 2017 11:31:14 +0200`
			`Subject: [PATCH] Parse caret vars with subscripts the same as normal vars`
			`inside of ${..} escaping`
			`MIME-Version: 1.0`
			`Content-Type: text/plain; charset=UTF-8`
			`Content-Transfer-Encoding: 8bit`

			`This behavior is discussed in perl #131664, which complains that`
			`"${^CAPTURE}[0]" does not work as expected. Abigail explains the`
			`behavior is by design and Eirik Berg Hanssen expands on that explanation`
			`pointing out that what /should/ work, "${^CAPTURE[0]}" does not,`
			`which Sawyer then ruled was a bug.`

			`So this patch makes "${^CAPTURE[0]}" (and "${^CAPTURE [0]}" [hi`
			`abigial]) work the same as they would if the var was called @foo.`

5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`Petr Písař: Ported to 5.26.2-RC1.`

Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`Signed-off-by: Petr Písař <ppisar@redhat.com>`
			`---`
			`t/base/lex.t \| 28 +++++++++++++++++++++++++++-`
			`toke.c \| 46 +++++++++++++++++++++++++---------------------`
			`2 files changed, 52 insertions(+), 22 deletions(-)`

			`diff --git a/t/base/lex.t b/t/base/lex.t`
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`index 99fd3bb..ae17bbd 100644`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`--- a/t/base/lex.t`
			`+++ b/t/base/lex.t`
			`@@ -1,6 +1,6 @@`
			`#!./perl`

5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`-print "1..112\n";`
			`+print "1..119\n";`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00
			`$x = 'x';`

			`@@ -154,6 +154,32 @@ my $test = 31;`
			`print "not " unless index ($@, 'Can\'t use global $^XYZ in "my"') > -1;`
			`print "ok $test\n"; $test++;`
			`# print "($@)\n" if $@;`
			`+#`
			`+ ${^TEST}= "splat";`
			`+ @{^TEST}= ("foo", "bar");`
			`+ %{^TEST}= ("foo" => "FOO", "bar" => "BAR" );`
			`+`
			`+ print "not " if "${^TEST}" ne "splat";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${^TEST}[0]" ne "splat[0]";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${^TEST[0]}" ne "foo";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${ ^TEST [1] }" ne "bar";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${^TEST}{foo}" ne "splat{foo}";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${^TEST{foo}}" ne "FOO";`
			`+ print "ok $test\n"; $test++;`
			`+`
			`+ print "not " if "${ ^TEST {bar} }" ne "BAR";`
			`+ print "ok $test\n"; $test++;`
			`+`

			`# Now let's make sure that caret variables are all forced into the main package.`
			`package Someother;`
			`diff --git a/toke.c b/toke.c`
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`index ee9c464..aff785b 100644`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`--- a/toke.c`
			`+++ b/toke.c`
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`@@ -9416,19 +9416,36 @@ S_scan_ident(pTHX_ char s, char dest, STRLEN destlen, I32 ck_uni)`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`bool skip;`
			`char *s2;`
			`/* If we were processing {...} notation then... */`
			`- if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {`
			`- /* if it starts as a valid identifier, assume that it is one.`
			`- (the later check for } being at the expected point will trap`
			`- cases where this doesn't pan out.) */`
			`- d += is_utf8 ? UTF8SKIP(d) : 1;`
			`- parse_ident(&s, &d, e, 1, is_utf8, TRUE);`
			`- *d = '\0';`
			`+ if (isIDFIRST_lazy_if_safe(d, e, is_utf8)`
			`+ \|\| (!isPRINT(d) / isCNTRL(d), plus all non-ASCII */`
			`+ && isWORDCHAR(*s))`
			`+ ) {`
			`+ /* note we have to check for a normal identifier first,`
			`+ * as it handles utf8 symbols, and only after that has`
			`+ * been ruled out can we look at the caret words */`
			`+ if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {`
			`+ /* if it starts as a valid identifier, assume that it is one.`
			`+ (the later check for } being at the expected point will trap`
			`+ cases where this doesn't pan out.) */`
			`+ d += is_utf8 ? UTF8SKIP(d) : 1;`
			`+ parse_ident(&s, &d, e, 1, is_utf8, TRUE);`
			`+ *d = '\0';`
			`+ }`
			`+ else { /* caret word: ${^Foo} ${^CAPTURE[0]} */`
			`+ d++;`
			`+ while (isWORDCHAR(*s) && d < e) {`
			`+ d++ = s++;`
			`+ }`
			`+ if (d >= e)`
			`+ Perl_croak(aTHX_ "%s", ident_too_long);`
			`+ *d = '\0';`
			`+ }`
			`tmp_copline = CopLINE(PL_curcop);`
			`if (s < PL_bufend && isSPACE(*s)) {`
			`s = skipspace(s);`
			`}`
			`if ((s == '[' \|\| (s == '{' && strNE(dest, "sub")))) {`
			`- /* ${foo[0]} and ${foo{bar}} notation. */`
			`+ /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */`
			`if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {`
			`const char * const brack =`
			`(const char *)`
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`@@ -9447,19 +9464,6 @@ S_scan_ident(pTHX_ char s, char dest, STRLEN destlen, I32 ck_uni)`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00			`return s;`
			`}`
			`}`
			`- /* Handle extended ${^Foo} variables`
			`- * 1999-02-27 mjd-perl-patch@plover.com */`
			`- else if (! isPRINT(d) / isCNTRL(d), plus all non-ASCII */`
			`- && isWORDCHAR(*s))`
			`- {`
			`- d++;`
			`- while (isWORDCHAR(*s) && d < e) {`
			`- d++ = s++;`
			`- }`
			`- if (d >= e)`
			`- Perl_croak(aTHX_ "%s", ident_too_long);`
			`- *d = '\0';`
			`- }`

			`if ( !tmp_copline )`
			`tmp_copline = CopLINE(PL_curcop);`
			`--`
5.26.2-RC1 bump 2018-03-26 10:14:05 +00:00			`2.14.3`
Parse caret variables with subscripts as normal variables inside ${...} escaping 2017-08-08 13:56:03 +00:00