1645 lines
58 KiB
Diff
1645 lines
58 KiB
Diff
commit 51c500269bf53749b107807d84271385fad35628
|
||
Author: Marek Polacek <polacek@redhat.com>
|
||
Date: Wed Oct 6 14:33:59 2021 -0400
|
||
|
||
libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026]
|
||
|
||
From a link below:
|
||
"An issue was discovered in the Bidirectional Algorithm in the Unicode
|
||
Specification through 14.0. It permits the visual reordering of
|
||
characters via control sequences, which can be used to craft source code
|
||
that renders different logic than the logical ordering of tokens
|
||
ingested by compilers and interpreters. Adversaries can leverage this to
|
||
encode source code for compilers accepting Unicode such that targeted
|
||
vulnerabilities are introduced invisibly to human reviewers."
|
||
|
||
More info:
|
||
https://nvd.nist.gov/vuln/detail/CVE-2021-42574
|
||
https://trojansource.codes/
|
||
|
||
This is not a compiler bug. However, to mitigate the problem, this patch
|
||
implements -Wbidi-chars=[none|unpaired|any] to warn about possibly
|
||
misleading Unicode bidirectional control characters the preprocessor may
|
||
encounter.
|
||
|
||
The default is =unpaired, which warns about improperly terminated
|
||
bidirectional control characters; e.g. a LRE without its corresponding PDF.
|
||
The level =any warns about any use of bidirectional control characters.
|
||
|
||
This patch handles both UCNs and UTF-8 characters. UCNs designating
|
||
bidi characters in identifiers have been accepted since r204886. Then r217144
|
||
enabled -fextended-identifiers by default. Extended characters in C/C++
|
||
identifiers have been accepted since r275979. However, this patch still
|
||
warns about mixing UTF-8 and UCN bidi characters; there seems to be no
|
||
good reason to allow mixing them.
|
||
|
||
We warn in different contexts: comments (both C and C++-style), string
|
||
literals, character constants, and identifiers. Expectedly, UCNs are ignored
|
||
in comments and raw string literals. The bidirectional control characters
|
||
can nest so this patch handles that as well.
|
||
|
||
I have neither included nor tested this at all with Fortran (which also has
|
||
string literals and line comments).
|
||
|
||
Dave M. posted patches improving diagnostics involving Unicode characters.
|
||
This patch does not make use of this new infrastructure yet.
|
||
|
||
PR preprocessor/103026
|
||
|
||
gcc/c-family/ChangeLog:
|
||
|
||
* c.opt (Wbidi-chars, Wbidi-chars=): New option.
|
||
|
||
gcc/ChangeLog:
|
||
|
||
* doc/invoke.texi: Document -Wbidi-chars.
|
||
|
||
libcpp/ChangeLog:
|
||
|
||
* include/cpplib.h (enum cpp_bidirectional_level): New.
|
||
(struct cpp_options): Add cpp_warn_bidirectional.
|
||
(enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
|
||
* internal.h (struct cpp_reader): Add warn_bidi_p member
|
||
function.
|
||
* init.c (cpp_create_reader): Set cpp_warn_bidirectional.
|
||
* lex.c (bidi): New namespace.
|
||
(get_bidi_utf8): New function.
|
||
(get_bidi_ucn): Likewise.
|
||
(maybe_warn_bidi_on_close): Likewise.
|
||
(maybe_warn_bidi_on_char): Likewise.
|
||
(_cpp_skip_block_comment): Implement warning about bidirectional
|
||
control characters.
|
||
(skip_line_comment): Likewise.
|
||
(forms_identifier_p): Likewise.
|
||
(lex_identifier): Likewise.
|
||
(lex_string): Likewise.
|
||
(lex_raw_string): Likewise.
|
||
|
||
gcc/testsuite/ChangeLog:
|
||
|
||
* c-c++-common/Wbidi-chars-1.c: New test.
|
||
* c-c++-common/Wbidi-chars-2.c: New test.
|
||
* c-c++-common/Wbidi-chars-3.c: New test.
|
||
* c-c++-common/Wbidi-chars-4.c: New test.
|
||
* c-c++-common/Wbidi-chars-5.c: New test.
|
||
* c-c++-common/Wbidi-chars-6.c: New test.
|
||
* c-c++-common/Wbidi-chars-7.c: New test.
|
||
* c-c++-common/Wbidi-chars-8.c: New test.
|
||
* c-c++-common/Wbidi-chars-9.c: New test.
|
||
* c-c++-common/Wbidi-chars-10.c: New test.
|
||
* c-c++-common/Wbidi-chars-11.c: New test.
|
||
* c-c++-common/Wbidi-chars-12.c: New test.
|
||
* c-c++-common/Wbidi-chars-13.c: New test.
|
||
* c-c++-common/Wbidi-chars-14.c: New test.
|
||
* c-c++-common/Wbidi-chars-15.c: New test.
|
||
* c-c++-common/Wbidi-chars-16.c: New test.
|
||
* c-c++-common/Wbidi-chars-17.c: New test.
|
||
|
||
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
|
||
index f591b39be5a..cf922812198 100644
|
||
--- a/gcc/c-family/c.opt
|
||
+++ b/gcc/c-family/c.opt
|
||
@@ -334,6 +334,30 @@ Wbad-function-cast
|
||
C ObjC Var(warn_bad_function_cast) Warning
|
||
Warn about casting functions to incompatible types.
|
||
|
||
+Wbidi-chars
|
||
+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
|
||
+;
|
||
+
|
||
+Wbidi-chars=
|
||
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
|
||
+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
|
||
+
|
||
+; Required for these enum values.
|
||
+SourceInclude
|
||
+cpplib.h
|
||
+
|
||
+Enum
|
||
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
|
||
+
|
||
+EnumValue
|
||
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
|
||
+
|
||
+EnumValue
|
||
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
|
||
+
|
||
+EnumValue
|
||
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
|
||
+
|
||
Wbool-compare
|
||
C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
|
||
Warn about boolean expression compared with an integer value different from true/false.
|
||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||
index 78ca7738df2..cc85c53aede 100644
|
||
--- a/gcc/doc/invoke.texi
|
||
+++ b/gcc/doc/invoke.texi
|
||
@@ -264,7 +264,8 @@ Objective-C and Objective-C++ Dialects}.
|
||
-Walloc-zero -Walloc-size-larger-than=@var{n}
|
||
-Walloca -Walloca-larger-than=@var{n} @gol
|
||
-Wno-aggressive-loop-optimizations -Warray-bounds -Warray-bounds=@var{n} @gol
|
||
--Wno-attributes -Wbool-compare -Wbool-operation @gol
|
||
+-Wno-attributes -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
|
||
+-Wbool-compare -Wbool-operation @gol
|
||
-Wno-builtin-declaration-mismatch @gol
|
||
-Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
|
||
-Wc++-compat -Wc++11-compat -Wc++14-compat @gol
|
||
@@ -5606,6 +5607,23 @@ Warn about declarations using the @code{alias} and similar attributes whose
|
||
target is incompatible with the type of the alias. @xref{Function Attributes,
|
||
,Declaring Attributes of Functions}.
|
||
|
||
+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
|
||
+@opindex Wbidi-chars=
|
||
+@opindex Wbidi-chars
|
||
+@opindex Wno-bidi-chars
|
||
+Warn about possibly misleading UTF-8 bidirectional control characters in
|
||
+comments, string literals, character constants, and identifiers. Such
|
||
+characters can change left-to-right writing direction into right-to-left
|
||
+(and vice versa), which can cause confusion between the logical order and
|
||
+visual order. This may be dangerous; for instance, it may seem that a piece
|
||
+of code is not commented out, whereas it in fact is.
|
||
+
|
||
+There are three levels of warning supported by GCC@. The default is
|
||
+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
|
||
+bidi contexts. @option{-Wbidi-chars=none} turns the warning off.
|
||
+@option{-Wbidi-chars=any} warns about any use of bidirectional control
|
||
+characters.
|
||
+
|
||
@item -Wbool-compare
|
||
@opindex Wno-bool-compare
|
||
@opindex Wbool-compare
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
|
||
new file mode 100644
|
||
index 00000000000..34f5ac19271
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
|
||
@@ -0,0 +1,12 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+
|
||
+int main() {
|
||
+ int isAdmin = 0;
|
||
+ /* } if (isAdmin) begin admins only */
|
||
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
|
||
+ __builtin_printf("You are an admin.\n");
|
||
+ /* end admins only { */
|
||
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
|
||
+ return 0;
|
||
+}
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
|
||
new file mode 100644
|
||
index 00000000000..3f851b69e65
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
|
||
@@ -0,0 +1,27 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* More nesting testing. */
|
||
+
|
||
+/* RLE LRI PDF PDI*/
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int LRE_\u202a_PDF_\u202c;
|
||
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
|
||
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
|
||
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
|
||
+int FSI_\u2068;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int FSI_\u2068_PDI_\u2069;
|
||
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
|
||
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
|
||
new file mode 100644
|
||
index 00000000000..44d044d82de
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
|
||
@@ -0,0 +1,9 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test that we warn when mixing UCN and UTF-8. */
|
||
+
|
||
+const char *s1 = "LRE__PDF_\u202c";
|
||
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
|
||
+const char *s2 = "LRE_\u202a_PDF_";
|
||
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
|
||
new file mode 100644
|
||
index 00000000000..b07eec1da91
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
|
||
@@ -0,0 +1,19 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile { target { c || c++11 } } } */
|
||
+/* { dg-options "-Wbidi-chars=any" } */
|
||
+/* Test raw strings. */
|
||
+
|
||
+const char *s1 = R"(a b c LRE 1 2 3 PDF x y z)";
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+const char *s2 = R"(a b c RLE 1 2 3 PDF x y z)";
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+const char *s3 = R"(a b c LRO 1 2 3 PDF x y z)";
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+const char *s4 = R"(a b c RLO 1 2 3 PDF x y z)";
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+const char *s7 = R"(a b c FSI 1 2 3 PDI x y) z";
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+const char *s8 = R"(a b c PDI x y )z";
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
+const char *s9 = R"(a b c PDF x y z)";
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
|
||
new file mode 100644
|
||
index 00000000000..b2dd9fde752
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
|
||
@@ -0,0 +1,17 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile { target { c || c++11 } } } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test raw strings. */
|
||
+
|
||
+const char *s1 = R"(a b c LRE 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s2 = R"(a b c RLE 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s3 = R"(a b c LRO 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s4 = R"(a b c FSI 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s5 = R"(a b c LRI 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s6 = R"(a b c RLI 1 2 3)";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
|
||
new file mode 100644
|
||
index 00000000000..ba5f75d9553
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
|
||
@@ -0,0 +1,38 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
|
||
+ or RLOs. */
|
||
+
|
||
+/* LRI__LRI__RLE__RLE__RLE__PDI_*/
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// LRI__RLE__RLE__RLE__PDI_
|
||
+// LRI__RLO__RLE__RLE__PDI_
|
||
+// LRI__RLO__RLE__PDI_
|
||
+// FSI__RLO__PDI_
|
||
+// FSI__FSI__RLO__PDI_
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
|
||
+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int PDI_\u2069;
|
||
+int LRI_\u2066_PDI_\u2069;
|
||
+int RLI_\u2067_PDI_\u2069;
|
||
+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
|
||
+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
|
||
+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLO_\u202e_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
|
||
+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
|
||
new file mode 100644
|
||
index 00000000000..a0ce8ff5e2c
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
|
||
@@ -0,0 +1,59 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test unpaired bidi control chars in multiline comments. */
|
||
+
|
||
+/*
|
||
+ * LRE end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * RLE end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * LRO end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * RLO end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * LRI end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * RLI end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/*
|
||
+ * FSI end
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* LRE
|
||
+ PDF */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* FSI
|
||
+ PDI */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+
|
||
+/* LRE<>
|
||
+ *
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
|
||
+
|
||
+/*
|
||
+ * LRE<>
|
||
+ */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+
|
||
+/*
|
||
+ *
|
||
+ * LRE<> */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* RLI<> */ /* PDI<> */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* LRE<> */ /* PDF<> */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
|
||
new file mode 100644
|
||
index 00000000000..baa0159861c
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
|
||
@@ -0,0 +1,26 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=any" } */
|
||
+/* Test LTR/RTL chars. */
|
||
+
|
||
+/* LTR<> */
|
||
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
|
||
+// LTR<>
|
||
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
|
||
+/* RTL<> */
|
||
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
|
||
+// RTL<>
|
||
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
|
||
+
|
||
+const char *s1 = "LTR<>";
|
||
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
|
||
+const char *s2 = "LTR\u200e";
|
||
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
|
||
+const char *s3 = "LTR\u200E";
|
||
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
|
||
+const char *s4 = "RTL<>";
|
||
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
|
||
+const char *s5 = "RTL\u200f";
|
||
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
|
||
+const char *s6 = "RTL\u200F";
|
||
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
|
||
new file mode 100644
|
||
index 00000000000..07cb4321f96
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
|
||
@@ -0,0 +1,30 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test LTR/RTL chars. */
|
||
+
|
||
+/* LTR<> */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// LTR<>
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* RTL<> */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// RTL<>
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int ltr_\u200e;
|
||
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
|
||
+int rtl_\u200f;
|
||
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
|
||
+
|
||
+const char *s1 = "LTR<>";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s2 = "LTR\u200e";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s3 = "LTR\u200E";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s4 = "RTL<>";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s5 = "RTL\u200f";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s6 = "RTL\u200F";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
|
||
new file mode 100644
|
||
index 00000000000..2340374f276
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
|
||
@@ -0,0 +1,9 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+
|
||
+int main() {
|
||
+ /* Say hello; newline/*/ return 0 ;
|
||
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
|
||
+ __builtin_printf("Hello world.\n");
|
||
+ return 0;
|
||
+}
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
|
||
new file mode 100644
|
||
index 00000000000..9dc7edb6e64
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
|
||
@@ -0,0 +1,11 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+
|
||
+int main() {
|
||
+ const char* access_level = "user";
|
||
+ if (__builtin_strcmp(access_level, "user // Check if admin ")) {
|
||
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
|
||
+ __builtin_printf("You are an admin.\n");
|
||
+ }
|
||
+ return 0;
|
||
+}
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
|
||
new file mode 100644
|
||
index 00000000000..49f856b9bfe
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
|
||
@@ -0,0 +1,172 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
|
||
+/* Test all bidi chars in various contexts (identifiers, comments,
|
||
+ string literals, character constants), both UCN and UTF-8. The bidi
|
||
+ chars here are properly terminated, except for the character constants. */
|
||
+
|
||
+/* a b c LRE 1 2 3 PDF x y z */
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLE 1 2 3 PDF x y z */
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRO 1 2 3 PDF x y z */
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLO 1 2 3 PDF x y z */
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRI 1 2 3 PDI x y z */
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLI 1 2 3 PDI x y */
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+/* a b c FSI 1 2 3 PDI x y z */
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Same but C++ comments instead. */
|
||
+// a b c LRE 1 2 3 PDF x y z
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+// a b c RLE 1 2 3 PDF x y z
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+// a b c LRO 1 2 3 PDF x y z
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+// a b c RLO 1 2 3 PDF x y z
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+// a b c LRI 1 2 3 PDI x y z
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+// a b c RLI 1 2 3 PDI x y
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+// a b c FSI 1 2 3 PDI x y z
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Here we're closing an unopened context, warn when =any. */
|
||
+/* a b c PDI x y z */
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
+/* a b c PDF x y z */
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+// a b c PDI x y z
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
+// a b c PDF x y z
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Multiline comments. */
|
||
+/* a b c PDI x y z
|
||
+ */
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
|
||
+/* a b c PDF x y z
|
||
+ */
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDI x y z
|
||
+ */
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDF x y z
|
||
+ */
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDI x y z */
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
+/* first
|
||
+ a b c PDF x y z */
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+
|
||
+void
|
||
+g1 ()
|
||
+{
|
||
+ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+ const char *s8 = "a b c PDI x y z";
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
+ const char *s9 = "a b c PDF x y z";
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+
|
||
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+}
|
||
+
|
||
+void
|
||
+g2 ()
|
||
+{
|
||
+ const char c1 = '\u202a';
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+ const char c2 = '\u202A';
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+ const char c3 = '\u202b';
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+ const char c4 = '\u202B';
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+ const char c5 = '\u202d';
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+ const char c6 = '\u202D';
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+ const char c7 = '\u202e';
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+ const char c8 = '\u202E';
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+ const char c9 = '\u2066';
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+ const char c10 = '\u2067';
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+ const char c11 = '\u2068';
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+}
|
||
+
|
||
+int A\u202cY;
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+int A\u202CY2;
|
||
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
|
||
+
|
||
+int d\u202ae\u202cf;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+int d\u202Ae\u202cf2;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+int d\u202be\u202cf;
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+int d\u202Be\u202cf2;
|
||
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
|
||
+int d\u202de\u202cf;
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+int d\u202De\u202cf2;
|
||
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
|
||
+int d\u202ee\u202cf;
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+int d\u202Ee\u202cf2;
|
||
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
|
||
+int d\u2066e\u2069f;
|
||
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
|
||
+int d\u2067e\u2069f;
|
||
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
|
||
+int d\u2068e\u2069f;
|
||
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
|
||
+int X\u2069;
|
||
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
|
||
new file mode 100644
|
||
index 00000000000..f5776806c79
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
|
||
@@ -0,0 +1,172 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
|
||
+/* Test all bidi chars in various contexts (identifiers, comments,
|
||
+ string literals, character constants), both UCN and UTF-8. The bidi
|
||
+ chars here are properly terminated, except for the character constants. */
|
||
+
|
||
+/* a b c LRE 1 2 3 PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLE 1 2 3 PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRO 1 2 3 PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLO 1 2 3 PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRI 1 2 3 PDI x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLI 1 2 3 PDI x y */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c FSI 1 2 3 PDI x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Same but C++ comments instead. */
|
||
+// a b c LRE 1 2 3 PDF x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLE 1 2 3 PDF x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c LRO 1 2 3 PDF x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLO 1 2 3 PDF x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c LRI 1 2 3 PDI x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLI 1 2 3 PDI x y
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c FSI 1 2 3 PDI x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Here we're closing an unopened context, warn when =any. */
|
||
+/* a b c PDI x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c PDI x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c PDF x y z
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* Multiline comments. */
|
||
+/* a b c PDI x y z
|
||
+ */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* a b c PDF x y z
|
||
+ */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDI x y z
|
||
+ */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDF x y z
|
||
+ */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
|
||
+/* first
|
||
+ a b c PDI x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* first
|
||
+ a b c PDF x y z */
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+void
|
||
+g1 ()
|
||
+{
|
||
+ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s8 = "a b c PDI x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s9 = "a b c PDF x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+}
|
||
+
|
||
+void
|
||
+g2 ()
|
||
+{
|
||
+ const char c1 = '\u202a';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c2 = '\u202A';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c3 = '\u202b';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c4 = '\u202B';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c5 = '\u202d';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c6 = '\u202D';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c7 = '\u202e';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c8 = '\u202E';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c9 = '\u2066';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c10 = '\u2067';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char c11 = '\u2068';
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+}
|
||
+
|
||
+int A\u202cY;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int A\u202CY2;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+int d\u202ae\u202cf;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202Ae\u202cf2;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202be\u202cf;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202Be\u202cf2;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202de\u202cf;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202De\u202cf2;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202ee\u202cf;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u202Ee\u202cf2;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u2066e\u2069f;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u2067e\u2069f;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int d\u2068e\u2069f;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
+int X\u2069;
|
||
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
|
||
new file mode 100644
|
||
index 00000000000..a65d6faf60e
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
|
||
@@ -0,0 +1,130 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test nesting of bidi chars in various contexts. */
|
||
+
|
||
+/* Terminated by the wrong char: */
|
||
+/* a b c LRE 1 2 3 PDI x y z */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLE 1 2 3 PDI x y z*/
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRO 1 2 3 PDI x y z */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLO 1 2 3 PDI x y z */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c LRI 1 2 3 PDF x y z */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c RLI 1 2 3 PDF x y z */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* a b c FSI 1 2 3 PDF x y z*/
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+/* LRE PDF */
|
||
+/* LRE LRE PDF PDF */
|
||
+/* PDF LRE PDF */
|
||
+/* LRE PDF LRE PDF */
|
||
+/* LRE LRE PDF */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* PDF LRE */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+// a b c LRE 1 2 3 PDI x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLE 1 2 3 PDI x y z*/
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c LRO 1 2 3 PDI x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLO 1 2 3 PDI x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c LRI 1 2 3 PDF x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c RLI 1 2 3 PDF x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// a b c FSI 1 2 3 PDF x y z
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+// LRE PDF
|
||
+// LRE LRE PDF PDF
|
||
+// PDF LRE PDF
|
||
+// LRE PDF LRE PDF
|
||
+// LRE LRE PDF
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+// PDF LRE
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+void
|
||
+g1 ()
|
||
+{
|
||
+ const char *s1 = "a b c LRE 1 2 3 PDI x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s3 = "a b c RLE 1 2 3 PDI x y ";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s5 = "a b c LRO 1 2 3 PDI x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s7 = "a b c RLO 1 2 3 PDI x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s9 = "a b c LRI 1 2 3 PDF x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s11 = "a b c RLI 1 2 3 PDF x y z\
|
||
+ ";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
|
||
+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s13 = "a b c FSI 1 2 3 PDF x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s15 = "PDF LRE";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s16 = "PDF\u202c LRE\u202a";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s17 = "LRE PDF";
|
||
+ const char *s18 = "LRE\u202a PDF\u202c";
|
||
+ const char *s19 = "LRE LRE PDF PDF";
|
||
+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
|
||
+ const char *s21 = "PDF LRE PDF";
|
||
+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
|
||
+ const char *s23 = "LRE LRE PDF";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s25 = "PDF LRE";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s26 = "PDF\u202c LRE\u202a";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s27 = "PDF LRE\u202a";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+ const char *s28 = "PDF\u202c LRE";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+}
|
||
+
|
||
+int A\u202aB\u2069C;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u202bB\u2069c;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u202db\u2069c2;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u202eb\u2069;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u2066b\u202c;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u2067b\u202c;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int a\u2068b\u202c;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int A\u202aB\u202c;
|
||
+int A\u202aA\u202aB\u202cB\u202c;
|
||
+int a_\u202C_\u202a_\u202c;
|
||
+int a_\u202a_\u202c_\u202a_\u202c_;
|
||
+int a_\u202a_\u202c_\u202a_;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
|
||
new file mode 100644
|
||
index 00000000000..d012d420ec0
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
|
||
@@ -0,0 +1,9 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=any" } */
|
||
+/* Test we ignore UCNs in comments. */
|
||
+
|
||
+// a b c \u202a 1 2 3
|
||
+// a b c \u202A 1 2 3
|
||
+/* a b c \u202a 1 2 3 */
|
||
+/* a b c \u202A 1 2 3 */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
|
||
new file mode 100644
|
||
index 00000000000..4f54c5092ec
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
|
||
@@ -0,0 +1,13 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=any" } */
|
||
+/* Test \u vs \U. */
|
||
+
|
||
+int a_\u202A;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+int a_\u202a_2;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+int a_\U0000202A_3;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
+int a_\U0000202a_4;
|
||
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
|
||
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
|
||
new file mode 100644
|
||
index 00000000000..e2af1b1ca97
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
|
||
@@ -0,0 +1,29 @@
|
||
+/* PR preprocessor/103026 */
|
||
+/* { dg-do compile } */
|
||
+/* { dg-options "-Wbidi-chars=unpaired" } */
|
||
+/* Test that we properly separate bidi contexts (comment/identifier/character
|
||
+ constant/string literal). */
|
||
+
|
||
+/* LRE -><- */ int pdf_\u202c_1;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* RLE -><- */ int pdf_\u202c_2;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* LRO -><- */ int pdf_\u202c_3;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* RLO -><- */ int pdf_\u202c_4;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* LRI -><-*/ int pdi_\u2069_1;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* RLI -><- */ int pdi_\u2069_12;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* FSI -><- */ int pdi_\u2069_3;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+
|
||
+const char *s1 = "LRE\u202a"; /* PDF -><- */
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+/* LRE -><- */ const char *s2 = "PDF\u202c";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
+int lre_\u202a; const char *s4 = "PDF\u202c";
|
||
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
|
||
index 3ad52d5e01e..e0dcb7f0529 100644
|
||
--- a/libcpp/include/cpplib.h
|
||
+++ b/libcpp/include/cpplib.h
|
||
@@ -305,6 +305,17 @@ enum cpp_normalize_level {
|
||
normalized_none
|
||
};
|
||
|
||
+/* The possible bidirectional control characters checking levels, from least
|
||
+ restrictive to most. */
|
||
+enum cpp_bidirectional_level {
|
||
+ /* No checking. */
|
||
+ bidirectional_none,
|
||
+ /* Only detect unpaired uses of bidirectional control characters. */
|
||
+ bidirectional_unpaired,
|
||
+ /* Detect any use of bidirectional control characters. */
|
||
+ bidirectional_any
|
||
+};
|
||
+
|
||
/* This structure is nested inside struct cpp_reader, and
|
||
carries all the options visible to the command line. */
|
||
struct cpp_options
|
||
@@ -506,6 +517,10 @@ struct cpp_options
|
||
/* True if warn about differences between C++98 and C++11. */
|
||
bool cpp_warn_cxx11_compat;
|
||
|
||
+ /* Nonzero if bidirectional control characters checking is on. See enum
|
||
+ cpp_bidirectional_level. */
|
||
+ unsigned char cpp_warn_bidirectional;
|
||
+
|
||
/* Dependency generation. */
|
||
struct
|
||
{
|
||
@@ -1063,7 +1078,8 @@ enum {
|
||
CPP_W_PEDANTIC,
|
||
CPP_W_C90_C99_COMPAT,
|
||
CPP_W_CXX11_COMPAT,
|
||
- CPP_W_EXPANSION_TO_DEFINED
|
||
+ CPP_W_EXPANSION_TO_DEFINED,
|
||
+ CPP_W_BIDIRECTIONAL
|
||
};
|
||
|
||
/* Output a diagnostic of some kind. */
|
||
diff --git a/libcpp/init.c b/libcpp/init.c
|
||
index ca3fbaa5c05..5c15da82ff8 100644
|
||
--- a/libcpp/init.c
|
||
+++ b/libcpp/init.c
|
||
@@ -208,6 +208,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
|
||
= ENABLE_CANONICAL_SYSTEM_HEADERS;
|
||
CPP_OPTION (pfile, ext_numeric_literals) = 1;
|
||
CPP_OPTION (pfile, warn_date_time) = 0;
|
||
+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
|
||
|
||
/* Default CPP arithmetic to something sensible for the host for the
|
||
benefit of dumb users like fix-header. */
|
||
diff --git a/libcpp/internal.h b/libcpp/internal.h
|
||
index 4f74f995cec..53b4c0f4af7 100644
|
||
--- a/libcpp/internal.h
|
||
+++ b/libcpp/internal.h
|
||
@@ -576,6 +576,13 @@ struct cpp_reader
|
||
/* If non-null, the lexer will use this location for the next token
|
||
instead of getting a location from the linemap. */
|
||
source_location *forced_token_location_p;
|
||
+
|
||
+ /* Returns true iff we should warn about bidirectional control
|
||
+ characters, whether written as UTF-8 or as UCNs. */
|
||
+ bool warn_bidi_p () const
|
||
+ {
|
||
+ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
|
||
+ }
|
||
};
|
||
|
||
/* Character classes. Based on the more primitive macros in safe-ctype.h.
|
||
diff --git a/libcpp/lex.c b/libcpp/lex.c
|
||
index a408f912c5c..ea7f75e842e 100644
|
||
--- a/libcpp/lex.c
|
||
+++ b/libcpp/lex.c
|
||
@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
|
||
}
|
||
}
|
||
|
||
+namespace bidi {
|
||
+ enum kind {
|
||
+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
|
||
+ };
|
||
+
|
||
+ /* All the UTF-8 encodings of bidi characters start with E2. */
|
||
+ const uchar utf8_start = 0xe2;
|
||
+
|
||
+ /* A vector holding currently open bidi contexts. We use a char for
|
||
+ each context, its LSB is 1 if it represents a PDF context, 0 if it
|
||
+ represents a PDI context. The next bit is 1 if this context was opened
|
||
+ by a bidi character written as a UCN, and 0 when it was UTF-8. */
|
||
+ semi_embedded_vec <unsigned char, 16> vec;
|
||
+
|
||
+ /* Close the whole comment/identifier/string literal/character constant
|
||
+ context. */
|
||
+ void on_close ()
|
||
+ {
|
||
+ vec.truncate (0);
|
||
+ }
|
||
+
|
||
+ /* Pop the last element in the vector. */
|
||
+ void pop ()
|
||
+ {
|
||
+ unsigned int len = vec.count ();
|
||
+ gcc_checking_assert (len > 0);
|
||
+ vec.truncate (len - 1);
|
||
+ }
|
||
+
|
||
+ /* Return the context of the Ith element. */
|
||
+ kind ctx_at (unsigned int i)
|
||
+ {
|
||
+ return (vec[i] & 1) ? PDF : PDI;
|
||
+ }
|
||
+
|
||
+ /* Return which context is currently opened. */
|
||
+ kind current_ctx ()
|
||
+ {
|
||
+ unsigned int len = vec.count ();
|
||
+ if (len == 0)
|
||
+ return NONE;
|
||
+ return ctx_at (len - 1);
|
||
+ }
|
||
+
|
||
+ /* Return true if the current context comes from a UCN origin, that is,
|
||
+ the bidi char which started this bidi context was written as a UCN. */
|
||
+ bool current_ctx_ucn_p ()
|
||
+ {
|
||
+ unsigned int len = vec.count ();
|
||
+ gcc_checking_assert (len > 0);
|
||
+ return (vec[len - 1] >> 1) & 1;
|
||
+ }
|
||
+
|
||
+ /* We've read a bidi char, update the current vector as necessary. */
|
||
+ void on_char (kind k, bool ucn_p)
|
||
+ {
|
||
+ switch (k)
|
||
+ {
|
||
+ case LRE:
|
||
+ case RLE:
|
||
+ case LRO:
|
||
+ case RLO:
|
||
+ vec.push (ucn_p ? 3u : 1u);
|
||
+ break;
|
||
+ case LRI:
|
||
+ case RLI:
|
||
+ case FSI:
|
||
+ vec.push (ucn_p ? 2u : 0u);
|
||
+ break;
|
||
+ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
|
||
+ whose scope has not yet been terminated. */
|
||
+ case PDF:
|
||
+ if (current_ctx () == PDF)
|
||
+ pop ();
|
||
+ break;
|
||
+ /* PDI terminates the scope of the last LRI, RLI, or FSI whose
|
||
+ scope has not yet been terminated, as well as the scopes of
|
||
+ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
|
||
+ yet been terminated. */
|
||
+ case PDI:
|
||
+ for (int i = vec.count () - 1; i >= 0; --i)
|
||
+ if (ctx_at (i) == PDI)
|
||
+ {
|
||
+ vec.truncate (i);
|
||
+ break;
|
||
+ }
|
||
+ break;
|
||
+ case LTR:
|
||
+ case RTL:
|
||
+ /* These aren't popped by a PDF/PDI. */
|
||
+ break;
|
||
+ [[likely]] case NONE:
|
||
+ break;
|
||
+ default:
|
||
+ abort ();
|
||
+ }
|
||
+ }
|
||
+
|
||
+ /* Return a descriptive string for K. */
|
||
+ const char *to_str (kind k)
|
||
+ {
|
||
+ switch (k)
|
||
+ {
|
||
+ case LRE:
|
||
+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
|
||
+ case RLE:
|
||
+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
|
||
+ case LRO:
|
||
+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
|
||
+ case RLO:
|
||
+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
|
||
+ case LRI:
|
||
+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
|
||
+ case RLI:
|
||
+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
|
||
+ case FSI:
|
||
+ return "U+2068 (FIRST STRONG ISOLATE)";
|
||
+ case PDF:
|
||
+ return "U+202C (POP DIRECTIONAL FORMATTING)";
|
||
+ case PDI:
|
||
+ return "U+2069 (POP DIRECTIONAL ISOLATE)";
|
||
+ case LTR:
|
||
+ return "U+200E (LEFT-TO-RIGHT MARK)";
|
||
+ case RTL:
|
||
+ return "U+200F (RIGHT-TO-LEFT MARK)";
|
||
+ default:
|
||
+ abort ();
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+/* Parse a sequence of 3 bytes starting with P and return its bidi code. */
|
||
+
|
||
+static bidi::kind
|
||
+get_bidi_utf8 (const unsigned char *const p)
|
||
+{
|
||
+ gcc_checking_assert (p[0] == bidi::utf8_start);
|
||
+
|
||
+ if (p[1] == 0x80)
|
||
+ switch (p[2])
|
||
+ {
|
||
+ case 0xaa:
|
||
+ return bidi::LRE;
|
||
+ case 0xab:
|
||
+ return bidi::RLE;
|
||
+ case 0xac:
|
||
+ return bidi::PDF;
|
||
+ case 0xad:
|
||
+ return bidi::LRO;
|
||
+ case 0xae:
|
||
+ return bidi::RLO;
|
||
+ case 0x8e:
|
||
+ return bidi::LTR;
|
||
+ case 0x8f:
|
||
+ return bidi::RTL;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+ else if (p[1] == 0x81)
|
||
+ switch (p[2])
|
||
+ {
|
||
+ case 0xa6:
|
||
+ return bidi::LRI;
|
||
+ case 0xa7:
|
||
+ return bidi::RLI;
|
||
+ case 0xa8:
|
||
+ return bidi::FSI;
|
||
+ case 0xa9:
|
||
+ return bidi::PDI;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return bidi::NONE;
|
||
+}
|
||
+
|
||
+/* Parse a UCN where P points just past \u or \U and return its bidi code. */
|
||
+
|
||
+static bidi::kind
|
||
+get_bidi_ucn (const unsigned char *p, bool is_U)
|
||
+{
|
||
+ /* 6.4.3 Universal Character Names
|
||
+ \u hex-quad
|
||
+ \U hex-quad hex-quad
|
||
+ where \unnnn means \U0000nnnn. */
|
||
+
|
||
+ if (is_U)
|
||
+ {
|
||
+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
|
||
+ return bidi::NONE;
|
||
+ /* Skip 4B so we can treat \u and \U the same below. */
|
||
+ p += 4;
|
||
+ }
|
||
+
|
||
+ /* All code points we are looking for start with 20xx. */
|
||
+ if (p[0] != '2' || p[1] != '0')
|
||
+ return bidi::NONE;
|
||
+ else if (p[2] == '2')
|
||
+ switch (p[3])
|
||
+ {
|
||
+ case 'a':
|
||
+ case 'A':
|
||
+ return bidi::LRE;
|
||
+ case 'b':
|
||
+ case 'B':
|
||
+ return bidi::RLE;
|
||
+ case 'c':
|
||
+ case 'C':
|
||
+ return bidi::PDF;
|
||
+ case 'd':
|
||
+ case 'D':
|
||
+ return bidi::LRO;
|
||
+ case 'e':
|
||
+ case 'E':
|
||
+ return bidi::RLO;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+ else if (p[2] == '6')
|
||
+ switch (p[3])
|
||
+ {
|
||
+ case '6':
|
||
+ return bidi::LRI;
|
||
+ case '7':
|
||
+ return bidi::RLI;
|
||
+ case '8':
|
||
+ return bidi::FSI;
|
||
+ case '9':
|
||
+ return bidi::PDI;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+ else if (p[2] == '0')
|
||
+ switch (p[3])
|
||
+ {
|
||
+ case 'e':
|
||
+ case 'E':
|
||
+ return bidi::LTR;
|
||
+ case 'f':
|
||
+ case 'F':
|
||
+ return bidi::RTL;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return bidi::NONE;
|
||
+}
|
||
+
|
||
+/* We're closing a bidi context, that is, we've encountered a newline,
|
||
+ are closing a C-style comment, or are at the end of a string literal,
|
||
+ character constant, or identifier. Warn if this context was not
|
||
+ properly terminated by a PDI or PDF. P points to the last character
|
||
+ in this context. */
|
||
+
|
||
+static void
|
||
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
|
||
+{
|
||
+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
|
||
+ && bidi::vec.count () > 0)
|
||
+ {
|
||
+ const source_location loc
|
||
+ = linemap_position_for_column (pfile->line_table,
|
||
+ CPP_BUF_COLUMN (pfile->buffer, p));
|
||
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
|
||
+ "unpaired UTF-8 bidirectional control character "
|
||
+ "detected");
|
||
+ }
|
||
+ /* We're done with this context. */
|
||
+ bidi::on_close ();
|
||
+}
|
||
+
|
||
+/* We're at the beginning or in the middle of an identifier/comment/string
|
||
+ literal/character constant. Warn if we've encountered a bidi character.
|
||
+ KIND says which bidi character it was; P points to it in the character
|
||
+ stream. UCN_P is true iff this bidi character was written as a UCN. */
|
||
+
|
||
+static void
|
||
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
|
||
+ bool ucn_p)
|
||
+{
|
||
+ if (__builtin_expect (kind == bidi::NONE, 1))
|
||
+ return;
|
||
+
|
||
+ const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
|
||
+
|
||
+ if (warn_bidi != bidirectional_none)
|
||
+ {
|
||
+ const source_location loc
|
||
+ = linemap_position_for_column (pfile->line_table,
|
||
+ CPP_BUF_COLUMN (pfile->buffer, p));
|
||
+ /* It seems excessive to warn about a PDI/PDF that is closing
|
||
+ an opened context because we've already warned about the
|
||
+ opening character. Except warn when we have a UCN x UTF-8
|
||
+ mismatch. */
|
||
+ if (kind == bidi::current_ctx ())
|
||
+ {
|
||
+ if (warn_bidi == bidirectional_unpaired
|
||
+ && bidi::current_ctx_ucn_p () != ucn_p)
|
||
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
|
||
+ "UTF-8 vs UCN mismatch when closing "
|
||
+ "a context by \"%s\"", bidi::to_str (kind));
|
||
+ }
|
||
+ else if (warn_bidi == bidirectional_any)
|
||
+ {
|
||
+ if (kind == bidi::PDF || kind == bidi::PDI)
|
||
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
|
||
+ "\"%s\" is closing an unopened context",
|
||
+ bidi::to_str (kind));
|
||
+ else
|
||
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
|
||
+ "found problematic Unicode character \"%s\"",
|
||
+ bidi::to_str (kind));
|
||
+ }
|
||
+ }
|
||
+ /* Update the vector of open bidi contexts for this character. */
|
||
+ bidi::on_char (kind, ucn_p);
|
||
+}
|
||
+
|
||
/* Skip a C-style block comment. We find the end of the comment by
|
||
seeing if an asterisk is before every '/' we encounter. Returns
|
||
nonzero if comment terminated by EOF, zero otherwise.
|
||
@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||
cpp_buffer *buffer = pfile->buffer;
|
||
const uchar *cur = buffer->cur;
|
||
uchar c;
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
|
||
cur++;
|
||
if (*cur == '/')
|
||
@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||
if (c == '/')
|
||
{
|
||
if (cur[-2] == '*')
|
||
- break;
|
||
+ {
|
||
+ if (warn_bidi_p)
|
||
+ maybe_warn_bidi_on_close (pfile, cur);
|
||
+ break;
|
||
+ }
|
||
|
||
/* Warn about potential nested comments, but not if the '/'
|
||
comes immediately before the true comment delimiter.
|
||
@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||
{
|
||
unsigned int cols;
|
||
buffer->cur = cur - 1;
|
||
+ if (warn_bidi_p)
|
||
+ maybe_warn_bidi_on_close (pfile, cur);
|
||
_cpp_process_line_notes (pfile, true);
|
||
if (buffer->next_line >= buffer->rlimit)
|
||
return true;
|
||
@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
|
||
|
||
cur = buffer->cur;
|
||
}
|
||
+ /* If this is the beginning of a UTF-8 encoding, it might be
|
||
+ a bidirectional control character. */
|
||
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
|
||
+ {
|
||
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
|
||
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
|
||
+ }
|
||
}
|
||
|
||
buffer->cur = cur;
|
||
@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
|
||
{
|
||
cpp_buffer *buffer = pfile->buffer;
|
||
source_location orig_line = pfile->line_table->highest_line;
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
|
||
- while (*buffer->cur != '\n')
|
||
- buffer->cur++;
|
||
+ if (!warn_bidi_p)
|
||
+ while (*buffer->cur != '\n')
|
||
+ buffer->cur++;
|
||
+ else
|
||
+ {
|
||
+ while (*buffer->cur != '\n'
|
||
+ && *buffer->cur != bidi::utf8_start)
|
||
+ buffer->cur++;
|
||
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
|
||
+ {
|
||
+ while (*buffer->cur != '\n')
|
||
+ {
|
||
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
|
||
+ {
|
||
+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
|
||
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
|
||
+ /*ucn_p=*/false);
|
||
+ }
|
||
+ buffer->cur++;
|
||
+ }
|
||
+ maybe_warn_bidi_on_close (pfile, buffer->cur);
|
||
+ }
|
||
+ }
|
||
|
||
_cpp_process_line_notes (pfile, true);
|
||
return orig_line != pfile->line_table->highest_line;
|
||
@@ -1315,11 +1669,13 @@ warn_about_normalization (cpp_reader *pfile,
|
||
|
||
/* Returns TRUE if the sequence starting at buffer->cur is invalid in
|
||
an identifier. FIRST is TRUE if this starts an identifier. */
|
||
+
|
||
static bool
|
||
forms_identifier_p (cpp_reader *pfile, int first,
|
||
struct normalize_state *state)
|
||
{
|
||
cpp_buffer *buffer = pfile->buffer;
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
|
||
if (*buffer->cur == '$')
|
||
{
|
||
@@ -1343,6 +1699,12 @@ forms_identifier_p (cpp_reader *pfile, int first,
|
||
{
|
||
cppchar_t s;
|
||
buffer->cur += 2;
|
||
+ if (warn_bidi_p)
|
||
+ {
|
||
+ bidi::kind kind = get_bidi_ucn (buffer->cur,
|
||
+ buffer->cur[-1] == 'U');
|
||
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, /*ucn_p=*/true);
|
||
+ }
|
||
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
|
||
state, &s, NULL, NULL))
|
||
return true;
|
||
@@ -1450,6 +1812,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
|
||
const uchar *cur;
|
||
unsigned int len;
|
||
unsigned int hash = HT_HASHSTEP (0, *base);
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
|
||
cur = pfile->buffer->cur;
|
||
if (! starts_ucn)
|
||
@@ -1472,6 +1835,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
|
||
pfile->buffer->cur++;
|
||
}
|
||
} while (forms_identifier_p (pfile, false, nst));
|
||
+ if (warn_bidi_p)
|
||
+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
|
||
result = _cpp_interpret_identifier (pfile, base,
|
||
pfile->buffer->cur - base);
|
||
*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
|
||
@@ -1673,6 +2038,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
|
||
_cpp_buff *first_buff = NULL, *last_buff = NULL;
|
||
size_t raw_prefix_start;
|
||
_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
|
||
type = (*base == 'L' ? CPP_WSTRING :
|
||
*base == 'U' ? CPP_STRING32 :
|
||
@@ -1909,8 +2275,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
|
||
cur = base = pfile->buffer->cur;
|
||
note = &pfile->buffer->notes[pfile->buffer->cur_note];
|
||
}
|
||
+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
|
||
+ && warn_bidi_p)
|
||
+ maybe_warn_bidi_on_char (pfile, cur - 1, get_bidi_utf8 (cur - 1),
|
||
+ /*ucn_p=*/false);
|
||
}
|
||
|
||
+ if (warn_bidi_p)
|
||
+ maybe_warn_bidi_on_close (pfile, cur);
|
||
+
|
||
if (CPP_OPTION (pfile, user_literals))
|
||
{
|
||
/* If a string format macro, say from inttypes.h, is placed touching
|
||
@@ -2005,15 +2378,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||
else
|
||
terminator = '>', type = CPP_HEADER_NAME;
|
||
|
||
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
|
||
for (;;)
|
||
{
|
||
cppchar_t c = *cur++;
|
||
|
||
/* In #include-style directives, terminators are not escapable. */
|
||
if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
|
||
- cur++;
|
||
+ {
|
||
+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
|
||
+ {
|
||
+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
|
||
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
|
||
+ }
|
||
+ cur++;
|
||
+ }
|
||
else if (c == terminator)
|
||
- break;
|
||
+ {
|
||
+ if (warn_bidi_p)
|
||
+ maybe_warn_bidi_on_close (pfile, cur - 1);
|
||
+ break;
|
||
+ }
|
||
else if (c == '\n')
|
||
{
|
||
cur--;
|
||
@@ -2030,6 +2415,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
|
||
}
|
||
else if (c == '\0')
|
||
saw_NUL = true;
|
||
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
|
||
+ {
|
||
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
|
||
+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
|
||
+ }
|
||
}
|
||
|
||
if (saw_NUL && !pfile->state.skipping)
|