gcc-toolset-10-gcc/SOURCES/gcc10-add-Wbidirectional.patch
2021-11-10 08:33:22 +00:00

1444 lines
52 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From a241a9b727f03afe62a161a2662a0f1192fd523a Mon Sep 17 00:00:00 2001
From: Marek Polacek <polacek@redhat.com>
Date: Wed, 6 Oct 2021 14:33:59 -0400
Subject: [PATCH] cpp: Implement -Wbidirectional=[none|unpaired|any]
This patch implements -Wbidirectional=[none|unpaired|any] to warn about
possibly dangerous bidirectional characters.
gcc/c-family/ChangeLog:
* c.opt (Wbidirectional, Wbidirectional=): New option.
gcc/ChangeLog:
* doc/invoke.texi: Document -Wbidirectional.
libcpp/ChangeLog:
* include/cpplib.h (enum cpp_bidirectional_level): New.
(struct cpp_options): Add cpp_warn_bidirectional.
(enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
* init.c (cpp_create_reader): Set cpp_warn_bidirectional.
* lex.c (bidi): New namespace.
(get_bidi_utf8): New function.
(get_bidi_ucn): Likewise.
(maybe_warn_bidi_on_close): Likewise.
(maybe_warn_bidi_on_char): Likewise.
(_cpp_skip_block_comment): Implement warning about bidirectional
characters.
(skip_line_comment): Likewise.
(forms_identifier_p): Likewise.
(lex_identifier): Likewise.
(lex_string): Likewise.
(lex_raw_string): Likewise.
gcc/testsuite/ChangeLog:
* c-c++-common/Wbidirectional-1.c: New test.
* c-c++-common/Wbidirectional-2.c: New test.
* c-c++-common/Wbidirectional-3.c: New test.
* c-c++-common/Wbidirectional-4.c: New test.
* c-c++-common/Wbidirectional-5.c: New test.
* c-c++-common/Wbidirectional-6.c: New test.
* c-c++-common/Wbidirectional-7.c: New test.
* c-c++-common/Wbidirectional-8.c: New test.
* c-c++-common/Wbidirectional-9.c: New test.
* c-c++-common/Wbidirectional-10.c: New test.
* c-c++-common/Wbidirectional-11.c: New test.
* c-c++-common/Wbidirectional-12.c: New test.
* c-c++-common/Wbidirectional-13.c: New test.
---
gcc/c-family/c.opt | 24 ++
gcc/doc/invoke.texi | 19 +-
gcc/testsuite/c-c++-common/Wbidirectional-1.c | 11 +
.../c-c++-common/Wbidirectional-10.c | 27 ++
.../c-c++-common/Wbidirectional-11.c | 12 +
.../c-c++-common/Wbidirectional-12.c | 18 +
.../c-c++-common/Wbidirectional-13.c | 16 +
gcc/testsuite/c-c++-common/Wbidirectional-2.c | 8 +
gcc/testsuite/c-c++-common/Wbidirectional-3.c | 10 +
gcc/testsuite/c-c++-common/Wbidirectional-4.c | 165 ++++++++
gcc/testsuite/c-c++-common/Wbidirectional-5.c | 165 ++++++++
gcc/testsuite/c-c++-common/Wbidirectional-6.c | 154 +++++++
gcc/testsuite/c-c++-common/Wbidirectional-7.c | 8 +
gcc/testsuite/c-c++-common/Wbidirectional-8.c | 12 +
gcc/testsuite/c-c++-common/Wbidirectional-9.c | 28 ++
libcpp/include/cpplib.h | 18 +-
libcpp/init.c | 1 +
libcpp/lex.c | 391 +++++++++++++++++-
18 files changed, 1072 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-1.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-10.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-11.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-12.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-13.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-2.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-3.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-4.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-5.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-6.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-7.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-8.c
create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-9.c
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 06457ac739e..09391824676 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -374,6 +374,30 @@ Wbad-function-cast
C ObjC Var(warn_bad_function_cast) Warning
Warn about casting functions to incompatible types.
+Wbidirectional
+C ObjC C++ ObjC++ Warning Alias(Wbidirectional=,any,none)
+;
+
+Wbidirectional=
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
+-Wbidirectional=[none|unpaired|any] Warn about UTF-8 bidirectional characters.
+
+; Required for these enum values.
+SourceInclude
+cpplib.h
+
+Enum
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidirectional%> not recognized)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
+
Wbool-compare
C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
Warn about boolean expression compared with an integer value different from true/false.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b64ec18ae46..e32858ce767 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -304,7 +304,9 @@ Objective-C and Objective-C++ Dialects}.
-Warith-conversion @gol
-Warray-bounds -Warray-bounds=@var{n} @gol
-Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
--Wno-attribute-warning -Wbool-compare -Wbool-operation @gol
+-Wno-attribute-warning @gol
+-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
+-Wbool-compare -Wbool-operation @gol
-Wno-builtin-declaration-mismatch @gol
-Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
-Wc11-c2x-compat @gol
@@ -6758,6 +6760,21 @@ Attributes considered include @code{allo
This is the default. You can disable these warnings with either
@option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
+@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]}
+@opindex Wbidirectional=
+@opindex Wbidirectional
+@opindex Wno-bidirectional
+Warn about UTF-8 bidirectional control characters. Such characters can change
+left-to-right writing direction into right-to-left (and vice versa),
+which can cause confusion between the logical order and visual order.
+This may be dangerous; for instance, it may seem that a piece of code
+is not commented out, whereas it in fact is.
+
+There are three levels of warning supported by GCC@. The default is
+@option{-Wbidirectional=unpaired}, which warns about improperly terminated
+bidi contexts. @option{-Wbidirectional=none} turns the warning off.
+@option{-Wbidirectional=any} warns about any use of bidirectional characters.
+
@item -Wbool-compare
@opindex Wno-bool-compare
@opindex Wbool-compare
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-1.c b/gcc/testsuite/c-c++-common/Wbidirectional-1.c
new file mode 100644
index 00000000000..750de81fdd8
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+int main() {
+ int isAdmin = 0;
+ /* } if (isAdmin) begin admins only */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("You are an admin.\n");
+ /* end admins only { */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-10.c b/gcc/testsuite/c-c++-common/Wbidirectional-10.c
new file mode 100644
index 00000000000..cd4abeeefbd
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-10.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired" } */
+/* More nesting testing. */
+
+/* RLE LRI PDF PDI*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int LRE_\u202a_PDF_\u202c;
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int FSI_\u2068;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int FSI_\u2068_PDI_\u2069;
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-11.c b/gcc/testsuite/c-c++-common/Wbidirectional-11.c
new file mode 100644
index 00000000000..43d699acc64
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-11.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired" } */
+/* Test that we warn when mixing UCN and UTF-8. */
+
+int LRE__PDF_\u202c;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+int LRE_\u202a_PDF__;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+const char *s1 = "LRE__PDF_\u202c";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+const char *s2 = "LRE_\u202a_PDF_";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-12.c b/gcc/testsuite/c-c++-common/Wbidirectional-12.c
new file mode 100644
index 00000000000..20d1566401a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-12.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target { c || c++11 } } } */
+/* { dg-options "-Wbidirectional=any" } */
+/* Test raw strings. */
+
+const char *s1 = R"(a b c LRE 1 2 3 PDF x y z)";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+const char *s2 = R"(a b c RLE 1 2 3 PDF x y z)";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+const char *s3 = R"(a b c LRO 1 2 3 PDF x y z)";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+const char *s4 = R"(a b c RLO 1 2 3 PDF x y z)";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+const char *s7 = R"(a b c FSI 1 2 3 PDI x y) z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+const char *s8 = R"(a b c PDI x y )z";
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+const char *s9 = R"(a b c PDF x y z)";
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-13.c b/gcc/testsuite/c-c++-common/Wbidirectional-13.c
new file mode 100644
index 00000000000..08010e3b37b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { c || c++11 } } } */
+/* { dg-options "-Wbidirectional=unpaired" } */
+/* Test raw strings. */
+
+const char *s1 = R"(a b c LRE 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s2 = R"(a b c RLE 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s3 = R"(a b c LRO 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s4 = R"(a b c FSI 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s5 = R"(a b c LRI 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s6 = R"(a b c RLI 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-2.c b/gcc/testsuite/c-c++-common/Wbidirectional-2.c
new file mode 100644
index 00000000000..4e04202e058
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+
+int main() {
+ /* Say hello; newline/*/ return 0 ;
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("Hello world.\n");
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-3.c b/gcc/testsuite/c-c++-common/Wbidirectional-3.c
new file mode 100644
index 00000000000..921300e94e0
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+int main() {
+ const char* access_level = "user";
+ if (__builtin_strcmp(access_level, "user // Check if admin ")) {
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("You are an admin.\n");
+ }
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-4.c b/gcc/testsuite/c-c++-common/Wbidirectional-4.c
new file mode 100644
index 00000000000..e6638aecc6a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-4.c
@@ -0,0 +1,165 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=any -Wno-multichar -Wno-overflow" } */
+/* Test all bidi chars in various contexts (identifiers, comments,
+ string literals, character constants), both UCN and UTF-8. The bidi
+ chars here are properly terminated, except for the character constants. */
+
+/* a b c LRE 1 2 3 PDF x y z */
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+/* a b c RLE 1 2 3 PDF x y z */
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+/* a b c LRO 1 2 3 PDF x y z */
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+/* a b c RLO 1 2 3 PDF x y z */
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+/* a b c LRI 1 2 3 PDI x y z */
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+/* a b c RLI 1 2 3 PDI x y */
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+/* a b c FSI 1 2 3 PDI x y z */
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+
+/* Same but C++ comments instead. */
+// a b c LRE 1 2 3 PDF x y z
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+// a b c RLE 1 2 3 PDF x y z
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+// a b c LRO 1 2 3 PDF x y z
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+// a b c RLO 1 2 3 PDF x y z
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+// a b c LRI 1 2 3 PDI x y z
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+// a b c RLI 1 2 3 PDI x y
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+// a b c FSI 1 2 3 PDI x y z
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+
+/* Here we're closing an unopened context, warn when =any. */
+/* a b c PDI x y z */
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+/* a b c PDF x y z */
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+// a b c PDI x y z
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+// a b c PDF x y z
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c PDI x y z";
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c PDF x y z";
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+}
+
+void
+g2 ()
+{
+ const char c1 = '\u202a';
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char c2 = '\u202A';
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char c3 = '\u202b';
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char c4 = '\u202B';
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char c5 = '\u202d';
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char c6 = '\u202D';
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char c7 = '\u202e';
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char c8 = '\u202E';
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char c9 = '\u2066';
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char c10 = '\u2067';
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char c11 = '\u2068';
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+}
+
+int abc;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+int AX;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+int A\u202cY;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+int A\u202CY2;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+int d\u202ae\u202cf;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int d\u202Ae\u202cf2;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int d\u202be\u202cf;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int d\u202Be\u202cf2;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int d\u202de\u202cf;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int d\u202De\u202cf2;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int d\u202ee\u202cf;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int d\u202Ee\u202cf2;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int d\u2066e\u2069f;
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+int d\u2067e\u2069f;
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+int d\u2068e\u2069f;
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+int X\u2069;
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-5.c b/gcc/testsuite/c-c++-common/Wbidirectional-5.c
new file mode 100644
index 00000000000..45d3402c941
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-5.c
@@ -0,0 +1,165 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired -Wno-multichar -Wno-overflow" } */
+/* Test all bidi chars in various contexts (identifiers, comments,
+ string literals, character constants), both UCN and UTF-8. The bidi
+ chars here are properly terminated, except for the character constants. */
+
+/* a b c LRE 1 2 3 PDF x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLE 1 2 3 PDF x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRO 1 2 3 PDF x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLO 1 2 3 PDF x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRI 1 2 3 PDI x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLI 1 2 3 PDI x y */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c FSI 1 2 3 PDI x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+/* Same but C++ comments instead. */
+// a b c LRE 1 2 3 PDF x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLE 1 2 3 PDF x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRO 1 2 3 PDF x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLO 1 2 3 PDF x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRI 1 2 3 PDI x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLI 1 2 3 PDI x y
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c FSI 1 2 3 PDI x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+/* Here we're closing an unopened context, warn when =any. */
+/* a b c PDI x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c PDF x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c PDI x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c PDF x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE 1 2 3 PDF x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c RLE 1 2 3 PDF x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c LRO 1 2 3 PDF x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLO 1 2 3 PDF x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRI 1 2 3 PDI x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c RLI 1 2 3 PDI x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c FSI 1 2 3 PDI x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c PDI x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c PDF x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+}
+
+void
+g2 ()
+{
+ const char c1 = '\u202a';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c2 = '\u202A';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c3 = '\u202b';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c4 = '\u202B';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c5 = '\u202d';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c6 = '\u202D';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c7 = '\u202e';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c8 = '\u202E';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c9 = '\u2066';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c10 = '\u2067';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c11 = '\u2068';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int abc;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int AX;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int A\u202cY;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int A\u202CY2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+int d\u202ae\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Ae\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202be\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Be\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202de\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202De\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202ee\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Ee\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2066e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2067e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2068e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int X\u2069;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-6.c b/gcc/testsuite/c-c++-common/Wbidirectional-6.c
new file mode 100644
index 00000000000..1be017f828d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-6.c
@@ -0,0 +1,154 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired" } */
+/* Test nesting of bidi chars in various contexts. */
+
+/* Terminated by the wrong char: */
+/* a b c LRE 1 2 3 PDI x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLE 1 2 3 PDI x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRO 1 2 3 PDI x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLO 1 2 3 PDI x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRI 1 2 3 PDF x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLI 1 2 3 PDF x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c FSI 1 2 3 PDF x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+/* LRE PDF */
+/* LRE LRE PDF PDF */
+/* PDF LRE PDF */
+/* LRE PDF LRE PDF */
+/* LRE LRE PDF */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* PDF LRE */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+// a b c LRE 1 2 3 PDI x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLE 1 2 3 PDI x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRO 1 2 3 PDI x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLO 1 2 3 PDI x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRI 1 2 3 PDF x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLI 1 2 3 PDF x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c FSI 1 2 3 PDF x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+// LRE PDF
+// LRE LRE PDF PDF
+// PDF LRE PDF
+// LRE PDF LRE PDF
+// LRE LRE PDF
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// PDF LRE
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE 1 2 3 PDI x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c RLE 1 2 3 PDI x y ";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRO 1 2 3 PDI x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c RLO 1 2 3 PDI x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c LRI 1 2 3 PDF x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c RLI 1 2 3 PDF x y z\
+ ";
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c FSI 1 2 3 PDF x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s15 = "PDF LRE";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s16 = "PDF\u202c LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s17 = "LRE PDF";
+ const char *s18 = "LRE\u202a PDF\u202c";
+ const char *s19 = "LRE LRE PDF PDF";
+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
+ const char *s21 = "PDF LRE PDF";
+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
+ const char *s23 = "LRE LRE PDF";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s25 = "PDF LRE";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s26 = "PDF\u202c LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s27 = "PDF LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s28 = "PDF\u202c LRE";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int aLREbPDI;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int A\u202aB\u2069C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLEbPDI;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202bB\u2069c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLRObPDI;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202db\u2069c2;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLObPDI;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202eb\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLRIbPDF;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u2066b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLIbPDFc
+;
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+int a\u2067b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSIbPDF;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u2068b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSIbPD\u202C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSI\u2068bPDF_;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLREbPDFb;
+int A\u202aB\u202c;
+int a_LRE_LRE_b_PDF_PDF;
+int A\u202aA\u202aB\u202cB\u202c;
+int aPDFbLREadPDF;
+int a_\u202C_\u202a_\u202c;
+int a_LRE_b_PDF_c_LRE_PDF;
+int a_\u202a_\u202c_\u202a_\u202c_;
+int a_LRE_b_PDF_c_LRE;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a_\u202a_\u202c_\u202a_;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-7.c b/gcc/testsuite/c-c++-common/Wbidirectional-7.c
new file mode 100644
index 00000000000..f0f7b3ca14a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-7.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=any" } */
+/* Test we ignore UCNs in comments. */
+
+// a b c \u202a 1 2 3
+// a b c \u202A 1 2 3
+/* a b c \u202a 1 2 3 */
+/* a b c \u202A 1 2 3 */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-8.c b/gcc/testsuite/c-c++-common/Wbidirectional-8.c
new file mode 100644
index 00000000000..c7d02193131
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=any" } */
+/* Test \u vs \U. */
+
+int a_\u202A;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\u202a_2;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\U0000202A_3;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\U0000202a_4;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-9.c b/gcc/testsuite/c-c++-common/Wbidirectional-9.c
new file mode 100644
index 00000000000..d029209babb
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-9.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired" } */
+/* Test that we properly separate bidi contexts (comment/identifier/character
+ constant/string literal). */
+
+/* LRE -><- */ int pdf_\u202c_1;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLE -><- */ int pdf_\u202c_2;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRO -><- */ int pdf_\u202c_3;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLO -><- */ int pdf_\u202c_4;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRI -><-*/ int pdi_\u2069_1;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLI -><- */ int pdi_\u2069_12;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* FSI -><- */ int pdi_\u2069_3;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+const char *s1 = "LRE\u202a"; /* PDF -><- */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRE -><- */ const char *s2 = "PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int lre_\u202a; const char *s4 = "PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 6e2fcb6b1f2..e48d13c4ee1 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -308,6 +308,17 @@ enum cpp_normalize_level {
normalized_none
};
+/* Levels of checking for bidirectional control characters, ordered from
+ least restrictive to most (see -Wbidirectional=[none|unpaired|any]). */
+enum cpp_bidirectional_level {
+ /* No checking. */
+ bidirectional_none,
+ /* Only detect bidirectional characters that are left unpaired. */
+ bidirectional_unpaired,
+ /* Detect any use of a bidirectional character, paired or not. */
+ bidirectional_any
+};
+
/* This structure is nested inside struct cpp_reader, and
carries all the options visible to the command line. */
struct cpp_options
@@ -518,6 +529,10 @@ struct cpp_options
/* True if warn about differences between C++98 and C++11. */
bool cpp_warn_cxx11_compat;
+ /* Nonzero if bidirectional characters checking is on. See enum
+ cpp_bidirectional_level. */
+ unsigned char cpp_warn_bidirectional;
+
/* Dependency generation. */
struct
{
@@ -616,7 +631,8 @@ enum cpp_warning_reason {
CPP_W_C90_C99_COMPAT,
CPP_W_C11_C2X_COMPAT,
CPP_W_CXX11_COMPAT,
- CPP_W_EXPANSION_TO_DEFINED
+ CPP_W_EXPANSION_TO_DEFINED,
+ CPP_W_BIDIRECTIONAL
};
/* Callback for header lookup for HEADER, which is the name of a
diff --git a/libcpp/init.c b/libcpp/init.c
index 5a424e23553..f9a8f5f088f 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
= ENABLE_CANONICAL_SYSTEM_HEADERS;
CPP_OPTION (pfile, ext_numeric_literals) = 1;
CPP_OPTION (pfile, warn_date_time) = 0;
+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
/* Default CPP arithmetic to something sensible for the host for the
benefit of dumb users like fix-header. */
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8e3ef096bbe..d9c39a4105f 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1164,6 +1164,284 @@ _cpp_process_line_notes (cpp_reader *pfi
}
}
+namespace bidi {
+ enum kind {
+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI
+ };
+
+ /* Lead byte (0xE2) shared by the UTF-8 encodings of all bidi characters. */
+ const uchar utf8_start = 0xe2;
+
+ /* Stack of currently open bidi contexts, one char per context. Bit 0 is
+ 1 for a context that a PDF closes (opened by LRE/RLE/LRO/RLO) and 0 for
+ one that a PDI closes (opened by LRI/RLI/FSI). Bit 1 is 1 if the opening
+ character was written as a UCN, and 0 if it was written in UTF-8. */
+ semi_embedded_vec <unsigned char, 16> vec;
+
+ /* Forget every open bidi context; used when the enclosing comment,
+ identifier, string literal or character constant ends. */
+ void on_close ()
+ {
+ vec.truncate (0);
+ }
+
+ /* Drop the innermost open context; asserts that there is one. */
+ void pop ()
+ {
+ unsigned int len = vec.count ();
+ gcc_checking_assert (len > 0);
+ vec.truncate (len - 1);
+ }
+
+ /* Return PDF/PDI for the innermost open context, or NONE if none is open. */
+ kind current_ctx ()
+ {
+ unsigned int len = vec.count ();
+ if (len == 0)
+ return NONE;
+ return (vec[len - 1] & 1) ? PDF : PDI;
+ }
+
+ /* Return true if the bidi character that opened the innermost context was
+ written as a UCN rather than in UTF-8. Asserts that a context is open. */
+ bool current_ctx_ucn_p ()
+ {
+ unsigned int len = vec.count ();
+ gcc_checking_assert (len > 0);
+ return (vec[len - 1] >> 1) & 1;
+ }
+
+ /* Update the context stack for bidi char K; UCN_P is true if K was a UCN. */
+ void on_char (kind k, bool ucn_p)
+ {
+ switch (k)
+ {
+ case LRE:
+ case RLE:
+ case LRO:
+ case RLO:
+ vec.push (ucn_p ? 3u : 1u);
+ break;
+ case LRI:
+ case RLI:
+ case FSI:
+ vec.push (ucn_p ? 2u : 0u);
+ break;
+ case PDF:
+ if (current_ctx () == PDF)
+ pop ();
+ break;
+ case PDI:
+ if (current_ctx () == PDI)
+ pop ();
+ break;
+ [[likely]] case NONE:
+ break;
+ default:
+ abort ();
+ }
+ }
+
+ /* Return the code point and Unicode name of K; aborts if K is NONE. */
+ const char *to_str (kind k)
+ {
+ switch (k)
+ {
+ case LRE:
+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
+ case RLE:
+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
+ case LRO:
+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
+ case RLO:
+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
+ case LRI:
+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
+ case RLI:
+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
+ case FSI:
+ return "U+2068 (FIRST STRONG ISOLATE)";
+ case PDF:
+ return "U+202C (POP DIRECTIONAL FORMATTING)";
+ case PDI:
+ return "U+2069 (POP DIRECTIONAL ISOLATE)";
+ default:
+ abort ();
+ }
+ }
+}
+
+/* Classify the 3 UTF-8 bytes at P (P[0] is 0xE2) as a bidi kind or NONE. */
+
+static bidi::kind
+get_bidi_utf8 (const unsigned char *const p)
+{
+ gcc_checking_assert (p[0] == bidi::utf8_start);
+
+ if (p[1] == 0x80)
+ switch (p[2])
+ {
+ case 0xaa:
+ return bidi::LRE;
+ case 0xab:
+ return bidi::RLE;
+ case 0xac:
+ return bidi::PDF;
+ case 0xad:
+ return bidi::LRO;
+ case 0xae:
+ return bidi::RLO;
+ default:
+ break;
+ }
+ else if (p[1] == 0x81)
+ switch (p[2])
+ {
+ case 0xa6:
+ return bidi::LRI;
+ case 0xa7:
+ return bidi::RLI;
+ case 0xa8:
+ return bidi::FSI;
+ case 0xa9:
+ return bidi::PDI;
+ default:
+ break;
+ }
+
+ return bidi::NONE;
+}
+
+/* Classify the UCN whose hex digits start at P; IS_U is true for \U, not \u. */
+
+static bidi::kind
+get_bidi_ucn (const unsigned char *p, bool is_U)
+{
+ /* 6.4.3 Universal Character Names
+ \u hex-quad
+ \U hex-quad hex-quad
+ where \unnnn means \U0000nnnn. */
+
+ if (is_U)
+ {
+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
+ return bidi::NONE;
+ /* Skip the four leading zeros so \u and \U are handled alike below. */
+ p += 4;
+ }
+
+ /* Every code point of interest has the form 20xx. */
+ if (p[0] != '2' || p[1] != '0')
+ return bidi::NONE;
+ else if (p[2] == '2')
+ switch (p[3])
+ {
+ case 'a':
+ case 'A':
+ return bidi::LRE;
+ case 'b':
+ case 'B':
+ return bidi::RLE;
+ case 'c':
+ case 'C':
+ return bidi::PDF;
+ case 'd':
+ case 'D':
+ return bidi::LRO;
+ case 'e':
+ case 'E':
+ return bidi::RLO;
+ default:
+ break;
+ }
+ else if (p[2] == '6')
+ switch (p[3])
+ {
+ case '6':
+ return bidi::LRI;
+ case '7':
+ return bidi::RLI;
+ case '8':
+ return bidi::FSI;
+ case '9':
+ return bidi::PDI;
+ default:
+ break;
+ }
+
+ return bidi::NONE;
+}
+
+/* We're closing a bidi context, that is, we've encountered a newline,
+ are closing a C-style comment, or are at the end of a string literal,
+ character constant, or identifier. If the checking level is
+ bidirectional_unpaired and a bidi context is still open, warn at P,
+ the last character in this context. Always reset the bidi state. */
+
+static void
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
+{
+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
+ && bidi::vec.count () > 0)
+ {
+ const location_t loc
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, p));
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "unpaired UTF-8 bidirectional character "
+ "detected");
+ }
+ /* We're done with this context. */
+ bidi::on_close ();
+}
+
+/* We're at the beginning or in the middle of an identifier/comment/string
+ literal/character constant and have classified a character as KIND (this
+ is a no-op if KIND is NONE). Warn as the checking level demands. P
+ points to it in the buffer; UCN_P is true iff it was written as a UCN. */
+
+static void
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
+ bool ucn_p)
+{
+ if (__builtin_expect (kind == bidi::NONE, 1))
+ return;
+
+ const unsigned char warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
+
+ if (warn_bidi != bidirectional_none)
+ {
+ const location_t loc
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, p));
+ /* A PDI/PDF closing the innermost open context is not itself
+ reported (at level "any" its opening character already was).
+ At level "unpaired", though, report a closer whose spelling
+ (UCN vs UTF-8) differs from that of the opener. */
+ if (kind == bidi::current_ctx ())
+ {
+ if (warn_bidi == bidirectional_unpaired
+ && bidi::current_ctx_ucn_p () != ucn_p)
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "UTF-8 vs UCN mismatch when closing "
+ "a context by \"%s\"", bidi::to_str (kind));
+ }
+ else if (warn_bidi == bidirectional_any)
+ {
+ if (kind == bidi::PDF || kind == bidi::PDI)
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "\"%s\" is closing an unopened context",
+ bidi::to_str (kind));
+ else
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "found problematic Unicode character \"%s\"",
+ bidi::to_str (kind));
+ }
+ }
+ /* Record KIND in the stack of open bidi contexts. */
+ bidi::on_char (kind, ucn_p);
+}
+
/* Skip a C-style block comment. We find the end of the comment by
seeing if an asterisk is before every '/' we encounter. Returns
nonzero if comment terminated by EOF, zero otherwise.
@@ -1175,7 +1453,8 @@ _cpp_skip_block_comment (cpp_reader *pfi
cpp_buffer *buffer = pfile->buffer;
const uchar *cur = buffer->cur;
uchar c;
-
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
cur++;
if (*cur == '/')
cur++;
@@ -1189,7 +1468,11 @@ _cpp_skip_block_comment (cpp_reader *pfi
if (c == '/')
{
if (cur[-2] == '*')
- break;
+ {
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur);
+ break;
+ }
/* Warn about potential nested comments, but not if the '/'
comes immediately before the true comment delimiter.
@@ -1208,6 +1491,8 @@ _cpp_skip_block_comment (cpp_reader *pfi
{
unsigned int cols;
buffer->cur = cur - 1;
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur);
_cpp_process_line_notes (pfile, true);
if (buffer->next_line >= buffer->rlimit)
return true;
@@ -1218,6 +1503,13 @@ _cpp_skip_block_comment (cpp_reader *pfi
cur = buffer->cur;
}
+ /* If this is the lead byte of a UTF-8 sequence, it might begin
+ a bidirectional character. */
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
+ }
}
buffer->cur = cur;
@@ -1233,9 +1525,32 @@ skip_line_comment (cpp_reader *pfile)
{
cpp_buffer *buffer = pfile->buffer;
location_t orig_line = pfile->line_table->highest_line;
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
- while (*buffer->cur != '\n')
- buffer->cur++;
+ if (!warn_bidi_p)
+ while (*buffer->cur != '\n')
+ buffer->cur++;
+ else
+ {
+ while (*buffer->cur != '\n'
+ && *buffer->cur != bidi::utf8_start)
+ buffer->cur++;
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
+ {
+ while (*buffer->cur != '\n')
+ {
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
+ {
+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/false);
+ }
+ buffer->cur++;
+ }
+ maybe_warn_bidi_on_close (pfile, buffer->cur);
+ }
+ }
_cpp_process_line_notes (pfile, true);
return orig_line != pfile->line_table->highest_line;
@@ -1317,11 +1632,14 @@ static const cppchar_t utf8_signifier =
/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. FIRST is TRUE if this starts an identifier. */
+
static bool
forms_identifier_p (cpp_reader *pfile, int first,
struct normalize_state *state)
{
cpp_buffer *buffer = pfile->buffer;
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
if (*buffer->cur == '$')
{
@@ -1344,6 +1662,13 @@ forms_identifier_p (cpp_reader *pfile, i
cppchar_t s;
if (*buffer->cur >= utf8_signifier)
{
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
+ && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/false);
+ }
if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
state, &s))
return true;
@@ -1352,6 +1677,13 @@ forms_identifier_p (cpp_reader *pfile, i
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
+ if (warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_ucn (buffer->cur,
+ buffer->cur[-1] == 'U');
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/true);
+ }
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
state, &s, NULL, NULL))
return true;
@@ -1460,6 +1792,8 @@ lex_identifier (cpp_reader *pfile, const
const uchar *cur;
unsigned int len;
unsigned int hash = HT_HASHSTEP (0, *base);
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
cur = pfile->buffer->cur;
if (! starts_ucn)
@@ -1476,13 +1810,17 @@ lex_identifier (cpp_reader *pfile, const
{
/* Slower version for identifiers containing UCNs
or extended chars (including $). */
- do {
- while (ISIDNUM (*pfile->buffer->cur))
- {
- NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
- pfile->buffer->cur++;
- }
- } while (forms_identifier_p (pfile, false, nst));
+ do
+ {
+ while (ISIDNUM (*pfile->buffer->cur))
+ {
+ NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
+ pfile->buffer->cur++;
+ }
+ }
+ while (forms_identifier_p (pfile, false, nst));
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
@@ -1684,6 +2022,8 @@ lex_raw_string (cpp_reader *pfile, cpp_t
_cpp_buff *first_buff = NULL, *last_buff = NULL;
size_t raw_prefix_start;
_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
@@ -1920,8 +2260,16 @@ lex_raw_string (cpp_reader *pfile, cpp_t
cur = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
+ && warn_bidi_p)
+ maybe_warn_bidi_on_char (pfile, cur - 1,
+ get_bidi_utf8 (cur - 1),
+ /*ucn_p=*/false);
}
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur);
+
if (CPP_OPTION (pfile, user_literals))
{
/* If a string format macro, say from inttypes.h, is placed touching
@@ -2016,15 +2364,28 @@ lex_string (cpp_reader *pfile, cpp_token
else
terminator = '>', type = CPP_HEADER_NAME;
+ const bool warn_bidi_p = (CPP_OPTION (pfile, cpp_warn_bidirectional)
+ != bidirectional_none);
for (;;)
{
cppchar_t c = *cur++;
/* In #include-style directives, terminators are not escapable. */
if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
- cur++;
+ {
+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
+ }
+ cur++;
+ }
else if (c == terminator)
- break;
+ {
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur - 1);
+ break;
+ }
else if (c == '\n')
{
cur--;
@@ -2041,6 +2402,11 @@ lex_string (cpp_reader *pfile, cpp_token
}
else if (c == '\0')
saw_NUL = true;
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
+ }
}
if (saw_NUL && !pfile->state.skipping)
base-commit: b0b1d8d5d90d7c499e2733e8d01ba8b73217f332
--
2.31.1