clang/SOURCES/0002-Misleading-bidirectional-detection.patch
2021-11-11 04:45:29 +00:00

333 lines
13 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 9613c86f0427be88ee43bddb7ae7cde74c1157ac Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 4 Nov 2021 11:11:53 +0100
Subject: [PATCH 2/3] Misleading bidirectional detection
Differential Revision: https://reviews.llvm.org/D112913
---
clang-tools-extra/clang-tidy/misc/CMakeLists.txt | 1 +
.../clang-tidy/misc/MiscTidyModule.cpp | 3 +
.../clang-tidy/misc/MisleadingBidirectional.cpp | 131 +++++++++++++++++++++
.../clang-tidy/misc/MisleadingBidirectional.h | 38 ++++++
clang-tools-extra/docs/ReleaseNotes.rst | 5 +
clang-tools-extra/docs/clang-tidy/checks/list.rst | 3 +-
.../checks/misc-misleading-bidirectional.rst | 21 ++++
.../checkers/misc-misleading-bidirectional.cpp | 31 +++++
8 files changed, 232 insertions(+), 1 deletion(-)
create mode 100644 clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
create mode 100644 clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
create mode 100644 clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
create mode 100644 clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
diff --git a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
index d438a47..d6bb718 100644
--- a/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/misc/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangTidyMiscModule
DefinitionsInHeadersCheck.cpp
Homoglyph.cpp
MiscTidyModule.cpp
+ MisleadingBidirectional.cpp
MisplacedConstCheck.cpp
NewDeleteOverloadsCheck.cpp
NoRecursionCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
index 5c7bd0c..bb5fde2 100644
--- a/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp
@@ -11,6 +11,7 @@
#include "../ClangTidyModuleRegistry.h"
#include "DefinitionsInHeadersCheck.h"
#include "Homoglyph.h"
+#include "MisleadingBidirectional.h"
#include "MisplacedConstCheck.h"
#include "NewDeleteOverloadsCheck.h"
#include "NoRecursionCheck.h"
@@ -35,6 +36,8 @@ public:
CheckFactories.registerCheck<DefinitionsInHeadersCheck>(
"misc-definitions-in-headers");
CheckFactories.registerCheck<Homoglyph>("misc-homoglyph");
+ CheckFactories.registerCheck<MisleadingBidirectionalCheck>(
+ "misc-misleading-bidirectional");
CheckFactories.registerCheck<MisplacedConstCheck>("misc-misplaced-const");
CheckFactories.registerCheck<NewDeleteOverloadsCheck>(
"misc-new-delete-overloads");
diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
new file mode 100644
index 0000000..7a2f06b
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
@@ -0,0 +1,131 @@
+//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MisleadingBidirectional.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace clang;
+
+static bool containsMisleadingBidi(StringRef Buffer,
+ bool HonorLineBreaks = true) { // NOTE(review): flag is never read below
+ const char *CurPtr = Buffer.begin();
+ unsigned EmbeddingOverride = 0, Isolate = 0; // counts of open contexts
+ unsigned i = 0; // NOTE(review): incremented in the loop but never read
+
+ enum {
+ LS = 0x2028, // LINE SEPARATOR
+ PS = 0x2029, // PARAGRAPH SEPARATOR
+ RLO = 0x202E, // RIGHT-TO-LEFT OVERRIDE
+ RLE = 0x202B, // RIGHT-TO-LEFT EMBEDDING
+ LRO = 0x202D, // LEFT-TO-RIGHT OVERRIDE
+ LRE = 0x202A, // LEFT-TO-RIGHT EMBEDDING
+ PDF = 0x202C, // POP DIRECTIONAL FORMATTING
+ RLI = 0x2067, // RIGHT-TO-LEFT ISOLATE
+ LRI = 0x2066, // LEFT-TO-RIGHT ISOLATE
+ FSI = 0x2068, // FIRST STRONG ISOLATE
+ PDI = 0x2069 // POP DIRECTIONAL ISOLATE
+ };
+
+ // Walk the buffer one code point at a time, counting open bidi contexts.
+ // RLO/RLE/LRO/LRE are closed by PDF; RLI/LRI/FSI are closed by PDI. Line
+ // breaks reset both counts. Extra PDF/PDI are ignored: on a zero count the
+ // unsigned `N - 1` wraps around, so std::min(N - 1, N) leaves it at zero.
+ // Returns true if a context is still open when the buffer ends.
+ while (CurPtr < Buffer.end()) {
+ ++i;
+ unsigned char C = *CurPtr;
+ if (isASCII(C)) {
+ ++CurPtr;
+ // Line break: https://www.unicode.org/reports/tr14/tr14-32.html
+ if (C == '\n' || C == '\r' || C == '\f' || C == '\v' ||
+ C == 0x85 /*next line*/) // NOTE(review): 0x85 is not ASCII; reachable?
+ EmbeddingOverride = Isolate = 0;
+ continue;
+ }
+ llvm::UTF32 CodePoint;
+ llvm::ConversionResult Result = llvm::convertUTF8Sequence(
+ (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
+ &CodePoint, llvm::strictConversion);
+
+ // If conversion fails, UTF-8 lets us resynchronize by trying the next byte.
+ if (Result != llvm::conversionOK) {
+ ++CurPtr;
+ continue;
+ }
+
+ if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
+ CodePoint == LRE)
+ EmbeddingOverride += 1;
+ else if (CodePoint == PDF)
+ EmbeddingOverride = std::min(EmbeddingOverride - 1, EmbeddingOverride);
+ else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
+ Isolate += 1;
+ else if (CodePoint == PDI)
+ Isolate = std::min(Isolate - 1, Isolate);
+ // Line break: https://www.unicode.org/reports/tr14/tr14-32.html
+ else if (CodePoint == LS || CodePoint == PS)
+ EmbeddingOverride = Isolate = 0;
+ }
+ return EmbeddingOverride != 0 || Isolate != 0;
+}
+
+class clang::tidy::misc::MisleadingBidirectionalCheck::
+ MisleadingBidirectionalHandler : public CommentHandler { // flags comments with unclosed bidi contexts
+public:
+ MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check,
+ llvm::Optional<std::string> User) // NOTE(review): User is unused
+ : Check(Check) {}
+
+ bool HandleComment(Preprocessor &PP, SourceRange Range) override {
+ // FIXME: check that we are in a /* */ comment
+ StringRef Text =
+ Lexer::getSourceText(CharSourceRange::getCharRange(Range),
+ PP.getSourceManager(), PP.getLangOpts());
+
+ if (containsMisleadingBidi(Text, true))
+ Check.diag(
+ Range.getBegin(),
+ "comment contains misleading bidirectional Unicode characters");
+ return false; // NOTE(review): presumably "no tokens pushed" - confirm
+ }
+
+private:
+ MisleadingBidirectionalCheck &Check; // check used to emit the diagnostic
+};
+
+clang::tidy::misc::MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
+ StringRef Name, ClangTidyContext *Context)
+ : ClangTidyCheck(Name, Context),
+ Handler(std::make_unique<MisleadingBidirectionalHandler>(
+ *this, Context->getOptions().User)) {} // NOTE(review): handler ignores User
+
+clang::tidy::misc::MisleadingBidirectionalCheck::
+ ~MisleadingBidirectionalCheck() = default; // defaulted after the handler class is complete
+
+void clang::tidy::misc::MisleadingBidirectionalCheck::registerPPCallbacks(
+ const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
+ PP->addCommentHandler(Handler.get()); // raw pointer; Handler keeps ownership
+}
+
+void clang::tidy::misc::MisleadingBidirectionalCheck::check(
+ const ast_matchers::MatchFinder::MatchResult &Result) { // handles "strlit" matches
+ if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
+ StringRef Literal = SL->getBytes();
+ if (containsMisleadingBidi(Literal, false)) // NOTE(review): callee ignores this flag
+ diag(SL->getBeginLoc(), "string literal contains misleading "
+ "bidirectional Unicode characters");
+ }
+}
+
+void clang::tidy::misc::MisleadingBidirectionalCheck::registerMatchers(
+ ast_matchers::MatchFinder *Finder) { // binds string literals as "strlit" for check()
+ Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
+}
diff --git a/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
new file mode 100644
index 0000000..18e7060
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h
@@ -0,0 +1,38 @@
+//===--- MisleadingBidirectional.h - clang-tidy -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace misc {
+
+class MisleadingBidirectionalCheck : public ClangTidyCheck { // flags unclosed bidi contexts
+public:
+ MisleadingBidirectionalCheck(StringRef Name, ClangTidyContext *Context);
+ ~MisleadingBidirectionalCheck(); // defined out of line in the .cpp
+
+ void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+ Preprocessor *ModuleExpanderPP) override; // installs the comment handler
+
+ void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+ void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+ class MisleadingBidirectionalHandler; // CommentHandler subclass, see the .cpp
+ std::unique_ptr<MisleadingBidirectionalHandler> Handler; // owned comment handler
+};
+
+} // namespace misc
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MISLEADINGBIDIRECTIONALCHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index e4ad7b9..546d544 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -326,6 +326,11 @@ New checks
Detects confusable unicode identifiers.
+- New :doc:`misc-misleading-bidirectional <clang-tidy/checks/misc-misleading-bidirectional>` check.
+
+ Inspects string literals and comments for unterminated bidirectional Unicode
+ sequences.
+
New check aliases
^^^^^^^^^^^^^^^^^
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 47ff5b7..977867d 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -202,7 +202,8 @@ Clang-Tidy Checks
`llvmlibc-implementation-in-namespace <llvmlibc-implementation-in-namespace.html>`_,
`llvmlibc-restrict-system-libc-headers <llvmlibc-restrict-system-libc-headers.html>`_, "Yes"
`misc-definitions-in-headers <misc-definitions-in-headers.html>`_, "Yes"
- `misc-homoglyph <misc-homoglyph.html>`_, "Yes"
+ `misc-homoglyph <misc-homoglyph.html>`_,
+ `misc-misleading-bidirectional <misc-misleading-bidirectional.html>`_,
`misc-misplaced-const <misc-misplaced-const.html>`_,
`misc-new-delete-overloads <misc-new-delete-overloads.html>`_,
`misc-no-recursion <misc-no-recursion.html>`_,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst b/clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
new file mode 100644
index 0000000..16ffc97
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/misc-misleading-bidirectional.rst
@@ -0,0 +1,21 @@
+.. title:: clang-tidy - misc-misleading-bidirectional
+
+misc-misleading-bidirectional
+=============================
+
+Warns about unterminated bidirectional Unicode sequences, detecting potential
+attacks of the kind described by `Trojan Source <https://www.trojansource.codes>`_.
+
+Example:
+
+.. code-block:: c++
+
+ #include <iostream>
+
+ int main() {
+ bool isAdmin = false;
+ /* } if (isAdmin) begin admins only */
+ std::cout << "You are an admin.\n";
+ /* end admins only { */
+ return 0;
+ }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
new file mode 100644
index 0000000..12fdf63
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc-misleading-bidirectional.cpp
@@ -0,0 +1,31 @@
+// RUN: %check_clang_tidy %s misc-misleading-bidirectional %t
+
+void func(void) {
+ int admin = 0;
+ /* }if(admin) begin*/
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+ const char msg[] = "if(admin) tes";
+ // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: string literal contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+}
+
+void all_fine(void) {
+ char valid[] = "somevalidsequence";
+ /* EOL ends bidi sequence
+ * and it's fine to do so.
+ * EOL ends isolate too
+ */
+}
+
+int invalid_utf_8(void) {
+ bool isAdmin = false;
+
+ // the comment below contains an invalid utf8 character, but should still be
+ // processed.
+
+ // CHECK-MESSAGES: :[[@LINE+1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+ /*€ } if (isAdmin) begin admins only */
+ return 1;
+ /* end admins only { */
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: comment contains misleading bidirectional Unicode characters [misc-misleading-bidirectional]
+ return 0;
+}
--
1.8.3.1