599 lines
19 KiB
Diff
599 lines
19 KiB
Diff
commit e6e6184bed490403811771fa527eb95b4ae53c7c
|
|
Author: Florian Weimer <fweimer@redhat.com>
|
|
Date: Thu Sep 22 12:10:41 2022 +0200
|
|
|
|
scripts: Enhance glibcpp to do basic macro processing
|
|
|
|
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
|
|
|
Conflicts:
|
|
support/Makefile
|
|
(spurious tests sorting change upstream)
|
|
|
|
diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py
|
|
index b44c6a4392dde8ce..455459a609eab120 100644
|
|
--- a/scripts/glibcpp.py
|
|
+++ b/scripts/glibcpp.py
|
|
@@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings.
|
|
"""
|
|
|
|
import collections
|
|
+import operator
|
|
import re
|
|
+import sys
|
|
|
|
# Caution: The order of the outermost alternation matters.
|
|
# STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
|
|
@@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter):
|
|
|
|
yield tok
|
|
pos = mo.end()
|
|
+
|
|
+class MacroDefinition(collections.namedtuple('MacroDefinition',
|
|
+ 'name_token args body error')):
|
|
+ """A preprocessor macro definition.
|
|
+
|
|
+ name_token is the Token_ for the name.
|
|
+
|
|
+ args is None for a macro that is not function-like. Otherwise, it
|
|
+ is a tuple that contains the macro argument name tokens.
|
|
+
|
|
+ body is a tuple that contains the tokens that constitute the body
|
|
+ of the macro definition (excluding whitespace).
|
|
+
|
|
+ error is None if no error was detected, or otherwise a problem
|
|
+ description associated with this macro definition.
|
|
+
|
|
+ """
|
|
+
|
|
+ @property
|
|
+ def function(self):
|
|
+ """Return true if the macro is function-like."""
|
|
+ return self.args is not None
|
|
+
|
|
+ @property
|
|
+ def name(self):
|
|
+ """Return the name of the macro being defined."""
|
|
+ return self.name_token.text
|
|
+
|
|
+ @property
|
|
+ def line(self):
|
|
+ """Return the line number of the macro definition."""
|
|
+ return self.name_token.line
|
|
+
|
|
+ @property
|
|
+ def args_lowered(self):
|
|
+ """Return the macro argument list as a list of strings"""
|
|
+ if self.function:
|
|
+ return [token.text for token in self.args]
|
|
+ else:
|
|
+ return None
|
|
+
|
|
+ @property
|
|
+ def body_lowered(self):
|
|
+ """Return the macro body as a list of strings."""
|
|
+ return [token.text for token in self.body]
|
|
+
|
|
+def macro_definitions(tokens):
|
|
+ """A generator for C macro definitions among tokens.
|
|
+
|
|
+ The generator yields MacroDefinition objects.
|
|
+
|
|
+ tokens must be iterable, yielding Token_ objects.
|
|
+
|
|
+ """
|
|
+
|
|
+ macro_name = None
|
|
+ macro_start = False # Set to false after macro name and one token.
|
|
+ macro_args = None # Set to a list during the macro argument sequence.
|
|
+ in_macro_args = False # True while processing macro identifier-list.
|
|
+ error = None
|
|
+ body = []
|
|
+
|
|
+ for token in tokens:
|
|
+ if token.context == 'define' and macro_name is None \
|
|
+ and token.kind == 'IDENT':
|
|
+ # Starting up macro processing.
|
|
+ if macro_start:
|
|
+ # First identifier is the macro name.
|
|
+ macro_name = token
|
|
+ else:
|
|
+ # Next token is the name.
|
|
+ macro_start = True
|
|
+ continue
|
|
+
|
|
+ if macro_name is None:
|
|
+ # Drop tokens not in macro definitions.
|
|
+ continue
|
|
+
|
|
+ if token.context != 'define':
|
|
+ # End of the macro definition.
|
|
+ if in_macro_args and error is None:
|
|
+ error = 'macro definition ends in macro argument list'
|
|
+ yield MacroDefinition(macro_name, macro_args, tuple(body), error)
|
|
+ # No longer in a macro definition.
|
|
+ macro_name = None
|
|
+ macro_start = False
|
|
+ macro_args = None
|
|
+ in_macro_args = False
|
|
+ error = None
|
|
+ body.clear()
|
|
+ continue
|
|
+
|
|
+ if macro_start:
|
|
+ # First token after the macro name.
|
|
+ macro_start = False
|
|
+ if token.kind == 'PUNCTUATOR' and token.text == '(':
|
|
+ macro_args = []
|
|
+ in_macro_args = True
|
|
+ continue
|
|
+
|
|
+ if in_macro_args:
|
|
+ if token.kind == 'IDENT' \
|
|
+ or (token.kind == 'PUNCTUATOR' and token.text == '...'):
|
|
+ # Macro argument or ... placeholder.
|
|
+ macro_args.append(token)
|
|
+ if token.kind == 'PUNCTUATOR':
|
|
+ if token.text == ')':
|
|
+ macro_args = tuple(macro_args)
|
|
+ in_macro_args = False
|
|
+ elif token.text == ',':
|
|
+ pass # Skip. Not a full syntax check.
|
|
+ elif error is None:
|
|
+ error = 'invalid punctuator in macro argument list: ' \
|
|
+ + repr(token.text)
|
|
+ elif error is None:
|
|
+ error = 'invalid {} token in macro argument list'.format(
|
|
+ token.kind)
|
|
+ continue
|
|
+
|
|
+ if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
|
|
+ body.append(token)
|
|
+
|
|
+ # Emit the macro in case the last line does not end with a newline.
|
|
+ if macro_name is not None:
|
|
+ if in_macro_args and error is None:
|
|
+ error = 'macro definition ends in macro argument list'
|
|
+ yield MacroDefinition(macro_name, macro_args, tuple(body), error)
|
|
+
|
|
+# Used to split UL etc. suffixes from numbers such as 123UL.
|
|
+RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)')
|
|
+
|
|
+BINARY_OPERATORS = {
|
|
+ '+': operator.add,
|
|
+ '<<': operator.lshift,
|
|
+}
|
|
+
|
|
+# Use the general-purpose dict type if it is order-preserving.
|
|
+if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
|
|
+ OrderedDict = collections.OrderedDict
|
|
+else:
|
|
+ OrderedDict = dict
|
|
+
|
|
+def macro_eval(macro_defs, reporter):
|
|
+ """Compute macro values
|
|
+
|
|
+ macro_defs is the output from macro_definitions. reporter is an
|
|
+ object that accepts reporter.error(line_number, message) and
|
|
+ reporter.note(line_number, message) calls to report errors
|
|
+ and error context invocations.
|
|
+
|
|
+ The returned dict contains the values of macros which are not
|
|
+ function-like, pairing their names with their computed values.
|
|
+
|
|
+ The current implementation is incomplete. It is deliberately not
|
|
+ entirely faithful to C, even in the implemented parts. It checks
|
|
+ that macro replacements follow certain syntactic rules even if
|
|
+ they are never evaluated.
|
|
+
|
|
+ """
|
|
+
|
|
+ # Unevaluated macro definitions by name.
|
|
+ definitions = OrderedDict()
|
|
+ for md in macro_defs:
|
|
+ if md.name in definitions:
|
|
+ reporter.error(md.line, 'macro {} redefined'.format(md.name))
|
|
+ reporter.note(definitions[md.name].line,
|
|
+ 'location of previous definition')
|
|
+ else:
|
|
+ definitions[md.name] = md
|
|
+
|
|
+ # String to value mappings for fully evaluated macros.
|
|
+ evaluated = OrderedDict()
|
|
+
|
|
+ # String to macro definitions during evaluation. Nice error
|
|
+ # reporting relies on deterministic iteration order.
|
|
+ stack = OrderedDict()
|
|
+
|
|
+ def eval_token(current, token):
|
|
+ """Evaluate one macro token.
|
|
+
|
|
+ Integers and strings are returned as such (the latter still
|
|
+ quoted). Identifiers are expanded.
|
|
+
|
|
+ None indicates an empty expansion or an error.
|
|
+
|
|
+ """
|
|
+
|
|
+ if token.kind == 'PP_NUMBER':
|
|
+ value = None
|
|
+ m = RE_SPLIT_INTEGER_SUFFIX.match(token.text)
|
|
+ if m:
|
|
+ try:
|
|
+ value = int(m.group(1), 0)
|
|
+ except ValueError:
|
|
+ pass
|
|
+ if value is None:
|
|
+ reporter.error(token.line,
|
|
+ 'invalid number {!r} in definition of {}'.format(
|
|
+ token.text, current.name))
|
|
+ return value
|
|
+
|
|
+ if token.kind == 'STRING':
|
|
+ return token.text
|
|
+
|
|
+ if token.kind == 'CHARCONST' and len(token.text) == 3:
|
|
+ return ord(token.text[1])
|
|
+
|
|
+ if token.kind == 'IDENT':
|
|
+ name = token.text
|
|
+ result = eval1(current, name)
|
|
+ if name not in evaluated:
|
|
+ evaluated[name] = result
|
|
+ return result
|
|
+
|
|
+ reporter.error(token.line,
|
|
+ 'unrecognized {!r} in definition of {}'.format(
|
|
+ token.text, current.name))
|
|
+ return None
|
|
+
|
|
+
|
|
+    def eval1(current, name):
|
|
+        """Evaluate one name.
|
|
+
|
|
+        The name is looked up in definitions and the macro definition
|
|
+        is evaluated recursively if necessary.  The current argument
|
|
+        is the macro definition being evaluated; it is only used for
|
|
+        error reporting.
|
|
+
|
|
+        Returns the cached or computed value.  None as a return value
|
|
+        indicates an error or an empty expansion.
|
|
+
|
|
+        """
|
|
+
|
|
+        # Fast path if the value has already been evaluated.
|
|
+        if name in evaluated:
|
|
+            return evaluated[name]
|
|
+
|
|
+        try:
|
|
+            md = definitions[name]
|
|
+        except KeyError:
|
|
+            reporter.error(current.line,
|
|
+                'reference to undefined identifier {} in definition of {}'
|
|
+                .format(name, current.name))
|
|
+            return None
|
|
+
|
|
+        if md.name in stack:
|
|
+            # Recursive macro definition.
|
|
+            md = stack[name]
|
|
+            reporter.error(md.line,
|
|
+                'macro definition {} refers to itself'.format(md.name))
|
|
+            for md1 in reversed(list(stack.values())):
|
|
+                if md1 is md:
|
|
+                    break
|
|
+                reporter.note(md1.line,
|
|
+                              'evaluated from {}'.format(md1.name))
|
|
+            return None
|
|
+
|
|
+        # Reject function-like macros before pushing onto the stack.
|
|
+        # This return is outside the try/finally below, so an entry
|
|
+        # pushed earlier would never be removed and would show up in
|
|
+        # the notes of unrelated recursion errors.
|
|
+        if md.function:
|
|
+            reporter.error(current.line,
|
|
+                'attempt to evaluate function-like macro {}'.format(name))
|
|
+            reporter.note(md.line, 'definition of {}'.format(md.name))
|
|
+            return None
|
|
+
|
|
+        stack[md.name] = md
|
|
+        try:
|
|
+            body = md.body
|
|
+            if len(body) == 0:
|
|
+                # Empty expansion.
|
|
+                return None
|
|
+
|
|
+            # Remove surrounding ().
|
|
+            if body[0].text == '(' and body[-1].text == ')':
|
|
+                body = body[1:-1]
|
|
+                had_parens = True
|
|
+            else:
|
|
+                had_parens = False
|
|
+
|
|
+            if len(body) == 1:
|
|
+                return eval_token(md, body[0])
|
|
+
|
|
+            # Minimal expression evaluator for binary operators.  The
|
|
+            # length is checked before indexing so that a body such as
|
|
+            # "()" is reported below instead of raising IndexError.
|
|
+            if len(body) == 3 and body[1].text in BINARY_OPERATORS:
|
|
+                op = body[1].text
|
|
+                if not had_parens:
|
|
+                    reporter.error(body[1].line,
|
|
+                        'missing parentheses around {} expression'.format(op))
|
|
+                    reporter.note(md.line,
|
|
+                        'in definition of macro {}'.format(md.name))
|
|
+
|
|
+                left = eval_token(md, body[0])
|
|
+                right = eval_token(md, body[2])
|
|
+
|
|
+                # left and right are evaluated values (int, str or
|
|
+                # None), not tokens, so line numbers come from the
|
|
+                # operand tokens themselves.
|
|
+                operands_valid = True
|
|
+                if not isinstance(left, int):
|
|
+                    reporter.error(body[0].line,
|
|
+                        'left operand of {} is not an integer'.format(op))
|
|
+                    reporter.note(md.line,
|
|
+                        'in definition of macro {}'.format(md.name))
|
|
+                    operands_valid = False
|
|
+                if not isinstance(right, int):
|
|
+                    reporter.error(body[2].line,
|
|
+                        'right operand of {} is not an integer'.format(op))
|
|
+                    reporter.note(md.line,
|
|
+                        'in definition of macro {}'.format(md.name))
|
|
+                    operands_valid = False
|
|
+                if not operands_valid:
|
|
+                    # Do not apply the operator to None or strings.
|
|
+                    return None
|
|
+                return BINARY_OPERATORS[op](left, right)
|
|
+
|
|
+            reporter.error(md.line,
|
|
+                'uninterpretable macro token sequence: {}'.format(
|
|
+                    ' '.join(md.body_lowered)))
|
|
+            return None
|
|
+        finally:
|
|
+            del stack[md.name]
|
|
+
|
|
+ # Start of main body of macro_eval.
|
|
+ for md in definitions.values():
|
|
+ name = md.name
|
|
+ if name not in evaluated and not md.function:
|
|
+ evaluated[name] = eval1(md, name)
|
|
+ return evaluated
|
|
diff --git a/support/Makefile b/support/Makefile
|
|
index 09b41b0d57e9239a..7749ac24f1ac3622 100644
|
|
--- a/support/Makefile
|
|
+++ b/support/Makefile
|
|
@@ -223,11 +223,11 @@ $(objpfx)true-container : $(libsupport)
|
|
tests = \
|
|
README-testing \
|
|
tst-support-namespace \
|
|
+ tst-support-process_state \
|
|
tst-support_blob_repeat \
|
|
tst-support_capture_subprocess \
|
|
tst-support_descriptors \
|
|
tst-support_format_dns_packet \
|
|
- tst-support-process_state \
|
|
tst-support_quote_blob \
|
|
tst-support_quote_string \
|
|
tst-support_record_failure \
|
|
@@ -248,6 +248,12 @@ $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \
|
|
$(evaluate-test)
|
|
endif
|
|
|
|
+tests-special += $(objpfx)tst-glibcpp.out
|
|
+
|
|
+$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py
|
|
+ PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \
|
|
+ $(evaluate-test)
|
|
+
|
|
$(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so
|
|
|
|
tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd)
|
|
diff --git a/support/tst-glibcpp.py b/support/tst-glibcpp.py
|
|
new file mode 100644
|
|
index 0000000000000000..a2db1916ccfce3c3
|
|
--- /dev/null
|
|
+++ b/support/tst-glibcpp.py
|
|
@@ -0,0 +1,217 @@
|
|
+#! /usr/bin/python3
|
|
+# Tests for scripts/glibcpp.py
|
|
+# Copyright (C) 2022 Free Software Foundation, Inc.
|
|
+# This file is part of the GNU C Library.
|
|
+#
|
|
+# The GNU C Library is free software; you can redistribute it and/or
|
|
+# modify it under the terms of the GNU Lesser General Public
|
|
+# License as published by the Free Software Foundation; either
|
|
+# version 2.1 of the License, or (at your option) any later version.
|
|
+#
|
|
+# The GNU C Library is distributed in the hope that it will be useful,
|
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+# Lesser General Public License for more details.
|
|
+#
|
|
+# You should have received a copy of the GNU Lesser General Public
|
|
+# License along with the GNU C Library; if not, see
|
|
+# <https://www.gnu.org/licenses/>.
|
|
+
|
|
+import inspect
|
|
+import sys
|
|
+
|
|
+import glibcpp
|
|
+
|
|
+# Error counter.
|
|
+errors = 0
|
|
+
|
|
+class TokenizerErrors:
|
|
+ """Used as the error reporter during tokenization."""
|
|
+
|
|
+ def __init__(self):
|
|
+ self.errors = []
|
|
+
|
|
+ def error(self, token, message):
|
|
+ self.errors.append((token, message))
|
|
+
|
|
+def check_macro_definitions(source, expected):
|
|
+ reporter = TokenizerErrors()
|
|
+ tokens = glibcpp.tokenize_c(source, reporter)
|
|
+
|
|
+ actual = []
|
|
+ for md in glibcpp.macro_definitions(tokens):
|
|
+ if md.function:
|
|
+ md_name = '{}({})'.format(md.name, ','.join(md.args_lowered))
|
|
+ else:
|
|
+ md_name = md.name
|
|
+ actual.append((md_name, md.body_lowered))
|
|
+
|
|
+ if actual != expected or reporter.errors:
|
|
+ global errors
|
|
+ errors += 1
|
|
+ # Obtain python source line information.
|
|
+ frame = inspect.stack(2)[1]
|
|
+ print('{}:{}: error: macro definition mismatch, actual definitions:'
|
|
+ .format(frame[1], frame[2]))
|
|
+ for md in actual:
|
|
+ print('note: {} {!r}'.format(md[0], md[1]))
|
|
+
|
|
+ if reporter.errors:
|
|
+ for err in reporter.errors:
|
|
+ print('note: tokenizer error: {}: {}'.format(
|
|
+ err[0].line, err[1]))
|
|
+
|
|
+def check_macro_eval(source, expected, expected_errors=''):
|
|
+ reporter = TokenizerErrors()
|
|
+ tokens = list(glibcpp.tokenize_c(source, reporter))
|
|
+
|
|
+ if reporter.errors:
|
|
+ # Obtain python source line information.
|
|
+ frame = inspect.stack(2)[1]
|
|
+ for err in reporter.errors:
|
|
+ print('{}:{}: tokenizer error: {}: {}'.format(
|
|
+ frame[1], frame[2], err[0].line, err[1]))
|
|
+ return
|
|
+
|
|
+ class EvalReporter:
|
|
+ """Used as the error reporter during evaluation."""
|
|
+
|
|
+ def __init__(self):
|
|
+ self.lines = []
|
|
+
|
|
+ def error(self, line, message):
|
|
+ self.lines.append('{}: error: {}\n'.format(line, message))
|
|
+
|
|
+ def note(self, line, message):
|
|
+ self.lines.append('{}: note: {}\n'.format(line, message))
|
|
+
|
|
+ reporter = EvalReporter()
|
|
+ actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
|
|
+ actual_errors = ''.join(reporter.lines)
|
|
+ if actual != expected or actual_errors != expected_errors:
|
|
+ global errors
|
|
+ errors += 1
|
|
+ # Obtain python source line information.
|
|
+ frame = inspect.stack(2)[1]
|
|
+ print('{}:{}: error: macro evaluation mismatch, actual results:'
|
|
+ .format(frame[1], frame[2]))
|
|
+ for k, v in actual.items():
|
|
+ print(' {}: {!r}'.format(k, v))
|
|
+ for msg in reporter.lines:
|
|
+ sys.stdout.write(' | ' + msg)
|
|
+
|
|
+# Individual test cases follow.
|
|
+
|
|
+check_macro_definitions('', [])
|
|
+check_macro_definitions('int main()\n{\n{\n', [])
|
|
+check_macro_definitions("""
|
|
+#define A 1
|
|
+#define B 2 /* ignored */
|
|
+#define C 3 // also ignored
|
|
+#define D \
|
|
+ 4
|
|
+#define STRING "string"
|
|
+#define FUNCLIKE(a, b) (a + b)
|
|
+#define FUNCLIKE2(a, b) (a + \
|
|
+ b)
|
|
+""", [('A', ['1']),
|
|
+ ('B', ['2']),
|
|
+ ('C', ['3']),
|
|
+ ('D', ['4']),
|
|
+ ('STRING', ['"string"']),
|
|
+ ('FUNCLIKE(a,b)', list('(a+b)')),
|
|
+ ('FUNCLIKE2(a,b)', list('(a+b)')),
|
|
+ ])
|
|
+check_macro_definitions('#define MACRO', [('MACRO', [])])
|
|
+check_macro_definitions('#define MACRO\n', [('MACRO', [])])
|
|
+check_macro_definitions('#define MACRO()', [('MACRO()', [])])
|
|
+check_macro_definitions('#define MACRO()\n', [('MACRO()', [])])
|
|
+
|
|
+check_macro_eval('#define A 1', {'A': 1})
|
|
+check_macro_eval('#define A (1)', {'A': 1})
|
|
+check_macro_eval('#define A (1 + 1)', {'A': 2})
|
|
+check_macro_eval('#define A (1U << 31)', {'A': 1 << 31})
|
|
+check_macro_eval('''\
|
|
+#define A (B + 1)
|
|
+#define B 10
|
|
+#define F(x) ignored
|
|
+#define C "not ignored"
|
|
+''', {
|
|
+ 'A': 11,
|
|
+ 'B': 10,
|
|
+ 'C': '"not ignored"',
|
|
+})
|
|
+
|
|
+# Checking for evaluation errors.
|
|
+check_macro_eval('''\
|
|
+#define A 1
|
|
+#define A 2
|
|
+''', {
|
|
+ 'A': 1,
|
|
+}, '''\
|
|
+2: error: macro A redefined
|
|
+1: note: location of previous definition
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A A
|
|
+#define B 1
|
|
+''', {
|
|
+ 'A': None,
|
|
+ 'B': 1,
|
|
+}, '''\
|
|
+1: error: macro definition A refers to itself
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A B
|
|
+#define B A
|
|
+''', {
|
|
+ 'A': None,
|
|
+ 'B': None,
|
|
+}, '''\
|
|
+1: error: macro definition A refers to itself
|
|
+2: note: evaluated from B
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A B
|
|
+#define B C
|
|
+#define C A
|
|
+''', {
|
|
+ 'A': None,
|
|
+ 'B': None,
|
|
+ 'C': None,
|
|
+}, '''\
|
|
+1: error: macro definition A refers to itself
|
|
+3: note: evaluated from C
|
|
+2: note: evaluated from B
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A 1 +
|
|
+''', {
|
|
+ 'A': None,
|
|
+}, '''\
|
|
+1: error: uninterpretable macro token sequence: 1 +
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A 3*5
|
|
+''', {
|
|
+ 'A': None,
|
|
+}, '''\
|
|
+1: error: uninterpretable macro token sequence: 3 * 5
|
|
+''')
|
|
+
|
|
+check_macro_eval('''\
|
|
+#define A 3 + 5
|
|
+''', {
|
|
+ 'A': 8,
|
|
+}, '''\
|
|
+1: error: missing parentheses around + expression
|
|
+1: note: in definition of macro A
|
|
+''')
|
|
+
|
|
+if errors:
|
|
+ sys.exit(1)
|