find-provides.ksyms, find-requires.ksyms: unify symbol parsing

Since the immediate symbols can be retrieved from "objdump -t" output
as well, it makes some sense to use it as the sole data source and
handle both variants inside the awk script.  This simplification comes
at some runtime cost for the simpler case of the immediate symbols,
though, and overall it is about 5% faster in the relative symbols case
and about the same 5% slower in the absolute symbols case, with the
latter not as important moving forward, probably:

    $ for i in ./lib/modules/*; do \
        echo "====== $i ====="; \
        diff -u <(find $i | ./find-provides.ksyms.old) <(find $i | ./find-provides.ksyms.new); \
        echo -n "old: "; find $i | time ./find-provides.ksyms.old > /dev/null; \
        echo -n "new: "; find $i | time ./find-provides.ksyms.new > /dev/null; \
    done
    ====== ./lib/modules/4.18.0-372.57.1.el8_6.s390x =====
    old: ./find-provides.ksyms.old > /dev/null  3.98s user 3.04s system 129% cpu 5.411 total
    new: ./find-provides.ksyms.new > /dev/null  3.78s user 3.06s system 132% cpu 5.181 total
    ====== ./lib/modules/4.18.0-372.57.1.el8_6.x86_64 =====
    old: ./find-provides.ksyms.old > /dev/null  6.18s user 4.00s system 124% cpu 8.161 total
    new: ./find-provides.ksyms.new > /dev/null  6.57s user 4.84s system 132% cpu 8.644 total
    ====== ./lib/modules/5.14.0-284.15.1.el9_2.s390x =====
    old: ./find-provides.ksyms.old > /dev/null  4.70s user 2.94s system 126% cpu 6.061 total
    new: ./find-provides.ksyms.new > /dev/null  4.37s user 3.03s system 127% cpu 5.793 total
    ====== ./lib/modules/5.14.0-284.15.1.el9_2.x86_64 =====
    old: ./find-provides.ksyms.old > /dev/null  6.66s user 4.35s system 123% cpu 8.884 total
    new: ./find-provides.ksyms.new > /dev/null  7.07s user 5.00s system 130% cpu 9.218 total
    ====== ./lib/modules/6.4.0-0.rc1.20230511git80e62bc8487b.19.eln126.s390x =====
    old: ./find-provides.ksyms.old > /dev/null  3.81s user 2.62s system 128% cpu 5.018 total
    new: ./find-provides.ksyms.new > /dev/null  3.55s user 2.56s system 128% cpu 4.743 total
    ====== ./lib/modules/6.4.0-0.rc1.20230511git80e62bc8487b.19.eln126.x86_64 =====
    old: ./find-provides.ksyms.old > /dev/null  13.79s user 8.59s system 125% cpu 17.817 total
    new: ./find-provides.ksyms.new > /dev/null  13.18s user 8.78s system 127% cpu 17.247 total

* find-provides.ksyms: Do not perform an initial "nm | awk" run in an attempt
to capture absolute symbols, just parse "objdump -t" output and handle both
absolute and relative symbols in the awk script based on the section name.
* find-requires.ksyms (all_provides): Likewise.

Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
This commit is contained in:
Eugene Syromiatnikov 2023-06-01 11:20:50 +02:00
parent 28c1a86a0c
commit 148ee639b5
2 changed files with 68 additions and 85 deletions

View File

@ -39,11 +39,15 @@ for module in $(grep -E '/lib/modules/.+\.ko(\.gz|\.bz2|\.xz|\.zst)?$') "$@"; do
# A modversion can be stored as an ELF symbol in various ways:
# - An immediate symbol whose value is available directly; it shows up
# in the nm output, for example:
# in the nm or objdump -t output, for example:
# $ nm mlx5_core_5.14.x86_64.ko | grep '__crc_' | head -n 3
# 0000000092f175ca A __crc_mlx5_access_reg
# 000000005b88c9f1 A __crc_mlx5_add_flow_rules
# 00000000e7c0ec8a A __crc_mlx5_alloc_bfreg
# $ objdump -t lib/modules/mlx5_core_5.14.x86_64.ko | grep __crc_ | sort -k 5,5 | head -n 3
# 0000000092f175ca g *ABS* 0000000000000000 __crc_mlx5_access_reg
# 000000005b88c9f1 g *ABS* 0000000000000000 __crc_mlx5_add_flow_rules
# 00000000e7c0ec8a g *ABS* 0000000000000000 __crc_mlx5_alloc_bfreg
# $ zgrep mlx5_access_reg ./lib/modules/5.14.0-284.15.1.el9_2.x86_64/symvers.gz
# 0x92f175ca mlx5_access_reg drivers/net/ethernet/mellanox/mlx5/core/mlx5_core EXPORT_SYMBOL_GPL
# This approach was being used on x86 and arm before Linux 5.19,
@ -88,59 +92,46 @@ for module in $(grep -E '/lib/modules/.+\.ko(\.gz|\.bz2|\.xz|\.zst)?$') "$@"; do
# This data, after some post-processing, can be used in the awk script
# that extracts parts of the section according to the offsets got
# from the "objdump -t" output.
#
# An important assumption here is that all the __crc_* symbols in a kmod
# are either absolute or relative ones (and this has held so far,
# and supposedly will in the future, as the symbols are universally
# non-immediate now).
# awk script return code:
# 0 - absolute __crc_* symbols have been found, output has been
# generated;
# 23 - a non-absolute __crc_* symbol has been found;
# 42 - no __crc_* symbols have been found.
nm "$module" \
| awk \
-v 'dep_pfx='"$dep_pfx" \
--non-decimal-data \
'BEGIN { exit_code = 42 }
match($0, /^([0-9a-f]+) (.) __crc_(.+)/, a) {
if (a[2] == "A") {
printf("%s(%s) = 0x%08x\n", dep_pfx, a[3], strtonum("0x" a[1]));
exit_code = 0;
} else {
exit_code = 23;
exit;
}
}
END { exit exit_code }'
[ 23 = "$?" ] && {
kmod_elf_hdr="$(readelf -h "$module")"
[ "x$kmod_elf_hdr" = "x${kmod_elf_hdr%Data:*little endian*}" ]
revbytes="$?"
objdump -t "$module" \
| awk \
-v 'dep_pfx='"$dep_pfx" \
-v 'module='"$module" \
-v 'revbytes='"$revbytes" \
--non-decimal-data \
'function readsect(name, a, t) {
'BEGIN { revbytes = 0 }
# Determine (once) whether "readelf -h" reports the module as little endian.
# The result is cached in the global revbytes:
#    0 - not checked yet (the value assigned in BEGIN),
#   -1 - checked, no little-endian "Data:" line was seen,
#    1 - checked, the little-endian "Data:" line was found.
# t is a local scratch variable (declared as an extra parameter), callers
# pass no arguments.  Returns the cached revbytes value (-1 or 1).
function check_endianness( t) {
# A non-zero value means readelf has already been consulted; reuse the answer.
if (revbytes) return revbytes;
# Assume "not little endian" until the ELF header output says otherwise.
revbytes = -1;
while (("readelf -h \"" module "\"" | getline t) > 0) {
# \047 is the octal escape for the apostrophe; it cannot be written
# literally because this awk program is enclosed in shell single quotes.
if (match(t, /^ Data: *2\047s complement, little endian$/)) {
revbytes = 1;
break;
}
}
return revbytes;
}
function readsect(name, a, t) {
a = "";
while (("readelf -R \"" name "\" \"" module "\"" | getline t) > 0) {
if (match(t, /^ 0x[0-9a-f]{8}/))
a = a substr(t, 14, 8) substr(t, 23, 8) substr(t, 32, 8) substr(t, 41, 8);
}
if (revbytes) { a = gensub(/(..)(..)(..)(..)/, "\\4\\3\\2\\1", "g", a); }
if (check_endianness() == 1)
a = gensub(/(..)(..)(..)(..)/, "\\4\\3\\2\\1", "g", a);
sectdata[name] = a;
}
match($0, /^([0-9a-f]+) [gl]...... (.*) [0-9a-f]+ __crc_(.*)$/, a) {
if (a[2] == "*ABS*") {
printf("%s(%s) = 0x%08x\n", dep_pfx, a[3], strtonum("0x" a[1]));
} else {
if (!(a[2] in sectdata)) { readsect(a[2]) }
printf("%s(%s) = 0x%08s\n", dep_pfx, a[3], substr(sectdata[a[2]], (strtonum("0x" a[1]) * 2) + 1, 8))
}'
}
}'
[ -z "$tmpfile" ] || rm -f -- "$tmpfile"
done \

View File

@ -39,39 +39,28 @@ all_provides() {
module="$tmpfile"
fi
# awk script return code:
# 0 - absolute __crc_* symbols have been found, output has been
# generated;
# 23 - a non-absolute __crc_* symbol has been found;
# 42 - no __crc_* symbols have been found.
nm "$module" \
| awk \
-v 'dep_pfx='"$dep_pfx" \
--non-decimal-data \
'BEGIN { exit_code = 42 }
match($0, /^([0-9a-f]+) (.) __crc_(.+)/, a) {
if (a[2] == "A") {
printf("%s(%s) = 0x%08x\n", dep_pfx, a[3], strtonum("0x" a[1]));
exit_code = 0;
} else {
exit_code = 23;
exit;
}
}
END { exit exit_code }'
[ 23 = "$?" ] && {
kmod_elf_hdr="$(readelf -h "$module")"
[ "x$kmod_elf_hdr" = "x${kmod_elf_hdr%Data:*little endian*}" ]
revbytes="$?"
objdump -t "$module" \
| awk \
-v 'dep_pfx='"$dep_pfx" \
-v 'module='"$module" \
-v 'revbytes='"$revbytes" \
--non-decimal-data \
'function readsect(name, a, t) {
'BEGIN { revbytes = 0 }
# Determine (once) whether "readelf -h" reports the module as little endian.
# The result is cached in the global revbytes:
#    0 - not checked yet (the value assigned in BEGIN),
#   -1 - checked, no little-endian "Data:" line was seen,
#    1 - checked, the little-endian "Data:" line was found.
# t is a local scratch variable (declared as an extra parameter), callers
# pass no arguments.  Returns the cached revbytes value (-1 or 1).
function check_endianness( t) {
# A non-zero value means readelf has already been consulted; reuse the answer.
if (revbytes) return revbytes;
# Assume "not little endian" until the ELF header output says otherwise.
revbytes = -1;
while (("readelf -h \"" module "\"" | getline t) > 0) {
# \047 is the octal escape for the apostrophe; it cannot be written
# literally because this awk program is enclosed in shell single quotes.
if (match(t, /^ Data: *2\047s complement, little endian$/)) {
revbytes = 1;
break;
}
}
return revbytes;
}
function readsect(name, a, t) {
a = "";
while (("readelf -R \"" name "\" \"" module "\"" | getline t) > 0) {
if (match(t, /^ 0x[0-9a-f]{8}/))
@ -82,10 +71,13 @@ all_provides() {
}
match($0, /^([0-9a-f]+) [gl]...... (.*) [0-9a-f]+ __crc_(.*)$/, a) {
if (a[2] == "*ABS*") {
printf("%s(%s) = 0x%08x\n", dep_pfx, a[3], strtonum("0x" a[1]));
} else {
if (!(a[2] in sectdata)) { readsect(a[2]) }
printf("%s(%s) = 0x%08s\n", dep_pfx, a[3], substr(sectdata[a[2]], (strtonum("0x" a[1]) * 2) + 1, 8))
}'
}
}'
[ -z "$tmpfile" ] || rm -f -- "$tmpfile"
done \