import CS bcc-0.30.0-6.el9

eabdullin 2024-09-30 15:09:52 +00:00
parent 2f4d557103
commit 9b479d8367
13 changed files with 111 additions and 122823 deletions

View File

@ -1 +1 @@
-8ce0ccb0724da475f127d62acc10a88569956474 SOURCES/bcc-0.28.0.tar.gz
+26ec7f9fc22494b9b6f20cd38ca216edc130704e SOURCES/bcc-0.30.0.tar.gz
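The line above is the dist-git lookaside metadata: one SHA-1 checksum per source tarball. A minimal sketch of verifying such a line locally, assuming the tarball has already been downloaded into the SOURCES/ path:

    import hashlib

    def verify_source(metadata_line):
        # Each metadata line is "<sha1> <path>", as in the diff above.
        expected, path = metadata_line.split()
        digest = hashlib.sha1()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest() == expected

    print(verify_source("26ec7f9fc22494b9b6f20cd38ca216edc130704e SOURCES/bcc-0.30.0.tar.gz"))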

.gitignore
View File

@ -1 +1 @@
-SOURCES/bcc-0.28.0.tar.gz
+SOURCES/bcc-0.30.0.tar.gz

View File

@ -1,66 +0,0 @@
From 63808fbdcb70ce2e858db0a42e7e3eeec153d5b6 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Wed, 20 Sep 2023 10:37:38 -0400
Subject: [PATCH 4/4] Adding memory zones for Power server
The PPC_BOOK3S_64 config skips setting ZONE_DMA for
server processors; only the NORMAL and MOVABLE zones are
available on Power.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/compactsnoop.py | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/tools/compactsnoop.py b/tools/compactsnoop.py
index 2b395dec..1a476aad 100755
--- a/tools/compactsnoop.py
+++ b/tools/compactsnoop.py
@@ -260,11 +260,12 @@ TRACEPOINT_PROBE(compaction, mm_compaction_end)
}
"""
-if platform.machine() != 'x86_64':
+if platform.machine() != 'x86_64' and platform.machine() != 'ppc64le':
print("""
- Currently only support x86_64 servers, if you want to use it on
- other platforms, please refer include/linux/mmzone.h to modify
- zone_idex_to_str to get the right zone type
+ Currently only support x86_64 and power servers, if you want
+ to use it on other platforms(including power embedded processors),
+ please refer include/linux/mmzone.h to modify zone_idex_to_str to
+ get the right zone type
""")
exit()
@@ -296,13 +297,22 @@ initial_ts = 0
# from include/linux/mmzone.h
# NOTICE: consider only x86_64 servers
zone_type = {
- 0: "ZONE_DMA",
- 1: "ZONE_DMA32",
- 2: "ZONE_NORMAL",
+ 'x86_64':
+ {
+ 0: "ZONE_DMA",
+ 1: "ZONE_DMA32",
+ 2: "ZONE_NORMAL"
+ },
+ # Zones in Power server only
+ 'ppc64le':
+ {
+ 0: "ZONE_NORMAL",
+ 1: "ZONE_MOVABLE"
+ }
}
- if idx in zone_type:
- return zone_type[idx]
+ if idx in zone_type[platform.machine()]:
+ return zone_type[platform.machine()][idx]
else:
return str(idx)
--
2.43.0
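Extracted as a standalone sketch, the per-architecture lookup this patch introduces looks like the following (zone tables copied from the diff above; the .get() fallback is an illustrative variant of the tool's if/else):

    import platform

    # Zone numbering differs per architecture; see include/linux/mmzone.h.
    zone_type = {
        'x86_64': {0: "ZONE_DMA", 1: "ZONE_DMA32", 2: "ZONE_NORMAL"},
        'ppc64le': {0: "ZONE_NORMAL", 1: "ZONE_MOVABLE"},  # Power server only
    }

    def zone_idex_to_str(idx):
        # Fall back to the raw index for unknown zones or architectures.
        return zone_type.get(platform.machine(), {}).get(idx, str(idx))

    print(zone_idex_to_str(0))
    print(zone_idex_to_str(5))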

View File

@ -1,45 +0,0 @@
From e6493835a28c08c45fd374e70dba7aa66f700d08 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Tue, 14 Nov 2023 03:54:19 -0500
Subject: [PATCH 2/4] Fixing pvalloc memleak test
A request to allocate 30K bytes using pvalloc() results
in allocating 3*64KB (on a 64KB page-size system). The assertion
expects the leak to be 30KB, whereas the leaked memory is much larger
due to pvalloc's implementation on Power.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tests/python/test_tools_memleak.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/python/test_tools_memleak.py b/tests/python/test_tools_memleak.py
index cae7e35d..4e921a0c 100755
--- a/tests/python/test_tools_memleak.py
+++ b/tests/python/test_tools_memleak.py
@@ -3,6 +3,7 @@
from unittest import main, skipUnless, TestCase
from utils import kernel_version_ge
import os
+import platform
import subprocess
import sys
import tempfile
@@ -102,7 +103,13 @@ TOOLS_DIR = "/bcc/tools/"
self.assertEqual(cfg.leaking_amount, self.run_leaker("memalign"))
def test_pvalloc(self):
- self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
+ # pvalloc's implementation for power invokes mmap(), which adjusts the
+ # allocated size to meet pvalloc's constraints. Actual leaked memory
+ # could be more than requested, hence assertLessEqual.
+ if platform.machine() == 'ppc64le':
+ self.assertLessEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
+ else:
+ self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
def test_aligned_alloc(self):
self.assertEqual(cfg.leaking_amount, self.run_leaker("aligned_alloc"))
--
2.43.0
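For illustration, the rounding at the heart of this fix: pvalloc() rounds a request up to whole pages, so the observed "leak" can exceed the requested 30KB on a 64KB-page system. This sketch shows only the minimum rounding; the 3*64KB figure in the commit message comes from additional mmap() behavior on Power:

    import os

    def pvalloc_rounded(size, pagesize=None):
        # pvalloc() returns the smallest multiple of the page size >= size.
        if pagesize is None:
            pagesize = os.sysconf("SC_PAGESIZE")
        return -(-size // pagesize) * pagesize

    print(pvalloc_rounded(30 * 1024, 4096))   # 32768 on a 4KB-page x86_64 box
    print(pvalloc_rounded(30 * 1024, 65536))  # 65536 on a 64KB-page Power box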

View File

@ -1,41 +0,0 @@
From a5d86850e3bfeaa23ef4c82dccb9288a2cd42a27 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Mon, 11 Sep 2023 05:10:36 -0400
Subject: [PATCH 3/4] Skipping USDT tests for Power processor
Support for the Power processor is absent from the folly
package, so skip the USDT tests that depend on folly.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tests/python/CMakeLists.txt | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt
index a42a16ce..81a547f0 100644
--- a/tests/python/CMakeLists.txt
+++ b/tests/python/CMakeLists.txt
@@ -71,12 +71,14 @@ add_test(NAME py_test_tools_smoke WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_tools_smoke sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_smoke.py)
add_test(NAME py_test_tools_memleak WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_tools_memleak sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_memleak.py)
-add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
-add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
-add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
+if(NOT(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64"))
+ add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
+ add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
+ add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
+endif()
add_test(NAME py_test_license WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_license sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_license.py)
add_test(NAME py_test_free_bcc_memory WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
--
2.43.0
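The gate above is CMake-level, so on Power the tests are never registered at all. A hypothetical Python-side equivalent, had the skip been done in the test suite instead (TestUsdtGate and its body are illustrative only):

    import platform
    from unittest import TestCase, main, skipUnless

    # folly has no Power support, so tests depending on it must not run there.
    not_power = platform.machine() not in ("ppc64le", "ppc64")

    class TestUsdtGate(TestCase):
        @skipUnless(not_power, "folly-based USDT tests unavailable on Power")
        def test_usdt_placeholder(self):
            self.assertTrue(True)  # stands in for a real USDT test body

    if __name__ == "__main__":
        main()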

File diff suppressed because it is too large (this is the removed libbpf-tools-Add-s390x-support.patch, the one deleted patch not shown above; see the spec change below)

View File

@ -1,476 +0,0 @@
From 60860bf3a400dcf72b4026fb2973803cfb12ccf1 Mon Sep 17 00:00:00 2001
From: mickey_zhu <mickey_zhu@realsil.com.cn>
Date: Tue, 27 Jun 2023 16:32:44 +0800
Subject: [PATCH] libbpf-tools: add block_io_{start,done} tracepoints support
to bio tools
Some bio tools fail to kprobe blk_account_io_{start,done} after v5.17,
because those functions became inlined, see [0]. To fix this issue, the
block_io_{start,done} tracepoints were introduced in the kernel, see [1].
Update the related bio tools to support the new tracepoints, and also
simplify attaching.
[0] Kernel commit 450b7879e345 (block: move blk_account_io_{start,done} to blk-mq.c)
[1] Kernel commit 5a80bd075f3b (block: introduce block_io_start/block_io_done tracepoints)
Change-Id: I62b957abd7ce2901eb114bd57c78938e4f083e4d
Signed-off-by: Mickey Zhu <mickey_zhu@realsil.com.cn>
---
libbpf-tools/biosnoop.bpf.c | 9 ++++
libbpf-tools/biosnoop.c | 78 +++++++++++++--------------------
libbpf-tools/biostacks.bpf.c | 46 +++++++++++++------
libbpf-tools/biostacks.c | 85 +++++++++++++++++++++---------------
libbpf-tools/biotop.bpf.c | 44 +++++++++++++++++--
libbpf-tools/biotop.c | 59 ++++++++++++++++---------
6 files changed, 199 insertions(+), 122 deletions(-)
diff --git a/libbpf-tools/biosnoop.bpf.c b/libbpf-tools/biosnoop.bpf.c
index b791555f..fcc5c5ce 100644
--- a/libbpf-tools/biosnoop.bpf.c
+++ b/libbpf-tools/biosnoop.bpf.c
@@ -76,6 +76,15 @@ int BPF_PROG(blk_account_io_start, struct request *rq)
return trace_pid(rq);
}
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *rq)
+{
+ if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
+ return 0;
+
+ return trace_pid(rq);
+}
+
SEC("kprobe/blk_account_io_merge_bio")
int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
{
diff --git a/libbpf-tools/biosnoop.c b/libbpf-tools/biosnoop.c
index 21773729..f9468900 100644
--- a/libbpf-tools/biosnoop.c
+++ b/libbpf-tools/biosnoop.c
@@ -212,6 +212,16 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
}
+static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
+{
+ if (fentry_can_attach("blk_account_io_start", NULL))
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "blk_account_io_start");
+ else
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "__blk_account_io_start");
+}
+
int main(int argc, char **argv)
{
const struct partition *partition;
@@ -260,12 +270,23 @@ int main(int argc, char **argv)
obj->rodata->filter_cg = env.cg;
obj->rodata->min_ns = env.min_lat_ms * 1000000;
- if (fentry_can_attach("blk_account_io_start", NULL))
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "blk_account_io_start");
- else
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "__blk_account_io_start");
+ if (tracepoint_exists("block", "block_io_start"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ else {
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ blk_account_io_set_attach_target(obj);
+ }
+
+ ksyms = ksyms__load();
+ if (!ksyms) {
+ fprintf(stderr, "failed to load kallsyms\n");
+ goto cleanup;
+ }
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
+
+ if (!env.queued)
+ bpf_program__set_autoload(obj->progs.block_rq_insert, false);
err = biosnoop_bpf__load(obj);
if (err) {
@@ -288,48 +309,9 @@ int main(int argc, char **argv)
}
}
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
- if (!obj->links.blk_account_io_start) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
- strerror(-err));
- goto cleanup;
- }
- ksyms = ksyms__load();
- if (!ksyms) {
- err = -ENOMEM;
- fprintf(stderr, "failed to load kallsyms\n");
- goto cleanup;
- }
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
- obj->links.blk_account_io_merge_bio =
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
- if (!obj->links.blk_account_io_merge_bio) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
- strerror(-err));
- goto cleanup;
- }
- }
- if (env.queued) {
- obj->links.block_rq_insert =
- bpf_program__attach(obj->progs.block_rq_insert);
- if (!obj->links.block_rq_insert) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
- goto cleanup;
- }
- }
- obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
- if (!obj->links.block_rq_issue) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
- goto cleanup;
- }
- obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
- if (!obj->links.block_rq_complete) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
+ err = biosnoop_bpf__attach(obj);
+ if (err) {
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
goto cleanup;
}
diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
index c3950910..0ca69880 100644
--- a/libbpf-tools/biostacks.bpf.c
+++ b/libbpf-tools/biostacks.bpf.c
@@ -67,20 +67,8 @@ int trace_start(void *ctx, struct request *rq, bool merge_bio)
return 0;
}
-SEC("fentry/blk_account_io_start")
-int BPF_PROG(blk_account_io_start, struct request *rq)
-{
- return trace_start(ctx, rq, false);
-}
-
-SEC("kprobe/blk_account_io_merge_bio")
-int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
-{
- return trace_start(ctx, rq, true);
-}
-
-SEC("fentry/blk_account_io_done")
-int BPF_PROG(blk_account_io_done, struct request *rq)
+static __always_inline
+int trace_done(void *ctx, struct request *rq)
{
u64 slot, ts = bpf_ktime_get_ns();
struct internal_rqinfo *i_rqinfop;
@@ -110,4 +98,34 @@ int BPF_PROG(blk_account_io_done, struct request *rq)
return 0;
}
+SEC("kprobe/blk_account_io_merge_bio")
+int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
+{
+ return trace_start(ctx, rq, true);
+}
+
+SEC("fentry/blk_account_io_start")
+int BPF_PROG(blk_account_io_start, struct request *rq)
+{
+ return trace_start(ctx, rq, false);
+}
+
+SEC("fentry/blk_account_io_done")
+int BPF_PROG(blk_account_io_done, struct request *rq)
+{
+ return trace_done(ctx, rq);
+}
+
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *rq)
+{
+ return trace_start(ctx, rq, false);
+}
+
+SEC("tp_btf/block_io_done")
+int BPF_PROG(block_io_done, struct request *rq)
+{
+ return trace_done(ctx, rq);
+}
+
char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
index e1878d1f..e7875f76 100644
--- a/libbpf-tools/biostacks.c
+++ b/libbpf-tools/biostacks.c
@@ -128,6 +128,39 @@ void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
return;
}
+static bool has_block_io_tracepoints(void)
+{
+ return tracepoint_exists("block", "block_io_start") &&
+ tracepoint_exists("block", "block_io_done");
+}
+
+static void disable_block_io_tracepoints(struct biostacks_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
+}
+
+static void disable_blk_account_io_fentry(struct biostacks_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+}
+
+static void blk_account_io_set_attach_target(struct biostacks_bpf *obj)
+{
+ if (fentry_can_attach("blk_account_io_start", NULL)) {
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "blk_account_io_start");
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
+ 0, "blk_account_io_done");
+ } else {
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "__blk_account_io_start");
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
+ 0, "__blk_account_io_done");
+ }
+}
+
int main(int argc, char **argv)
{
struct partitions *partitions = NULL;
@@ -172,50 +205,30 @@ int main(int argc, char **argv)
obj->rodata->targ_ms = env.milliseconds;
- if (fentry_can_attach("blk_account_io_start", NULL)) {
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "blk_account_io_start");
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
- "blk_account_io_done");
- } else {
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "__blk_account_io_start");
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
- "__blk_account_io_done");
- }
-
- err = biostacks_bpf__load(obj);
- if (err) {
- fprintf(stderr, "failed to load BPF object: %d\n", err);
- goto cleanup;
+ if (has_block_io_tracepoints())
+ disable_blk_account_io_fentry(obj);
+ else {
+ disable_block_io_tracepoints(obj);
+ blk_account_io_set_attach_target(obj);
}
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
- if (!obj->links.blk_account_io_start) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(-err));
- goto cleanup;
- }
ksyms = ksyms__load();
if (!ksyms) {
fprintf(stderr, "failed to load kallsyms\n");
goto cleanup;
}
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
- obj->links.blk_account_io_merge_bio =
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
- if (!obj->links.blk_account_io_merge_bio) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
- strerror(-err));
- goto cleanup;
- }
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
+
+ err = biostacks_bpf__load(obj);
+ if (err) {
+ fprintf(stderr, "failed to load BPF object: %d\n", err);
+ goto cleanup;
}
- obj->links.blk_account_io_done = bpf_program__attach(obj->progs.blk_account_io_done);
- if (!obj->links.blk_account_io_done) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
- strerror(-err));
+
+ err = biostacks_bpf__attach(obj);
+ if (err) {
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
goto cleanup;
}
diff --git a/libbpf-tools/biotop.bpf.c b/libbpf-tools/biotop.bpf.c
index 226e32d3..07631378 100644
--- a/libbpf-tools/biotop.bpf.c
+++ b/libbpf-tools/biotop.bpf.c
@@ -30,8 +30,8 @@ struct {
__type(value, struct val_t);
} counts SEC(".maps");
-SEC("kprobe")
-int BPF_KPROBE(blk_account_io_start, struct request *req)
+static __always_inline
+int trace_start(struct request *req)
{
struct who_t who = {};
@@ -56,8 +56,8 @@ int BPF_KPROBE(blk_mq_start_request, struct request *req)
return 0;
}
-SEC("kprobe")
-int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
+static __always_inline
+int trace_done(struct request *req)
{
struct val_t *valp, zero = {};
struct info_t info = {};
@@ -103,4 +103,40 @@ int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
return 0;
}
+SEC("kprobe/blk_account_io_start")
+int BPF_KPROBE(blk_account_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("kprobe/blk_account_io_done")
+int BPF_KPROBE(blk_account_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
+SEC("kprobe/__blk_account_io_start")
+int BPF_KPROBE(__blk_account_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("kprobe/__blk_account_io_done")
+int BPF_KPROBE(__blk_account_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("tp_btf/block_io_done")
+int BPF_PROG(block_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/biotop.c b/libbpf-tools/biotop.c
index 75484281..5b3a7cf3 100644
--- a/libbpf-tools/biotop.c
+++ b/libbpf-tools/biotop.c
@@ -354,6 +354,38 @@ static int print_stat(struct biotop_bpf *obj)
return err;
}
+static bool has_block_io_tracepoints(void)
+{
+ return tracepoint_exists("block", "block_io_start") &&
+ tracepoint_exists("block", "block_io_done");
+}
+
+static void disable_block_io_tracepoints(struct biotop_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
+}
+
+static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
+}
+
+static void blk_account_io_set_autoload(struct biotop_bpf *obj,
+ struct ksyms *ksyms)
+{
+ if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
+ } else {
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+ }
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -386,32 +418,19 @@ int main(int argc, char **argv)
goto cleanup;
}
+ if (has_block_io_tracepoints())
+ disable_blk_account_io_kprobes(obj);
+ else {
+ disable_block_io_tracepoints(obj);
+ blk_account_io_set_autoload(obj, ksyms);
+ }
+
err = biotop_bpf__load(obj);
if (err) {
warn("failed to load BPF object: %d\n", err);
goto cleanup;
}
- if (ksyms__get_symbol(ksyms, "__blk_account_io_start"))
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "__blk_account_io_start");
- else
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "blk_account_io_start");
-
- if (!obj->links.blk_account_io_start) {
- warn("failed to load attach blk_account_io_start\n");
- goto cleanup;
- }
-
- if (ksyms__get_symbol(ksyms, "__blk_account_io_done"))
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "__blk_account_io_done");
- else
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "blk_account_io_done");
-
- if (!obj->links.blk_account_io_done) {
- warn("failed to load attach blk_account_io_done\n");
- goto cleanup;
- }
-
err = biotop_bpf__attach(obj);
if (err) {
warn("failed to attach BPF programs: %d\n", err);
--
2.41.0
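A userspace sketch of the detection logic the patch builds on. tracepoint_exists() and fentry_can_attach() are the tools' C helpers; the Python below only approximates them via tracefs and /proc/kallsyms, and the mount paths may vary by system:

    import os

    def tracepoint_exists(category, name):
        # Mirror the C helper's intent: look for the tracefs event directory.
        for root in ("/sys/kernel/tracing", "/sys/kernel/debug/tracing"):
            if os.path.isdir(os.path.join(root, "events", category, name)):
                return True
        return False

    def ksym_exists(name):
        # A symbol listed in /proc/kallsyms can be kprobed/fentry-attached.
        with open("/proc/kallsyms") as f:
            return any(line.split()[2] == name for line in f)

    # Prefer the stable tracepoints; fall back to the (possibly renamed) kprobes.
    if tracepoint_exists("block", "block_io_start"):
        print("use tp_btf/block_io_start")
    elif ksym_exists("blk_account_io_start"):
        print("attach to blk_account_io_start")
    else:
        print("attach to __blk_account_io_start")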

View File

@ -1,855 +0,0 @@
From 2e758b65231f976c67a0aad791aabc7927ea7086 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Thu, 27 Jul 2023 18:19:18 +0200
Subject: [PATCH] tools: Add support for the new block_io_* tracepoints
The bio tools currently depend on the blk_account_io_done/start
functions, which can be inlined. To fix that, a couple of tracepoints
have been added upstream (block:block_io_start/done). This patch adds
support for those tracepoints when they are available.
Unfortunately, the bio tools rely on data that is not available to
the tracepoints (mostly the struct request), so the tracepoints can't
be used as a drop-in replacement for blk_account_io_*. The main
difference is that we can't use the struct request as the hash key
anymore, so the tools now use the pair (dev_t, sector) for that purpose.
For the biolatency tool, the -F option is disabled when only the
tracepoints are available, because not all of the flags are accessible
from the tracepoints. Otherwise, all features of the tools should
remain.
Closes #4261
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/biolatency.py | 166 ++++++++++++++++++++++++++++--------
tools/biosnoop.py | 200 +++++++++++++++++++++++++++++++++-----------
tools/biotop.py | 108 +++++++++++++++++++-----
3 files changed, 371 insertions(+), 103 deletions(-)
diff --git a/tools/biolatency.py b/tools/biolatency.py
index 8fe43a7c..03b48a4c 100755
--- a/tools/biolatency.py
+++ b/tools/biolatency.py
@@ -11,6 +11,7 @@
#
# 20-Sep-2015 Brendan Gregg Created this.
# 31-Mar-2022 Rocky Xing Added disk filter support.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -72,7 +73,7 @@ bpf_text = """
#include <linux/blk-mq.h>
typedef struct disk_key {
- char disk[DISK_NAME_LEN];
+ dev_t dev;
u64 slot;
} disk_key_t;
@@ -86,26 +87,70 @@ typedef struct ext_val {
u64 count;
} ext_val_t;
-BPF_HASH(start, struct request *);
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct start_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
+ CMD_FLAGS
+};
+
+BPF_HASH(start, struct start_key);
STORAGE
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
// time block I/O
-int trace_req_start(struct pt_regs *ctx, struct request *req)
+static int __trace_req_start(struct start_key key)
{
DISK_FILTER
u64 ts = bpf_ktime_get_ns();
- start.update(&req, &ts);
+ start.update(&key, &ts);
return 0;
}
+int trace_req_start(struct pt_regs *ctx, struct request *req)
+{
+ struct start_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ SET_FLAGS
+
+ return __trace_req_start(key);
+}
+
+int trace_req_start_tp(struct tp_args *args)
+{
+ struct start_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_start(key);
+}
+
// output
-int trace_req_done(struct pt_regs *ctx, struct request *req)
+static int __trace_req_done(struct start_key key)
{
u64 *tsp, delta;
// fetch timestamp and calculate delta
- tsp = start.lookup(&req);
+ tsp = start.lookup(&key);
if (tsp == 0) {
return 0; // missed issue
}
@@ -116,9 +161,31 @@ int trace_req_done(struct pt_regs *ctx, struct request *req)
// store as histogram
STORE
- start.delete(&req);
+ start.delete(&key);
return 0;
}
+
+int trace_req_done(struct pt_regs *ctx, struct request *req)
+{
+ struct start_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ SET_FLAGS
+
+ return __trace_req_done(key);
+}
+
+int trace_req_done_tp(struct tp_args *args)
+{
+ struct start_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_done(key);
+}
"""
# code substitutions
@@ -134,21 +201,18 @@ store_str = ""
if args.disks:
storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
disks_str = """
- disk_key_t key = {.slot = bpf_log2l(delta)};
- void *__tmp = (void *)req->__RQ_DISK__->disk_name;
- bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
- dist.atomic_increment(key);
+ disk_key_t dkey = {};
+ dkey.dev = key.dev;
+ dkey.slot = bpf_log2l(delta);
+ dist.atomic_increment(dkey);
"""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
- else:
- store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
+ store_str += disks_str
elif args.flags:
storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
store_str += """
- flag_key_t key = {.slot = bpf_log2l(delta)};
- key.flags = req->cmd_flags;
- dist.atomic_increment(key);
+ flag_key_t fkey = {.slot = bpf_log2l(delta)};
+ fkey.flags = key.flags;
+ dist.atomic_increment(fkey);
"""
else:
storage_str += "BPF_HISTOGRAM(dist);"
@@ -161,21 +225,13 @@ store_str = ""
exit(1)
stat_info = os.stat(disk_path)
- major = os.major(stat_info.st_rdev)
- minor = os.minor(stat_info.st_rdev)
-
- disk_field_str = ""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- disk_field_str = 'req->rq_disk'
- else:
- disk_field_str = 'req->q->disk'
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
disk_filter_str = """
- struct gendisk *disk = %s;
- if (!(disk->major == %d && disk->first_minor == %d)) {
+ if(key.dev != %s) {
return 0;
}
- """ % (disk_field_str, major, minor)
+ """ % (dev)
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
else:
@@ -194,6 +250,16 @@ store_str = ""
bpf_text = bpf_text.replace("STORAGE", storage_str)
bpf_text = bpf_text.replace("STORE", store_str)
+if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
+else:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
+if args.flags:
+ bpf_text = bpf_text.replace('CMD_FLAGS', 'u64 flags;')
+ bpf_text = bpf_text.replace('SET_FLAGS', 'key.flags = req->cmd_flags;')
+else:
+ bpf_text = bpf_text.replace('CMD_FLAGS', '')
+ bpf_text = bpf_text.replace('SET_FLAGS', '')
if debug or args.ebpf:
print(bpf_text)
@@ -205,25 +271,53 @@ b = BPF(text=bpf_text)
if args.queued:
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
- else:
+ elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
+ else:
+ if args.flags:
+ # Some flags are accessible in the rwbs field (RAHEAD, SYNC and META)
+ # but other aren't. Disable the -F option for tracepoint for now.
+ print("ERROR: blk_account_io_start probe not available. Can't use -F.")
+ exit()
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_req_start_tp")
else:
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
+
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
+else:
+ if args.flags:
+ print("ERROR: blk_account_io_done probe not available. Can't use -F.")
+ exit()
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_done_tp")
+
if not args.json:
print("Tracing block device I/O... Hit Ctrl-C to end.")
-def disk_print(s):
- disk = s.decode('utf-8', 'replace')
- if not disk:
- disk = "<unknown>"
- return disk
+# cache disk major,minor -> diskname
+diskstats = "/proc/diskstats"
+disklookup = {}
+with open(diskstats) as stats:
+ for line in stats:
+ a = line.split()
+ disklookup[a[0] + "," + a[1]] = a[2]
+
+def disk_print(d):
+ major = d >> 20
+ minor = d & ((1 << 20) - 1)
+
+ disk = str(major) + "," + str(minor)
+ if disk in disklookup:
+ diskname = disklookup[disk]
+ else:
+ diskname = "?"
+
+ return diskname
# see blk_fill_rwbs():
req_opf = {
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
index 33703233..f0fef98b 100755
--- a/tools/biosnoop.py
+++ b/tools/biosnoop.py
@@ -14,6 +14,7 @@
# 11-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
# 21-Jun-2022 Rocky Xing Added disk filter support.
# 13-Oct-2022 Rocky Xing Added support for displaying block I/O pattern.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -64,6 +65,24 @@ struct val_t {
char name[TASK_COMM_LEN];
};
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct hash_key {
+ dev_t dev;
+ u32 rwflag;
+ sector_t sector;
+};
+
+
#ifdef INCLUDE_PATTERN
struct sector_key_t {
u32 dev_major;
@@ -79,6 +98,7 @@ enum bio_pattern {
struct data_t {
u32 pid;
+ u32 dev;
u64 rwflag;
u64 delta;
u64 qdelta;
@@ -88,7 +108,6 @@ struct data_t {
enum bio_pattern pattern;
#endif
u64 ts;
- char disk_name[DISK_NAME_LEN];
char name[TASK_COMM_LEN];
};
@@ -96,12 +115,45 @@ struct data_t {
BPF_HASH(last_sectors, struct sector_key_t, u64);
#endif
-BPF_HASH(start, struct request *, struct start_req_t);
-BPF_HASH(infobyreq, struct request *, struct val_t);
+BPF_HASH(start, struct hash_key, struct start_req_t);
+BPF_HASH(infobyreq, struct hash_key, struct val_t);
BPF_PERF_OUTPUT(events);
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
+/*
+ * The following deals with a kernel version change (in mainline 4.7, although
+ * it may be backported to earlier kernels) with how block request write flags
+ * are tested. We handle both pre- and post-change versions here. Please avoid
+ * kernel version tests like this as much as possible: they inflate the code,
+ * test, and maintenance burden.
+ */
+static int get_rwflag(u32 cmd_flags) {
+#ifdef REQ_WRITE
+ return !!(cmd_flags & REQ_WRITE);
+#elif defined(REQ_OP_SHIFT)
+ return !!((cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
+#else
+ return !!((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
+#endif
+}
+
+#define RWBS_LEN 8
+
+static int get_rwflag_tp(char *rwbs) {
+ for (int i = 0; i < RWBS_LEN; i++) {
+ if (rwbs[i] == 'W')
+ return 1;
+ if (rwbs[i] == '\\0')
+ return 0;
+ }
+ return 0;
+}
+
// cache PID and comm by-req
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
+static int __trace_pid_start(struct hash_key key)
{
DISK_FILTER
@@ -113,47 +165,76 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
if (##QUEUE##) {
val.ts = bpf_ktime_get_ns();
}
- infobyreq.update(&req, &val);
+ infobyreq.update(&key, &val);
}
return 0;
}
+
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
+ return __trace_pid_start(key);
+}
+
+int trace_pid_start_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .rwflag = get_rwflag_tp(args->rwbs),
+ .sector = args->sector
+ };
+
+ return __trace_pid_start(key);
+}
+
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
DISK_FILTER
struct start_req_t start_req = {
.ts = bpf_ktime_get_ns(),
.data_len = req->__data_len
};
- start.update(&req, &start_req);
+ start.update(&key, &start_req);
return 0;
}
// output
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
+static int __trace_req_completion(void *ctx, struct hash_key key)
{
struct start_req_t *startp;
struct val_t *valp;
struct data_t data = {};
- struct gendisk *rq_disk;
+ //struct gendisk *rq_disk;
u64 ts;
// fetch timestamp and calculate delta
- startp = start.lookup(&req);
+ startp = start.lookup(&key);
if (startp == 0) {
// missed tracing issue
return 0;
}
ts = bpf_ktime_get_ns();
- rq_disk = req->__RQ_DISK__;
+ //rq_disk = req->__RQ_DISK__;
data.delta = ts - startp->ts;
data.ts = ts / 1000;
data.qdelta = 0;
data.len = startp->data_len;
- valp = infobyreq.lookup(&req);
+ valp = infobyreq.lookup(&key);
if (valp == 0) {
data.name[0] = '?';
data.name[1] = 0;
@@ -162,10 +243,9 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
data.qdelta = startp->ts - valp->ts;
}
data.pid = valp->pid;
- data.sector = req->__sector;
+ data.sector = key.sector;
+ data.dev = key.dev;
bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
- bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
- rq_disk->disk_name);
}
#ifdef INCLUDE_PATTERN
@@ -174,8 +254,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
u64 *sector, last_sector;
struct sector_key_t sector_key = {
- .dev_major = rq_disk->major,
- .dev_minor = rq_disk->first_minor
+ .dev_major = key.dev >> 20,
+ .dev_minor = key.dev & ((1 << 20) - 1)
};
sector = last_sectors.lookup(&sector_key);
@@ -187,27 +267,36 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
last_sectors.update(&sector_key, &last_sector);
#endif
-/*
- * The following deals with a kernel version change (in mainline 4.7, although
- * it may be backported to earlier kernels) with how block request write flags
- * are tested. We handle both pre- and post-change versions here. Please avoid
- * kernel version tests like this as much as possible: they inflate the code,
- * test, and maintenance burden.
- */
-#ifdef REQ_WRITE
- data.rwflag = !!(req->cmd_flags & REQ_WRITE);
-#elif defined(REQ_OP_SHIFT)
- data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
-#else
- data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
-#endif
+ data.rwflag = key.rwflag;
events.perf_submit(ctx, &data, sizeof(data));
- start.delete(&req);
- infobyreq.delete(&req);
+ start.delete(&key);
+ infobyreq.delete(&key);
return 0;
}
+
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
+ return __trace_req_completion(ctx, key);
+}
+
+int trace_req_completion_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .rwflag = get_rwflag_tp(args->rwbs),
+ .sector = args->sector
+ };
+
+ return __trace_req_completion(args, key);
+}
"""
if args.queue:
bpf_text = bpf_text.replace('##QUEUE##', '1')
@@ -225,21 +314,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
exit(1)
stat_info = os.stat(disk_path)
- major = os.major(stat_info.st_rdev)
- minor = os.minor(stat_info.st_rdev)
-
- disk_field_str = ""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- disk_field_str = 'req->rq_disk'
- else:
- disk_field_str = 'req->q->disk'
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
disk_filter_str = """
- struct gendisk *disk = %s;
- if (!(disk->major == %d && disk->first_minor == %d)) {
+ if(key.dev != %s) {
return 0;
}
- """ % (disk_field_str, major, minor)
+ """ % (dev)
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
else:
@@ -254,15 +335,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
b = BPF(text=bpf_text)
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
+else:
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
+else:
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
# header
print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
@@ -273,6 +358,27 @@ print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
print("%7s " % ("QUE(ms)"), end="")
print("%7s" % "LAT(ms)")
+
+# cache disk major,minor -> diskname
+diskstats = "/proc/diskstats"
+disklookup = {}
+with open(diskstats) as stats:
+ for line in stats:
+ a = line.split()
+ disklookup[a[0] + "," + a[1]] = a[2]
+
+def disk_print(d):
+ major = d >> 20
+ minor = d & ((1 << 20) - 1)
+
+ disk = str(major) + "," + str(minor)
+ if disk in disklookup:
+ diskname = disklookup[disk]
+ else:
+ diskname = "<unknown>"
+
+ return diskname
+
rwflg = ""
pattern = ""
start_ts = 0
@@ -297,9 +403,7 @@ P_RANDOM = 2
delta = float(event.ts) - start_ts
- disk_name = event.disk_name.decode('utf-8', 'replace')
- if not disk_name:
- disk_name = '<unknown>'
+ disk_name = disk_print(event.dev)
print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % (
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
diff --git a/tools/biotop.py b/tools/biotop.py
index fcdd373f..2620983a 100755
--- a/tools/biotop.py
+++ b/tools/biotop.py
@@ -14,6 +14,7 @@
#
# 06-Feb-2016 Brendan Gregg Created this.
# 17-Mar-2022 Rocky Xing Added PID filter support.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -88,14 +89,35 @@ struct val_t {
u32 io;
};
-BPF_HASH(start, struct request *, struct start_req_t);
-BPF_HASH(whobyreq, struct request *, struct who_t);
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct hash_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
+};
+
+BPF_HASH(start, struct hash_key, struct start_req_t);
+BPF_HASH(whobyreq, struct hash_key, struct who_t);
BPF_HASH(counts, struct info_t, struct val_t);
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
// cache PID and comm by-req
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
+static int __trace_pid_start(struct hash_key key)
{
- struct who_t who = {};
+ struct who_t who;
u32 pid;
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
@@ -104,30 +126,54 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
return 0;
who.pid = pid;
- whobyreq.update(&req, &who);
+ whobyreq.update(&key, &who);
}
return 0;
}
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ return __trace_pid_start(key);
+}
+
+int trace_pid_start_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_pid_start(key);
+}
+
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
struct start_req_t start_req = {
.ts = bpf_ktime_get_ns(),
.data_len = req->__data_len
};
- start.update(&req, &start_req);
+ start.update(&key, &start_req);
return 0;
}
// output
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
+static int __trace_req_completion(struct hash_key key)
{
struct start_req_t *startp;
// fetch timestamp and calculate delta
- startp = start.lookup(&req);
+ startp = start.lookup(&key);
if (startp == 0) {
return 0; // missed tracing issue
}
@@ -135,12 +181,12 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
struct who_t *whop;
u32 pid;
- whop = whobyreq.lookup(&req);
+ whop = whobyreq.lookup(&key);
pid = whop != 0 ? whop->pid : 0;
if (FILTER_PID) {
- start.delete(&req);
+ start.delete(&key);
if (whop != 0) {
- whobyreq.delete(&req);
+ whobyreq.delete(&key);
}
return 0;
}
@@ -150,8 +196,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
// setup info_t key
struct info_t info = {};
- info.major = req->__RQ_DISK__->major;
- info.minor = req->__RQ_DISK__->first_minor;
+ info.major = key.dev >> 20;
+ info.minor = key.dev & ((1 << 20) - 1);
/*
* The following deals with a kernel version change (in mainline 4.7, although
* it may be backported to earlier kernels) with how block request write flags
@@ -159,13 +205,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
* kernel version tests like this as much as possible: they inflate the code,
* test, and maintenance burden.
*/
-#ifdef REQ_WRITE
+/*#ifdef REQ_WRITE
info.rwflag = !!(req->cmd_flags & REQ_WRITE);
#elif defined(REQ_OP_SHIFT)
info.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
#else
info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
-#endif
+#endif*/
if (whop == 0) {
// missed pid who, save stats as pid 0
@@ -183,11 +229,31 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
valp->io++;
}
- start.delete(&req);
- whobyreq.delete(&req);
+ start.delete(&key);
+ whobyreq.delete(&key);
return 0;
}
+
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ return __trace_req_completion(key);
+}
+
+int trace_req_completion_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_completion(key);
+}
"""
if args.ebpf:
@@ -207,15 +273,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
b = BPF(text=bpf_text)
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
+else:
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
+else:
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
--
2.41.0
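Since struct request is out of reach for the tracepoints, the tools key their hashes on (dev_t, sector). A round-trip sketch of the device encoding and the /proc/diskstats name lookup used in the diff above:

    MINORBITS = 20  # the tools pack dev_t as (major << 20) | minor

    def ddevt(major, minor):
        return (major << MINORBITS) | minor

    def dev_split(dev):
        return dev >> MINORBITS, dev & ((1 << MINORBITS) - 1)

    # Resolve a name the way the updated tools do, via /proc/diskstats:
    disklookup = {}
    with open("/proc/diskstats") as stats:
        for line in stats:
            fields = line.split()
            disklookup[(int(fields[0]), int(fields[1]))] = fields[2]

    dev = ddevt(8, 0)  # (8, 0) is typically /dev/sda
    print(disklookup.get(dev_split(dev), "?"))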

View File

@ -1,156 +0,0 @@
From 0d1a67ba9490aabbb874819d8d07b1868c8c2b1d Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 1 Feb 2023 17:30:03 +0100
Subject: [PATCH 2/2] tools/tcpstates: fix IPv6 journal
When logging an IPv6 state change, journal_fields tries to pack
event.saddr and event.daddr, which are not integers in this case, to
present a bytes-like object to socket.inet_ntop. This can be fixed by
using a similar type for [sd]addr for both IPv4 and IPv6. Making both
an array of u32 solves the issue by presenting a bytes-like object
directly to inet_ntop, without the need for the struct-packing stage.
The now-similar behavior also makes it easier to factor the code for
IPv4 and IPv6.
It solves the following error:
/usr/share/bcc/tools/tcpstates -Y
SKADDR C-PID C-COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
ffff8b2e83e56180 0 swapper/9 :: 22 :: 0 LISTEN -> SYN_RECV 0.000
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
Traceback (most recent call last):
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
callback(cpu, data, size)
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
journal.send(**journal_fields(event, AF_INET6))
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
struct.error: required argument is not an integer
ffff8b2e83e56180 0 swapper/9 2620:52:0:2580:5054:ff:fe6b:6f1f 22 2620:52:0:2b11:2f5e:407d:b35d:4663 60396 SYN_RECV -> ESTABLISHED 0.010
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
Traceback (most recent call last):
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
callback(cpu, data, size)
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
journal.send(**journal_fields(event, AF_INET6))
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
struct.error: required argument is not an integer
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/tcpstates.py | 55 +++++++++++++++++-----------------------------
1 file changed, 20 insertions(+), 35 deletions(-)
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
index 9b2ccfa4..6c845c9b 100755
--- a/tools/tcpstates.py
+++ b/tools/tcpstates.py
@@ -19,7 +19,6 @@ from __future__ import print_function
from bcc import BPF
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
-from struct import pack
from time import strftime, time
from os import getuid
@@ -78,8 +77,8 @@ BPF_HASH(last, struct sock *, u64);
struct ipv4_data_t {
u64 ts_us;
u64 skaddr;
- u32 saddr;
- u32 daddr;
+ u32 saddr[1];
+ u32 daddr[1];
u64 span_us;
u32 pid;
u16 lport;
@@ -93,8 +92,8 @@ BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t {
u64 ts_us;
u64 skaddr;
- unsigned __int128 saddr;
- unsigned __int128 daddr;
+ u32 saddr[4];
+ u32 daddr[4];
u64 span_us;
u32 pid;
u16 lport;
@@ -350,9 +349,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
'OBJECT_PID': str(event.pid),
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
# Custom fields, aka "stuff we sort of made up".
- 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
+ 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, event.saddr),
'OBJECT_TCP_SOURCE_PORT': str(event.lport),
- 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
+ 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, event.daddr),
'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
@@ -373,8 +372,7 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
return fields
# process event
-def print_ipv4_event(cpu, data, size):
- event = b["ipv4_events"].event(data)
+def print_event(event, addr_family):
global start_ts
if args.time:
if args.csv:
@@ -389,39 +387,26 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%.6f," % delta_s, end="")
else:
print("%-9.6f " % delta_s, end="")
+ if addr_family == AF_INET:
+ version = "4"
+ else:
+ version = "6"
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
- "4" if args.wide or args.csv else "",
- inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
- inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
+ version if args.wide or args.csv else "",
+ inet_ntop(addr_family, event.saddr), event.lport,
+ inet_ntop(addr_family, event.daddr), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
- journal.send(**journal_fields(event, AF_INET))
+ journal.send(**journal_fields(event, addr_family))
+
+def print_ipv4_event(cpu, data, size):
+ event = b["ipv4_events"].event(data)
+ print_event(event, AF_INET)
def print_ipv6_event(cpu, data, size):
event = b["ipv6_events"].event(data)
- global start_ts
- if args.time:
- if args.csv:
- print("%s," % strftime("%H:%M:%S"), end="")
- else:
- print("%-8s " % strftime("%H:%M:%S"), end="")
- if args.timestamp:
- if start_ts == 0:
- start_ts = event.ts_us
- delta_s = (float(event.ts_us) - start_ts) / 1000000
- if args.csv:
- print("%.6f," % delta_s, end="")
- else:
- print("%-9.6f " % delta_s, end="")
- print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
- "6" if args.wide or args.csv else "",
- inet_ntop(AF_INET6, event.saddr), event.lport,
- inet_ntop(AF_INET6, event.daddr), event.dport,
- tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
- float(event.span_us) / 1000))
- if args.journal:
- journal.send(**journal_fields(event, AF_INET6))
+ print_event(event, AF_INET6)
# initialize BPF
b = BPF(text=bpf_text)
--
2.41.0
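The crux in plain Python: inet_ntop() needs a bytes-like object. A ctypes u32 array (what the perf event now carries) qualifies directly, while the old scalar field needed pack() and only made sense for IPv4. A sketch, assuming a little-endian host:

    import ctypes
    from socket import inet_ntop, AF_INET, AF_INET6
    from struct import pack

    # Old path: a scalar u32 had to be packed into bytes first (IPv4 only).
    print(inet_ntop(AF_INET, pack("I", 0x0100007f)))     # 127.0.0.1

    # New path: ctypes u32 arrays are bytes-like, so both families go
    # straight through inet_ntop with no struct-packing stage.
    saddr4 = (ctypes.c_uint32 * 1)(0x0100007f)
    saddr6 = (ctypes.c_uint32 * 4)(0, 0, 0, 0x01000000)  # ::1
    print(inet_ntop(AF_INET, saddr4))
    print(inet_ntop(AF_INET6, saddr6))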

View File

@ -1,144 +0,0 @@
From 53b89f35e8970beef55046c1bf035264f110f06d Mon Sep 17 00:00:00 2001
From: hejun01 <hejun01@corp.netease.com>
Date: Thu, 29 Jun 2023 20:24:07 +0800
Subject: [PATCH 1/2] tools/tcpstates: fix context ptr modified error
Introduce a local variable tcp_newstate to avoid an LLVM optimization
of args->newstate that would cause the context pointer args to be
modified. Split event.ports into lport and dport. Switch the type of
the TCP state fields from unsigned int to int.
---
tools/tcpstates.py | 47 +++++++++++++++++++++++++---------------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
index 89f3638c..9b2ccfa4 100755
--- a/tools/tcpstates.py
+++ b/tools/tcpstates.py
@@ -82,9 +82,10 @@ struct ipv4_data_t {
u32 daddr;
u64 span_us;
u32 pid;
- u32 ports;
- u32 oldstate;
- u32 newstate;
+ u16 lport;
+ u16 dport;
+ int oldstate;
+ int newstate;
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);
@@ -96,9 +97,10 @@ struct ipv6_data_t {
unsigned __int128 daddr;
u64 span_us;
u32 pid;
- u32 ports;
- u32 oldstate;
- u32 newstate;
+ u16 lport;
+ u16 dport;
+ int oldstate;
+ int newstate;
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
@@ -132,6 +134,9 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
u16 family = args->family;
FILTER_FAMILY
+ // workaround to avoid llvm optimization which will cause context ptr args modified
+ int tcp_newstate = args->newstate;
+
if (args->family == AF_INET) {
struct ipv4_data_t data4 = {
.span_us = delta_us,
@@ -141,8 +146,8 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
data4.ts_us = bpf_ktime_get_ns() / 1000;
__builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
__builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
- // a workaround until data4 compiles with separate lport/dport
- data4.ports = dport + ((0ULL + lport) << 16);
+ data4.lport = lport;
+ data4.dport = dport;
data4.pid = pid;
bpf_get_current_comm(&data4.task, sizeof(data4.task));
@@ -157,14 +162,14 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
data6.ts_us = bpf_ktime_get_ns() / 1000;
__builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
__builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
- // a workaround until data6 compiles with separate lport/dport
- data6.ports = dport + ((0ULL + lport) << 16);
+ data6.lport = lport;
+ data6.dport = dport;
data6.pid = pid;
bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(args, &data6, sizeof(data6));
}
- if (args->newstate == TCP_CLOSE) {
+ if (tcp_newstate == TCP_CLOSE) {
last.delete(&sk);
} else {
u64 ts = bpf_ktime_get_ns();
@@ -210,8 +215,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
data4.ts_us = bpf_ktime_get_ns() / 1000;
data4.saddr = sk->__sk_common.skc_rcv_saddr;
data4.daddr = sk->__sk_common.skc_daddr;
- // a workaround until data4 compiles with separate lport/dport
- data4.ports = dport + ((0ULL + lport) << 16);
+ data4.lport = lport;
+ data4.dport = dport;
data4.pid = pid;
bpf_get_current_comm(&data4.task, sizeof(data4.task));
@@ -228,8 +233,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
- // a workaround until data6 compiles with separate lport/dport
- data6.ports = dport + ((0ULL + lport) << 16);
+ data6.lport = lport;
+ data6.dport = dport;
data6.pid = pid;
bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
@@ -346,9 +351,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
# Custom fields, aka "stuff we sort of made up".
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
- 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 16),
+ 'OBJECT_TCP_SOURCE_PORT': str(event.lport),
'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
- 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffff),
+ 'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
'OBJECT_TCP_SPAN_TIME': str(event.span_us)
@@ -386,8 +391,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%-9.6f " % delta_s, end="")
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
"4" if args.wide or args.csv else "",
- inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 16,
- inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffff,
+ inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
+ inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
@@ -411,8 +416,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%-9.6f " % delta_s, end="")
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
"6" if args.wide or args.csv else "",
- inet_ntop(AF_INET6, event.saddr), event.ports >> 16,
- inet_ntop(AF_INET6, event.daddr), event.ports & 0xffff,
+ inet_ntop(AF_INET6, event.saddr), event.lport,
+ inet_ntop(AF_INET6, event.daddr), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
--
2.41.0
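For reference, the packed-ports encoding this patch retires, together with its decoding (the port values are just examples):

    def pack_ports(lport, dport):
        # the old workaround: both 16-bit ports carried in one u32 field
        return dport + ((0 + lport) << 16)

    def unpack_ports(ports):
        return ports >> 16, ports & 0xffff

    assert unpack_ports(pack_ports(22, 60396)) == (22, 60396)
    print(unpack_ports(pack_ports(22, 60396)))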

View File

@ -1,53 +0,0 @@
From 88274e83ca1a61699741d5b1d5499beb64cac753 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Mon, 16 Oct 2023 19:41:29 +0200
Subject: [PATCH] tools/trace: don't raise an exception in a ctype callback
To exit the tool when the maximum number of events is reached (-M
option), the tool currently calls exit(), which raises a SystemExit
exception. The handling of exceptions from a ctypes callback doesn't
seem straightforward and depends on the Python version.
This patch avoids the issue altogether by using a global variable
instead.
Closes #3049
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/trace.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/trace.py b/tools/trace.py
index 9c7cca71..2aa096fa 100755
--- a/tools/trace.py
+++ b/tools/trace.py
@@ -43,6 +43,7 @@ import sys
build_id_enabled = False
aggregate = False
symcount = {}
+ done = False
@classmethod
def configure(cls, args):
@@ -635,7 +636,7 @@ BPF_PERF_OUTPUT(%s);
if self.aggregate:
self.print_aggregate_events()
sys.stdout.flush()
- exit()
+ Probe.done = True;
def attach(self, bpf, verbose):
if len(self.library) == 0:
@@ -895,7 +896,7 @@ trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
"-" if not all_probes_trivial else ""))
sys.stdout.flush()
- while True:
+ while not Probe.done:
self.bpf.perf_buffer_poll()
def run(self):
--
2.41.0
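The shape of the fix, reduced to a runnable sketch: print_event stands in for the tool's ctypes perf-buffer callback, and the direct call stands in for perf_buffer_poll():

    done = False
    MAX_EVENTS = 3
    count = 0

    def print_event(cpu, data, size):
        # Would run as a ctypes callback: set a flag instead of calling
        # exit(), since raising SystemExit there is version-dependent.
        global done, count
        count += 1
        print("event", count)
        if count >= MAX_EVENTS:
            done = True

    while not done:
        print_event(0, None, 0)  # stands in for b.perf_buffer_poll()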

View File

@ -0,0 +1,76 @@
From 5bc97bbc50b1ccf0c63f320ee73a2c0abe84b596 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Fri, 17 May 2024 15:36:07 +0200
Subject: [PATCH] clang: fail when the kheaders ownership is wrong (#4928)
(#4985)
file_exists_and_ownedby() returns -1 when the file exists but its
ownership is unexpected, which is very misleading since anything
non-zero is interpreted as true, and a function with such a name is
expected to return a boolean. So currently all this does is write a
warning message and continue as if nothing were wrong.
Make file_exists_and_ownedby() return false when the ownership is
wrong and have get_proc_kheaders() fail when this happens. Also have
all the *exists* functions return bool to avoid such issues in the
future.
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
src/cc/frontends/clang/kbuild_helper.cc | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/cc/frontends/clang/kbuild_helper.cc b/src/cc/frontends/clang/kbuild_helper.cc
index 9409e4cc..5d3ad9c2 100644
--- a/src/cc/frontends/clang/kbuild_helper.cc
+++ b/src/cc/frontends/clang/kbuild_helper.cc
@@ -140,20 +140,26 @@ int KBuildHelper::get_flags(const char *uname_machine, vector<string> *cflags) {
return 0;
}
-static inline int file_exists_and_ownedby(const char *f, uid_t uid)
+static inline bool file_exists(const char *f)
+{
+ struct stat buffer;
+ return (stat(f, &buffer) == 0);
+}
+
+static inline bool file_exists_and_ownedby(const char *f, uid_t uid)
{
struct stat buffer;
int ret = stat(f, &buffer) == 0;
if (ret) {
if (buffer.st_uid != uid) {
std::cout << "ERROR: header file ownership unexpected: " << std::string(f) << "\n";
- return -1;
+ return false;
}
}
return ret;
}
-static inline int proc_kheaders_exists(void)
+static inline bool proc_kheaders_exists(void)
{
return file_exists_and_ownedby(PROC_KHEADERS_PATH, 0);
}
@@ -231,8 +237,14 @@ int get_proc_kheaders(std::string &dirpath)
uname_data.release);
dirpath = std::string(dirpath_tmp);
- if (file_exists_and_ownedby(dirpath_tmp, 0))
- return 0;
+ if (file_exists(dirpath_tmp)) {
+ if (file_exists_and_ownedby(dirpath_tmp, 0))
+ return 0;
+ else
+ // The path exists, but is owned by a non-root user
+ // Something fishy is going on
+ return -EEXIST;
+ }
// First time so extract it
return extract_kheaders(dirpath, uname_data);
--
2.44.0
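The boolean contract the patch enforces, sketched in Python. The real check is the C code above; this only illustrates why exists-and-owned must be strictly true/false:

    import os

    def file_exists(path):
        return os.path.exists(path)

    def file_exists_and_ownedby(path, uid=0):
        # Strictly boolean: a missing file and a wrong owner are both False.
        try:
            return os.stat(path).st_uid == uid
        except FileNotFoundError:
            return False

    path = "/proc/kheaders.tar.xz"  # PROC_KHEADERS_PATH in the C code
    if file_exists(path) and not file_exists_and_ownedby(path, 0):
        # Exists but owned by a non-root user: refuse, mirroring -EEXIST above.
        raise PermissionError("unexpected ownership: " + path)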

View File

@ -24,21 +24,14 @@
Name: bcc
-Version: 0.28.0
-Release: 5%{?dist}
+Version: 0.30.0
+Release: 6%{?dist}
Summary: BPF Compiler Collection (BCC)
License: ASL 2.0
URL: https://github.com/iovisor/bcc
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
-Patch0: %%{name}-%%{version}-tools-tcpstates-fix-context-ptr-modified-error.patch
-Patch1: %%{name}-%%{version}-tools-tcpstates-fix-IPv6-journal.patch
-Patch2: %%{name}-%%{version}-tools-Add-support-for-the-new-block_io_-tracepoints.patch
-Patch3: %%{name}-%%{version}-tools-trace-don-t-raise-an-exception-in-a-ctype-call.patch
-Patch4: %%{name}-%%{version}-libbpf-tools-add-block_io_-start-done-tracepoints-su.patch
-Patch5: %%{name}-%%{version}-libbpf-tools-Add-s390x-support.patch
-Patch6: %%{name}-%%{version}-Fixing-pvalloc-memleak-test.patch
-Patch7: %%{name}-%%{version}-Skipping-USDT-tests-for-Power-processor.patch
-Patch8: %%{name}-%%{version}-Adding-memory-zones-for-Power-server.patch
+Patch0: %%{name}-%%{version}-clang-fail-when-the-kheaders-ownership-is-wrong-4928.patch

# Arches will be included as upstream support is added and dependencies are
# satisfied in the respective arches
@ -122,6 +115,9 @@ Summary: Command line tools for BPF Compiler Collection (BCC)
Requires: bcc = %{version}-%{release}
Requires: python3-%{name} = %{version}-%{release}
Requires: python3-netaddr
+%ifnarch s390x
+Requires: python3-pyelftools
+%endif

%description tools
Command line tools for BPF Compiler Collection (BCC)
@ -258,10 +254,36 @@ cp -a libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}/
%if %{with libbpf_tools}
%files -n libbpf-tools
+%ifarch s390x
+%exclude %{_sbindir}/bpf-numamove
+%endif
+# RHEL doesn't provide btrfs or f2fs
+%exclude %{_sbindir}/bpf-btrfs*
+%exclude %{_sbindir}/bpf-f2fs*
%{_sbindir}/bpf-*
%endif

%changelog
+* Thu Jul 04 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-6
+- Rebuild with LLVM 18 (RHEL-28684)
+
+* Fri May 31 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-5
+- Drop python3-pyelftools dependency on s390x until it is available
+
+* Tue May 21 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-4
+- Exclude btrfs and f2fs libbpf tools (RHEL-36579)
+
+* Mon May 20 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-3
+- Really prevent the loading of compromised headers (RHEL-28769, CVE-2024-2314)
+- Add python3-pyelftools dependency (RHEL-36583)
+
+* Fri May 03 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-2
+- Rebuild (distrobaker didn't take last build)
+
+* Wed Apr 10 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-1
+- Rebase bcc to 0.30.0 (RHEL-29031)
+- Exclude bpf-numamove on s390x (RHEL-32327)
+
* Wed Dec 13 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-5
- Fix libbpf bio tools (RHEL-19368)
- Add S390x support to libbpf-tools (RHEL-16325)