import CS bcc-0.30.0-6.el9
This commit is contained in:
parent
2f4d557103
commit
9b479d8367
@ -1 +1 @@
|
|||||||
8ce0ccb0724da475f127d62acc10a88569956474 SOURCES/bcc-0.28.0.tar.gz
|
26ec7f9fc22494b9b6f20cd38ca216edc130704e SOURCES/bcc-0.30.0.tar.gz
|
||||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
SOURCES/bcc-0.28.0.tar.gz
|
SOURCES/bcc-0.30.0.tar.gz
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
From 63808fbdcb70ce2e858db0a42e7e3eeec153d5b6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
Date: Wed, 20 Sep 2023 10:37:38 -0400
|
|
||||||
Subject: [PATCH 4/4] Adding memory zones for Power server
|
|
||||||
|
|
||||||
config PPC_BOOK3S_64 skips setting ZONE_DMA for
|
|
||||||
server processor. NORMAL and MOVABLE zones are
|
|
||||||
available on Power.
|
|
||||||
|
|
||||||
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
---
|
|
||||||
tools/compactsnoop.py | 28 +++++++++++++++++++---------
|
|
||||||
1 file changed, 19 insertions(+), 9 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/compactsnoop.py b/tools/compactsnoop.py
|
|
||||||
index 2b395dec..1a476aad 100755
|
|
||||||
--- a/tools/compactsnoop.py
|
|
||||||
+++ b/tools/compactsnoop.py
|
|
||||||
@@ -260,11 +260,12 @@ TRACEPOINT_PROBE(compaction, mm_compaction_end)
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
-if platform.machine() != 'x86_64':
|
|
||||||
+if platform.machine() != 'x86_64' and platform.machine() != 'ppc64le':
|
|
||||||
print("""
|
|
||||||
- Currently only support x86_64 servers, if you want to use it on
|
|
||||||
- other platforms, please refer include/linux/mmzone.h to modify
|
|
||||||
- zone_idex_to_str to get the right zone type
|
|
||||||
+ Currently only support x86_64 and power servers, if you want
|
|
||||||
+ to use it on other platforms(including power embedded processors),
|
|
||||||
+ please refer include/linux/mmzone.h to modify zone_idex_to_str to
|
|
||||||
+ get the right zone type
|
|
||||||
""")
|
|
||||||
exit()
|
|
||||||
|
|
||||||
@@ -296,13 +297,22 @@ initial_ts = 0
|
|
||||||
# from include/linux/mmzone.h
|
|
||||||
# NOTICE: consider only x86_64 servers
|
|
||||||
zone_type = {
|
|
||||||
- 0: "ZONE_DMA",
|
|
||||||
- 1: "ZONE_DMA32",
|
|
||||||
- 2: "ZONE_NORMAL",
|
|
||||||
+ 'x86_64':
|
|
||||||
+ {
|
|
||||||
+ 0: "ZONE_DMA",
|
|
||||||
+ 1: "ZONE_DMA32",
|
|
||||||
+ 2: "ZONE_NORMAL"
|
|
||||||
+ },
|
|
||||||
+ # Zones in Power server only
|
|
||||||
+ 'ppc64le':
|
|
||||||
+ {
|
|
||||||
+ 0: "ZONE_NORMAL",
|
|
||||||
+ 1: "ZONE_MOVABLE"
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
- if idx in zone_type:
|
|
||||||
- return zone_type[idx]
|
|
||||||
+ if idx in zone_type[platform.machine()]:
|
|
||||||
+ return zone_type[platform.machine()][idx]
|
|
||||||
else:
|
|
||||||
return str(idx)
|
|
||||||
|
|
||||||
--
|
|
||||||
2.43.0
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
|||||||
From e6493835a28c08c45fd374e70dba7aa66f700d08 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
Date: Tue, 14 Nov 2023 03:54:19 -0500
|
|
||||||
Subject: [PATCH 2/4] Fixing pvalloc memleak test
|
|
||||||
|
|
||||||
Request to allocate 30K bytes using pvalloc(), results
|
|
||||||
in allocating 3*64Kb(on 64Kb pagesize system). The assertion
|
|
||||||
expects leak to be 30Kb, whereas leaked memory is much more
|
|
||||||
due to pvalloc's implementation for power.
|
|
||||||
|
|
||||||
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
---
|
|
||||||
tests/python/test_tools_memleak.py | 9 ++++++++-
|
|
||||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/tests/python/test_tools_memleak.py b/tests/python/test_tools_memleak.py
|
|
||||||
index cae7e35d..4e921a0c 100755
|
|
||||||
--- a/tests/python/test_tools_memleak.py
|
|
||||||
+++ b/tests/python/test_tools_memleak.py
|
|
||||||
@@ -3,6 +3,7 @@
|
|
||||||
from unittest import main, skipUnless, TestCase
|
|
||||||
from utils import kernel_version_ge
|
|
||||||
import os
|
|
||||||
+import platform
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
@@ -102,7 +103,13 @@ TOOLS_DIR = "/bcc/tools/"
|
|
||||||
self.assertEqual(cfg.leaking_amount, self.run_leaker("memalign"))
|
|
||||||
|
|
||||||
def test_pvalloc(self):
|
|
||||||
- self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
|
|
||||||
+ # pvalloc's implementation for power invokes mmap(), which adjusts the
|
|
||||||
+ # allocated size to meet pvalloc's constraints. Actual leaked memory
|
|
||||||
+ # could be more than requested, hence assertLessEqual.
|
|
||||||
+ if platform.machine() == 'ppc64le':
|
|
||||||
+ self.assertLessEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
|
|
||||||
+ else:
|
|
||||||
+ self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
|
|
||||||
|
|
||||||
def test_aligned_alloc(self):
|
|
||||||
self.assertEqual(cfg.leaking_amount, self.run_leaker("aligned_alloc"))
|
|
||||||
--
|
|
||||||
2.43.0
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
|||||||
From a5d86850e3bfeaa23ef4c82dccb9288a2cd42a27 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
Date: Mon, 11 Sep 2023 05:10:36 -0400
|
|
||||||
Subject: [PATCH 3/4] Skipping USDT tests for Power processor
|
|
||||||
|
|
||||||
Support for Power processor in folly package is absent,
|
|
||||||
so skipping USDT tests having dependency on folly.
|
|
||||||
|
|
||||||
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
|
|
||||||
---
|
|
||||||
tests/python/CMakeLists.txt | 14 ++++++++------
|
|
||||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt
|
|
||||||
index a42a16ce..81a547f0 100644
|
|
||||||
--- a/tests/python/CMakeLists.txt
|
|
||||||
+++ b/tests/python/CMakeLists.txt
|
|
||||||
@@ -71,12 +71,14 @@ add_test(NAME py_test_tools_smoke WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
COMMAND ${TEST_WRAPPER} py_test_tools_smoke sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_smoke.py)
|
|
||||||
add_test(NAME py_test_tools_memleak WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
COMMAND ${TEST_WRAPPER} py_test_tools_memleak sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_memleak.py)
|
|
||||||
-add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
- COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
|
|
||||||
-add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
- COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
|
|
||||||
-add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
- COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
|
|
||||||
+if(NOT(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64"))
|
|
||||||
+ add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
+ COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
|
|
||||||
+ add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
+ COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
|
|
||||||
+ add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
+ COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
|
|
||||||
+endif()
|
|
||||||
add_test(NAME py_test_license WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
COMMAND ${TEST_WRAPPER} py_test_license sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_license.py)
|
|
||||||
add_test(NAME py_test_free_bcc_memory WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
--
|
|
||||||
2.43.0
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -1,476 +0,0 @@
|
|||||||
From 60860bf3a400dcf72b4026fb2973803cfb12ccf1 Mon Sep 17 00:00:00 2001
|
|
||||||
From: mickey_zhu <mickey_zhu@realsil.com.cn>
|
|
||||||
Date: Tue, 27 Jun 2023 16:32:44 +0800
|
|
||||||
Subject: [PATCH] libbpf-tools: add block_io_{start,done} tracepoints support
|
|
||||||
to bio tools
|
|
||||||
|
|
||||||
Some bio tools fail to kprobe blk_account_io_{start,done} after v5.17,
|
|
||||||
because they become inlined, see [0]. To fix this issue, tracepoints
|
|
||||||
block_io_{start,done} are introduced in kernel, see [1].
|
|
||||||
|
|
||||||
Update related bio tools to support new tracepoints, and also simplify
|
|
||||||
attach.
|
|
||||||
|
|
||||||
[0] Kernel commit 450b7879e345 (block: move blk_account_io_{start,done} to blk-mq.c)
|
|
||||||
[1] Kernel commit 5a80bd075f3b (block: introduce block_io_start/block_io_done tracepoints)
|
|
||||||
|
|
||||||
Change-Id: I62b957abd7ce2901eb114bd57c78938e4f083e4d
|
|
||||||
Signed-off-by: Mickey Zhu <mickey_zhu@realsil.com.cn>
|
|
||||||
---
|
|
||||||
libbpf-tools/biosnoop.bpf.c | 9 ++++
|
|
||||||
libbpf-tools/biosnoop.c | 78 +++++++++++++--------------------
|
|
||||||
libbpf-tools/biostacks.bpf.c | 46 +++++++++++++------
|
|
||||||
libbpf-tools/biostacks.c | 85 +++++++++++++++++++++---------------
|
|
||||||
libbpf-tools/biotop.bpf.c | 44 +++++++++++++++++--
|
|
||||||
libbpf-tools/biotop.c | 59 ++++++++++++++++---------
|
|
||||||
6 files changed, 199 insertions(+), 122 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biosnoop.bpf.c b/libbpf-tools/biosnoop.bpf.c
|
|
||||||
index b791555f..fcc5c5ce 100644
|
|
||||||
--- a/libbpf-tools/biosnoop.bpf.c
|
|
||||||
+++ b/libbpf-tools/biosnoop.bpf.c
|
|
||||||
@@ -76,6 +76,15 @@ int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
return trace_pid(rq);
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+ return trace_pid(rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
{
|
|
||||||
diff --git a/libbpf-tools/biosnoop.c b/libbpf-tools/biosnoop.c
|
|
||||||
index 21773729..f9468900 100644
|
|
||||||
--- a/libbpf-tools/biosnoop.c
|
|
||||||
+++ b/libbpf-tools/biosnoop.c
|
|
||||||
@@ -212,6 +212,16 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
|
|
||||||
fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ if (fentry_can_attach("blk_account_io_start", NULL))
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "blk_account_io_start");
|
|
||||||
+ else
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "__blk_account_io_start");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
const struct partition *partition;
|
|
||||||
@@ -260,12 +270,23 @@ int main(int argc, char **argv)
|
|
||||||
obj->rodata->filter_cg = env.cg;
|
|
||||||
obj->rodata->min_ns = env.min_lat_ms * 1000000;
|
|
||||||
|
|
||||||
- if (fentry_can_attach("blk_account_io_start", NULL))
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "blk_account_io_start");
|
|
||||||
- else
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "__blk_account_io_start");
|
|
||||||
+ if (tracepoint_exists("block", "block_io_start"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ else {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ blk_account_io_set_attach_target(obj);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ ksyms = ksyms__load();
|
|
||||||
+ if (!ksyms) {
|
|
||||||
+ fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
+ goto cleanup;
|
|
||||||
+ }
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
|
|
||||||
+
|
|
||||||
+ if (!env.queued)
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_rq_insert, false);
|
|
||||||
|
|
||||||
err = biosnoop_bpf__load(obj);
|
|
||||||
if (err) {
|
|
||||||
@@ -288,48 +309,9 @@ int main(int argc, char **argv)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- ksyms = ksyms__load();
|
|
||||||
- if (!ksyms) {
|
|
||||||
- err = -ENOMEM;
|
|
||||||
- fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
|
|
||||||
- obj->links.blk_account_io_merge_bio =
|
|
||||||
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
|
|
||||||
- if (!obj->links.blk_account_io_merge_bio) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- if (env.queued) {
|
|
||||||
- obj->links.block_rq_insert =
|
|
||||||
- bpf_program__attach(obj->progs.block_rq_insert);
|
|
||||||
- if (!obj->links.block_rq_insert) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
|
|
||||||
- if (!obj->links.block_rq_issue) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
|
|
||||||
- if (!obj->links.block_rq_complete) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
|
|
||||||
+ err = biosnoop_bpf__attach(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
|
|
||||||
index c3950910..0ca69880 100644
|
|
||||||
--- a/libbpf-tools/biostacks.bpf.c
|
|
||||||
+++ b/libbpf-tools/biostacks.bpf.c
|
|
||||||
@@ -67,20 +67,8 @@ int trace_start(void *ctx, struct request *rq, bool merge_bio)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
-SEC("fentry/blk_account_io_start")
|
|
||||||
-int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
-{
|
|
||||||
- return trace_start(ctx, rq, false);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
-int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
-{
|
|
||||||
- return trace_start(ctx, rq, true);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-SEC("fentry/blk_account_io_done")
|
|
||||||
-int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_done(void *ctx, struct request *rq)
|
|
||||||
{
|
|
||||||
u64 slot, ts = bpf_ktime_get_ns();
|
|
||||||
struct internal_rqinfo *i_rqinfop;
|
|
||||||
@@ -110,4 +98,34 @@ int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
+int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, true);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("fentry/blk_account_io_start")
|
|
||||||
+int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("fentry/blk_account_io_done")
|
|
||||||
+int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_done(ctx, rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_done")
|
|
||||||
+int BPF_PROG(block_io_done, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_done(ctx, rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
char LICENSE[] SEC("license") = "GPL";
|
|
||||||
diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
|
|
||||||
index e1878d1f..e7875f76 100644
|
|
||||||
--- a/libbpf-tools/biostacks.c
|
|
||||||
+++ b/libbpf-tools/biostacks.c
|
|
||||||
@@ -128,6 +128,39 @@ void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool has_block_io_tracepoints(void)
|
|
||||||
+{
|
|
||||||
+ return tracepoint_exists("block", "block_io_start") &&
|
|
||||||
+ tracepoint_exists("block", "block_io_done");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_block_io_tracepoints(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_blk_account_io_fentry(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void blk_account_io_set_attach_target(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ if (fentry_can_attach("blk_account_io_start", NULL)) {
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "blk_account_io_start");
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
|
|
||||||
+ 0, "blk_account_io_done");
|
|
||||||
+ } else {
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "__blk_account_io_start");
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
|
|
||||||
+ 0, "__blk_account_io_done");
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct partitions *partitions = NULL;
|
|
||||||
@@ -172,50 +205,30 @@ int main(int argc, char **argv)
|
|
||||||
|
|
||||||
obj->rodata->targ_ms = env.milliseconds;
|
|
||||||
|
|
||||||
- if (fentry_can_attach("blk_account_io_start", NULL)) {
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "blk_account_io_start");
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
|
|
||||||
- "blk_account_io_done");
|
|
||||||
- } else {
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "__blk_account_io_start");
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
|
|
||||||
- "__blk_account_io_done");
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- err = biostacks_bpf__load(obj);
|
|
||||||
- if (err) {
|
|
||||||
- fprintf(stderr, "failed to load BPF object: %d\n", err);
|
|
||||||
- goto cleanup;
|
|
||||||
+ if (has_block_io_tracepoints())
|
|
||||||
+ disable_blk_account_io_fentry(obj);
|
|
||||||
+ else {
|
|
||||||
+ disable_block_io_tracepoints(obj);
|
|
||||||
+ blk_account_io_set_attach_target(obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
ksyms = ksyms__load();
|
|
||||||
if (!ksyms) {
|
|
||||||
fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
|
|
||||||
- obj->links.blk_account_io_merge_bio =
|
|
||||||
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
|
|
||||||
- if (!obj->links.blk_account_io_merge_bio) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
|
|
||||||
+
|
|
||||||
+ err = biostacks_bpf__load(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to load BPF object: %d\n", err);
|
|
||||||
+ goto cleanup;
|
|
||||||
}
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach(obj->progs.blk_account_io_done);
|
|
||||||
- if (!obj->links.blk_account_io_done) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
+
|
|
||||||
+ err = biostacks_bpf__attach(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biotop.bpf.c b/libbpf-tools/biotop.bpf.c
|
|
||||||
index 226e32d3..07631378 100644
|
|
||||||
--- a/libbpf-tools/biotop.bpf.c
|
|
||||||
+++ b/libbpf-tools/biotop.bpf.c
|
|
||||||
@@ -30,8 +30,8 @@ struct {
|
|
||||||
__type(value, struct val_t);
|
|
||||||
} counts SEC(".maps");
|
|
||||||
|
|
||||||
-SEC("kprobe")
|
|
||||||
-int BPF_KPROBE(blk_account_io_start, struct request *req)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_start(struct request *req)
|
|
||||||
{
|
|
||||||
struct who_t who = {};
|
|
||||||
|
|
||||||
@@ -56,8 +56,8 @@ int BPF_KPROBE(blk_mq_start_request, struct request *req)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
-SEC("kprobe")
|
|
||||||
-int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_done(struct request *req)
|
|
||||||
{
|
|
||||||
struct val_t *valp, zero = {};
|
|
||||||
struct info_t info = {};
|
|
||||||
@@ -103,4 +103,40 @@ int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("kprobe/blk_account_io_start")
|
|
||||||
+int BPF_KPROBE(blk_account_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/blk_account_io_done")
|
|
||||||
+int BPF_KPROBE(blk_account_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/__blk_account_io_start")
|
|
||||||
+int BPF_KPROBE(__blk_account_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/__blk_account_io_done")
|
|
||||||
+int BPF_KPROBE(__blk_account_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_done")
|
|
||||||
+int BPF_PROG(block_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
char LICENSE[] SEC("license") = "GPL";
|
|
||||||
diff --git a/libbpf-tools/biotop.c b/libbpf-tools/biotop.c
|
|
||||||
index 75484281..5b3a7cf3 100644
|
|
||||||
--- a/libbpf-tools/biotop.c
|
|
||||||
+++ b/libbpf-tools/biotop.c
|
|
||||||
@@ -354,6 +354,38 @@ static int print_stat(struct biotop_bpf *obj)
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool has_block_io_tracepoints(void)
|
|
||||||
+{
|
|
||||||
+ return tracepoint_exists("block", "block_io_start") &&
|
|
||||||
+ tracepoint_exists("block", "block_io_done");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_block_io_tracepoints(struct biotop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void blk_account_io_set_autoload(struct biotop_bpf *obj,
|
|
||||||
+ struct ksyms *ksyms)
|
|
||||||
+{
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
|
|
||||||
+ } else {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
static const struct argp argp = {
|
|
||||||
@@ -386,32 +418,19 @@ int main(int argc, char **argv)
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (has_block_io_tracepoints())
|
|
||||||
+ disable_blk_account_io_kprobes(obj);
|
|
||||||
+ else {
|
|
||||||
+ disable_block_io_tracepoints(obj);
|
|
||||||
+ blk_account_io_set_autoload(obj, ksyms);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
err = biotop_bpf__load(obj);
|
|
||||||
if (err) {
|
|
||||||
warn("failed to load BPF object: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (ksyms__get_symbol(ksyms, "__blk_account_io_start"))
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "__blk_account_io_start");
|
|
||||||
- else
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "blk_account_io_start");
|
|
||||||
-
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- warn("failed to load attach blk_account_io_start\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if (ksyms__get_symbol(ksyms, "__blk_account_io_done"))
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "__blk_account_io_done");
|
|
||||||
- else
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "blk_account_io_done");
|
|
||||||
-
|
|
||||||
- if (!obj->links.blk_account_io_done) {
|
|
||||||
- warn("failed to load attach blk_account_io_done\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
err = biotop_bpf__attach(obj);
|
|
||||||
if (err) {
|
|
||||||
warn("failed to attach BPF programs: %d\n", err);
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -1,855 +0,0 @@
|
|||||||
From 2e758b65231f976c67a0aad791aabc7927ea7086 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Thu, 27 Jul 2023 18:19:18 +0200
|
|
||||||
Subject: [PATCH] tools: Add support for the new block_io_* tracepoints
|
|
||||||
|
|
||||||
The bio tools currently depend on blk_account_io_done/start functions
|
|
||||||
that can be inlined. To fix that, a couple of tracepoints have been
|
|
||||||
added upstream (block:block_io_start/done). This patch adds support
|
|
||||||
for those tracepoints when they are available.
|
|
||||||
|
|
||||||
Unfortunately, the bio tools rely on data that is not available to
|
|
||||||
the tracepoints (mostly the struct request). So the tracepoints can't
|
|
||||||
be used as drop in replacement for blk_account_io_*. Main difference,
|
|
||||||
is that we can't use the struct request as the hash key anymore, so it
|
|
||||||
now uses the couple (dev_t, sector) for that purpose.
|
|
||||||
|
|
||||||
For the biolatency tool, the -F option is disabled when only the
|
|
||||||
tracepoints are available because the flags are not all accessible
|
|
||||||
from the tracepoints. Otherwise, all features of the tools should
|
|
||||||
remain.
|
|
||||||
|
|
||||||
Closes #4261
|
|
||||||
|
|
||||||
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
---
|
|
||||||
tools/biolatency.py | 166 ++++++++++++++++++++++++++++--------
|
|
||||||
tools/biosnoop.py | 200 +++++++++++++++++++++++++++++++++-----------
|
|
||||||
tools/biotop.py | 108 +++++++++++++++++++-----
|
|
||||||
3 files changed, 371 insertions(+), 103 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/biolatency.py b/tools/biolatency.py
|
|
||||||
index 8fe43a7c..03b48a4c 100755
|
|
||||||
--- a/tools/biolatency.py
|
|
||||||
+++ b/tools/biolatency.py
|
|
||||||
@@ -11,6 +11,7 @@
|
|
||||||
#
|
|
||||||
# 20-Sep-2015 Brendan Gregg Created this.
|
|
||||||
# 31-Mar-2022 Rocky Xing Added disk filter support.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -72,7 +73,7 @@ bpf_text = """
|
|
||||||
#include <linux/blk-mq.h>
|
|
||||||
|
|
||||||
typedef struct disk_key {
|
|
||||||
- char disk[DISK_NAME_LEN];
|
|
||||||
+ dev_t dev;
|
|
||||||
u64 slot;
|
|
||||||
} disk_key_t;
|
|
||||||
|
|
||||||
@@ -86,26 +87,70 @@ typedef struct ext_val {
|
|
||||||
u64 count;
|
|
||||||
} ext_val_t;
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *);
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct start_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 _pad;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ CMD_FLAGS
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+BPF_HASH(start, struct start_key);
|
|
||||||
STORAGE
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
-int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_start(struct start_key key)
|
|
||||||
{
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
u64 ts = bpf_ktime_get_ns();
|
|
||||||
- start.update(&req, &ts);
|
|
||||||
+ start.update(&key, &ts);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ SET_FLAGS
|
|
||||||
+
|
|
||||||
+ return __trace_req_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// output
|
|
||||||
-int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_done(struct start_key key)
|
|
||||||
{
|
|
||||||
u64 *tsp, delta;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- tsp = start.lookup(&req);
|
|
||||||
+ tsp = start.lookup(&key);
|
|
||||||
if (tsp == 0) {
|
|
||||||
return 0; // missed issue
|
|
||||||
}
|
|
||||||
@@ -116,9 +161,31 @@ int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
// store as histogram
|
|
||||||
STORE
|
|
||||||
|
|
||||||
- start.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ SET_FLAGS
|
|
||||||
+
|
|
||||||
+ return __trace_req_done(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_done_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_done(key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
|
|
||||||
# code substitutions
|
|
||||||
@@ -134,21 +201,18 @@ store_str = ""
|
|
||||||
if args.disks:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
|
|
||||||
disks_str = """
|
|
||||||
- disk_key_t key = {.slot = bpf_log2l(delta)};
|
|
||||||
- void *__tmp = (void *)req->__RQ_DISK__->disk_name;
|
|
||||||
- bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
|
|
||||||
- dist.atomic_increment(key);
|
|
||||||
+ disk_key_t dkey = {};
|
|
||||||
+ dkey.dev = key.dev;
|
|
||||||
+ dkey.slot = bpf_log2l(delta);
|
|
||||||
+ dist.atomic_increment(dkey);
|
|
||||||
"""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
|
|
||||||
- else:
|
|
||||||
- store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
|
|
||||||
+ store_str += disks_str
|
|
||||||
elif args.flags:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
|
|
||||||
store_str += """
|
|
||||||
- flag_key_t key = {.slot = bpf_log2l(delta)};
|
|
||||||
- key.flags = req->cmd_flags;
|
|
||||||
- dist.atomic_increment(key);
|
|
||||||
+ flag_key_t fkey = {.slot = bpf_log2l(delta)};
|
|
||||||
+ fkey.flags = key.flags;
|
|
||||||
+ dist.atomic_increment(fkey);
|
|
||||||
"""
|
|
||||||
else:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist);"
|
|
||||||
@@ -161,21 +225,13 @@ store_str = ""
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
stat_info = os.stat(disk_path)
|
|
||||||
- major = os.major(stat_info.st_rdev)
|
|
||||||
- minor = os.minor(stat_info.st_rdev)
|
|
||||||
-
|
|
||||||
- disk_field_str = ""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- disk_field_str = 'req->rq_disk'
|
|
||||||
- else:
|
|
||||||
- disk_field_str = 'req->q->disk'
|
|
||||||
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
|
|
||||||
|
|
||||||
disk_filter_str = """
|
|
||||||
- struct gendisk *disk = %s;
|
|
||||||
- if (!(disk->major == %d && disk->first_minor == %d)) {
|
|
||||||
+ if(key.dev != %s) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
- """ % (disk_field_str, major, minor)
|
|
||||||
+ """ % (dev)
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
|
|
||||||
else:
|
|
||||||
@@ -194,6 +250,16 @@ store_str = ""
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace("STORAGE", storage_str)
|
|
||||||
bpf_text = bpf_text.replace("STORE", store_str)
|
|
||||||
+if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
|
|
||||||
+else:
|
|
||||||
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
|
|
||||||
+if args.flags:
|
|
||||||
+ bpf_text = bpf_text.replace('CMD_FLAGS', 'u64 flags;')
|
|
||||||
+ bpf_text = bpf_text.replace('SET_FLAGS', 'key.flags = req->cmd_flags;')
|
|
||||||
+else:
|
|
||||||
+ bpf_text = bpf_text.replace('CMD_FLAGS', '')
|
|
||||||
+ bpf_text = bpf_text.replace('SET_FLAGS', '')
|
|
||||||
|
|
||||||
if debug or args.ebpf:
|
|
||||||
print(bpf_text)
|
|
||||||
@@ -205,25 +271,53 @@ b = BPF(text=bpf_text)
|
|
||||||
if args.queued:
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
|
|
||||||
- else:
|
|
||||||
+ elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
|
|
||||||
+ else:
|
|
||||||
+ if args.flags:
|
|
||||||
+ # Some flags are accessible in the rwbs field (RAHEAD, SYNC and META)
|
|
||||||
+ # but other aren't. Disable the -F option for tracepoint for now.
|
|
||||||
+ print("ERROR: blk_account_io_start probe not available. Can't use -F.")
|
|
||||||
+ exit()
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_req_start_tp")
|
|
||||||
else:
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
+
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
|
|
||||||
+else:
|
|
||||||
+ if args.flags:
|
|
||||||
+ print("ERROR: blk_account_io_done probe not available. Can't use -F.")
|
|
||||||
+ exit()
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_done_tp")
|
|
||||||
+
|
|
||||||
|
|
||||||
if not args.json:
|
|
||||||
print("Tracing block device I/O... Hit Ctrl-C to end.")
|
|
||||||
|
|
||||||
-def disk_print(s):
|
|
||||||
- disk = s.decode('utf-8', 'replace')
|
|
||||||
- if not disk:
|
|
||||||
- disk = "<unknown>"
|
|
||||||
- return disk
|
|
||||||
+# cache disk major,minor -> diskname
|
|
||||||
+diskstats = "/proc/diskstats"
|
|
||||||
+disklookup = {}
|
|
||||||
+with open(diskstats) as stats:
|
|
||||||
+ for line in stats:
|
|
||||||
+ a = line.split()
|
|
||||||
+ disklookup[a[0] + "," + a[1]] = a[2]
|
|
||||||
+
|
|
||||||
+def disk_print(d):
|
|
||||||
+ major = d >> 20
|
|
||||||
+ minor = d & ((1 << 20) - 1)
|
|
||||||
+
|
|
||||||
+ disk = str(major) + "," + str(minor)
|
|
||||||
+ if disk in disklookup:
|
|
||||||
+ diskname = disklookup[disk]
|
|
||||||
+ else:
|
|
||||||
+ diskname = "?"
|
|
||||||
+
|
|
||||||
+ return diskname
|
|
||||||
|
|
||||||
# see blk_fill_rwbs():
|
|
||||||
req_opf = {
|
|
||||||
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
|
|
||||||
index 33703233..f0fef98b 100755
|
|
||||||
--- a/tools/biosnoop.py
|
|
||||||
+++ b/tools/biosnoop.py
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
# 11-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
|
|
||||||
# 21-Jun-2022 Rocky Xing Added disk filter support.
|
|
||||||
# 13-Oct-2022 Rocky Xing Added support for displaying block I/O pattern.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -64,6 +65,24 @@ struct val_t {
|
|
||||||
char name[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hash_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 rwflag;
|
|
||||||
+ sector_t sector;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
#ifdef INCLUDE_PATTERN
|
|
||||||
struct sector_key_t {
|
|
||||||
u32 dev_major;
|
|
||||||
@@ -79,6 +98,7 @@ enum bio_pattern {
|
|
||||||
|
|
||||||
struct data_t {
|
|
||||||
u32 pid;
|
|
||||||
+ u32 dev;
|
|
||||||
u64 rwflag;
|
|
||||||
u64 delta;
|
|
||||||
u64 qdelta;
|
|
||||||
@@ -88,7 +108,6 @@ struct data_t {
|
|
||||||
enum bio_pattern pattern;
|
|
||||||
#endif
|
|
||||||
u64 ts;
|
|
||||||
- char disk_name[DISK_NAME_LEN];
|
|
||||||
char name[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -96,12 +115,45 @@ struct data_t {
|
|
||||||
BPF_HASH(last_sectors, struct sector_key_t, u64);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *, struct start_req_t);
|
|
||||||
-BPF_HASH(infobyreq, struct request *, struct val_t);
|
|
||||||
+BPF_HASH(start, struct hash_key, struct start_req_t);
|
|
||||||
+BPF_HASH(infobyreq, struct hash_key, struct val_t);
|
|
||||||
BPF_PERF_OUTPUT(events);
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
+ * it may be backported to earlier kernels) with how block request write flags
|
|
||||||
+ * are tested. We handle both pre- and post-change versions here. Please avoid
|
|
||||||
+ * kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
+ * test, and maintenance burden.
|
|
||||||
+ */
|
|
||||||
+static int get_rwflag(u32 cmd_flags) {
|
|
||||||
+#ifdef REQ_WRITE
|
|
||||||
+ return !!(cmd_flags & REQ_WRITE);
|
|
||||||
+#elif defined(REQ_OP_SHIFT)
|
|
||||||
+ return !!((cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
+#else
|
|
||||||
+ return !!((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
+#endif
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#define RWBS_LEN 8
|
|
||||||
+
|
|
||||||
+static int get_rwflag_tp(char *rwbs) {
|
|
||||||
+ for (int i = 0; i < RWBS_LEN; i++) {
|
|
||||||
+ if (rwbs[i] == 'W')
|
|
||||||
+ return 1;
|
|
||||||
+ if (rwbs[i] == '\\0')
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// cache PID and comm by-req
|
|
||||||
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_pid_start(struct hash_key key)
|
|
||||||
{
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
@@ -113,47 +165,76 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
if (##QUEUE##) {
|
|
||||||
val.ts = bpf_ktime_get_ns();
|
|
||||||
}
|
|
||||||
- infobyreq.update(&req, &val);
|
|
||||||
+ infobyreq.update(&key, &val);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+
|
|
||||||
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_pid_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .rwflag = get_rwflag_tp(args->rwbs),
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
struct start_req_t start_req = {
|
|
||||||
.ts = bpf_ktime_get_ns(),
|
|
||||||
.data_len = req->__data_len
|
|
||||||
};
|
|
||||||
- start.update(&req, &start_req);
|
|
||||||
+ start.update(&key, &start_req);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// output
|
|
||||||
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_completion(void *ctx, struct hash_key key)
|
|
||||||
{
|
|
||||||
struct start_req_t *startp;
|
|
||||||
struct val_t *valp;
|
|
||||||
struct data_t data = {};
|
|
||||||
- struct gendisk *rq_disk;
|
|
||||||
+ //struct gendisk *rq_disk;
|
|
||||||
u64 ts;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- startp = start.lookup(&req);
|
|
||||||
+ startp = start.lookup(&key);
|
|
||||||
if (startp == 0) {
|
|
||||||
// missed tracing issue
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
ts = bpf_ktime_get_ns();
|
|
||||||
- rq_disk = req->__RQ_DISK__;
|
|
||||||
+ //rq_disk = req->__RQ_DISK__;
|
|
||||||
data.delta = ts - startp->ts;
|
|
||||||
data.ts = ts / 1000;
|
|
||||||
data.qdelta = 0;
|
|
||||||
data.len = startp->data_len;
|
|
||||||
|
|
||||||
- valp = infobyreq.lookup(&req);
|
|
||||||
+ valp = infobyreq.lookup(&key);
|
|
||||||
if (valp == 0) {
|
|
||||||
data.name[0] = '?';
|
|
||||||
data.name[1] = 0;
|
|
||||||
@@ -162,10 +243,9 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
data.qdelta = startp->ts - valp->ts;
|
|
||||||
}
|
|
||||||
data.pid = valp->pid;
|
|
||||||
- data.sector = req->__sector;
|
|
||||||
+ data.sector = key.sector;
|
|
||||||
+ data.dev = key.dev;
|
|
||||||
bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
|
|
||||||
- bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
|
|
||||||
- rq_disk->disk_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef INCLUDE_PATTERN
|
|
||||||
@@ -174,8 +254,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
u64 *sector, last_sector;
|
|
||||||
|
|
||||||
struct sector_key_t sector_key = {
|
|
||||||
- .dev_major = rq_disk->major,
|
|
||||||
- .dev_minor = rq_disk->first_minor
|
|
||||||
+ .dev_major = key.dev >> 20,
|
|
||||||
+ .dev_minor = key.dev & ((1 << 20) - 1)
|
|
||||||
};
|
|
||||||
|
|
||||||
sector = last_sectors.lookup(&sector_key);
|
|
||||||
@@ -187,27 +267,36 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
last_sectors.update(&sector_key, &last_sector);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-/*
|
|
||||||
- * The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
- * it may be backported to earlier kernels) with how block request write flags
|
|
||||||
- * are tested. We handle both pre- and post-change versions here. Please avoid
|
|
||||||
- * kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
- * test, and maintenance burden.
|
|
||||||
- */
|
|
||||||
-#ifdef REQ_WRITE
|
|
||||||
- data.rwflag = !!(req->cmd_flags & REQ_WRITE);
|
|
||||||
-#elif defined(REQ_OP_SHIFT)
|
|
||||||
- data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
-#else
|
|
||||||
- data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
-#endif
|
|
||||||
+ data.rwflag = key.rwflag;
|
|
||||||
|
|
||||||
events.perf_submit(ctx, &data, sizeof(data));
|
|
||||||
- start.delete(&req);
|
|
||||||
- infobyreq.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
+ infobyreq.delete(&key);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(ctx, key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_completion_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .rwflag = get_rwflag_tp(args->rwbs),
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(args, key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
if args.queue:
|
|
||||||
bpf_text = bpf_text.replace('##QUEUE##', '1')
|
|
||||||
@@ -225,21 +314,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
stat_info = os.stat(disk_path)
|
|
||||||
- major = os.major(stat_info.st_rdev)
|
|
||||||
- minor = os.minor(stat_info.st_rdev)
|
|
||||||
-
|
|
||||||
- disk_field_str = ""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- disk_field_str = 'req->rq_disk'
|
|
||||||
- else:
|
|
||||||
- disk_field_str = 'req->q->disk'
|
|
||||||
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
|
|
||||||
|
|
||||||
disk_filter_str = """
|
|
||||||
- struct gendisk *disk = %s;
|
|
||||||
- if (!(disk->major == %d && disk->first_minor == %d)) {
|
|
||||||
+ if(key.dev != %s) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
- """ % (disk_field_str, major, minor)
|
|
||||||
+ """ % (dev)
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
|
|
||||||
else:
|
|
||||||
@@ -254,15 +335,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
|
|
||||||
|
|
||||||
# header
|
|
||||||
print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
|
|
||||||
@@ -273,6 +358,27 @@ print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
|
|
||||||
print("%7s " % ("QUE(ms)"), end="")
|
|
||||||
print("%7s" % "LAT(ms)")
|
|
||||||
|
|
||||||
+
|
|
||||||
+# cache disk major,minor -> diskname
|
|
||||||
+diskstats = "/proc/diskstats"
|
|
||||||
+disklookup = {}
|
|
||||||
+with open(diskstats) as stats:
|
|
||||||
+ for line in stats:
|
|
||||||
+ a = line.split()
|
|
||||||
+ disklookup[a[0] + "," + a[1]] = a[2]
|
|
||||||
+
|
|
||||||
+def disk_print(d):
|
|
||||||
+ major = d >> 20
|
|
||||||
+ minor = d & ((1 << 20) - 1)
|
|
||||||
+
|
|
||||||
+ disk = str(major) + "," + str(minor)
|
|
||||||
+ if disk in disklookup:
|
|
||||||
+ diskname = disklookup[disk]
|
|
||||||
+ else:
|
|
||||||
+ diskname = "<unknown>"
|
|
||||||
+
|
|
||||||
+ return diskname
|
|
||||||
+
|
|
||||||
rwflg = ""
|
|
||||||
pattern = ""
|
|
||||||
start_ts = 0
|
|
||||||
@@ -297,9 +403,7 @@ P_RANDOM = 2
|
|
||||||
|
|
||||||
delta = float(event.ts) - start_ts
|
|
||||||
|
|
||||||
- disk_name = event.disk_name.decode('utf-8', 'replace')
|
|
||||||
- if not disk_name:
|
|
||||||
- disk_name = '<unknown>'
|
|
||||||
+ disk_name = disk_print(event.dev)
|
|
||||||
|
|
||||||
print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % (
|
|
||||||
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
|
|
||||||
diff --git a/tools/biotop.py b/tools/biotop.py
|
|
||||||
index fcdd373f..2620983a 100755
|
|
||||||
--- a/tools/biotop.py
|
|
||||||
+++ b/tools/biotop.py
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
#
|
|
||||||
# 06-Feb-2016 Brendan Gregg Created this.
|
|
||||||
# 17-Mar-2022 Rocky Xing Added PID filter support.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -88,14 +89,35 @@ struct val_t {
|
|
||||||
u32 io;
|
|
||||||
};
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *, struct start_req_t);
|
|
||||||
-BPF_HASH(whobyreq, struct request *, struct who_t);
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hash_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 _pad;
|
|
||||||
+ sector_t sector;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+BPF_HASH(start, struct hash_key, struct start_req_t);
|
|
||||||
+BPF_HASH(whobyreq, struct hash_key, struct who_t);
|
|
||||||
BPF_HASH(counts, struct info_t, struct val_t);
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// cache PID and comm by-req
|
|
||||||
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_pid_start(struct hash_key key)
|
|
||||||
{
|
|
||||||
- struct who_t who = {};
|
|
||||||
+ struct who_t who;
|
|
||||||
u32 pid;
|
|
||||||
|
|
||||||
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
|
|
||||||
@@ -104,30 +126,54 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
who.pid = pid;
|
|
||||||
- whobyreq.update(&req, &who);
|
|
||||||
+ whobyreq.update(&key, &who);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_pid_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
struct start_req_t start_req = {
|
|
||||||
.ts = bpf_ktime_get_ns(),
|
|
||||||
.data_len = req->__data_len
|
|
||||||
};
|
|
||||||
- start.update(&req, &start_req);
|
|
||||||
+ start.update(&key, &start_req);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// output
|
|
||||||
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_completion(struct hash_key key)
|
|
||||||
{
|
|
||||||
struct start_req_t *startp;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- startp = start.lookup(&req);
|
|
||||||
+ startp = start.lookup(&key);
|
|
||||||
if (startp == 0) {
|
|
||||||
return 0; // missed tracing issue
|
|
||||||
}
|
|
||||||
@@ -135,12 +181,12 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
struct who_t *whop;
|
|
||||||
u32 pid;
|
|
||||||
|
|
||||||
- whop = whobyreq.lookup(&req);
|
|
||||||
+ whop = whobyreq.lookup(&key);
|
|
||||||
pid = whop != 0 ? whop->pid : 0;
|
|
||||||
if (FILTER_PID) {
|
|
||||||
- start.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
if (whop != 0) {
|
|
||||||
- whobyreq.delete(&req);
|
|
||||||
+ whobyreq.delete(&key);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -150,8 +196,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
|
|
||||||
// setup info_t key
|
|
||||||
struct info_t info = {};
|
|
||||||
- info.major = req->__RQ_DISK__->major;
|
|
||||||
- info.minor = req->__RQ_DISK__->first_minor;
|
|
||||||
+ info.major = key.dev >> 20;
|
|
||||||
+ info.minor = key.dev & ((1 << 20) - 1);
|
|
||||||
/*
|
|
||||||
* The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
* it may be backported to earlier kernels) with how block request write flags
|
|
||||||
@@ -159,13 +205,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
* kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
* test, and maintenance burden.
|
|
||||||
*/
|
|
||||||
-#ifdef REQ_WRITE
|
|
||||||
+/*#ifdef REQ_WRITE
|
|
||||||
info.rwflag = !!(req->cmd_flags & REQ_WRITE);
|
|
||||||
#elif defined(REQ_OP_SHIFT)
|
|
||||||
info.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
#else
|
|
||||||
info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
-#endif
|
|
||||||
+#endif*/
|
|
||||||
|
|
||||||
if (whop == 0) {
|
|
||||||
// missed pid who, save stats as pid 0
|
|
||||||
@@ -183,11 +229,31 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
valp->io++;
|
|
||||||
}
|
|
||||||
|
|
||||||
- start.delete(&req);
|
|
||||||
- whobyreq.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
+ whobyreq.delete(&key);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_completion_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
|
|
||||||
if args.ebpf:
|
|
||||||
@@ -207,15 +273,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
|
|
||||||
|
|
||||||
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
|
|
||||||
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -1,156 +0,0 @@
|
|||||||
From 0d1a67ba9490aabbb874819d8d07b1868c8c2b1d Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Wed, 1 Feb 2023 17:30:03 +0100
|
|
||||||
Subject: [PATCH 2/2] tools/tcpstates: fix IPv6 journal
|
|
||||||
|
|
||||||
When logging ipv6 state change, journal_fields tries to pack
|
|
||||||
event.saddr and event.daddr, which are not integers in this case, to
|
|
||||||
present a bytes-like object to socket.inet_ntop. This can be fixed by
|
|
||||||
having a similar type for [sd]addr for IPv4 and IPv6. Making both an
|
|
||||||
array of u32 solves the issue by presenting a bytes-like object
|
|
||||||
directly to inet_ntop, without the need for the struct packing stage.
|
|
||||||
|
|
||||||
Also, the now-similar behavior makes it easier to factor code for
|
|
||||||
IPv4 and IPv6.
|
|
||||||
|
|
||||||
It solves the following error:
|
|
||||||
/usr/share/bcc/tools/tcpstates -Y
|
|
||||||
SKADDR C-PID C-COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
|
|
||||||
ffff8b2e83e56180 0 swapper/9 :: 22 :: 0 LISTEN -> SYN_RECV 0.000
|
|
||||||
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
|
|
||||||
callback(cpu, data, size)
|
|
||||||
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
|
|
||||||
journal.send(**journal_fields(event, AF_INET6))
|
|
||||||
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
|
|
||||||
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
|
|
||||||
struct.error: required argument is not an integer
|
|
||||||
ffff8b2e83e56180 0 swapper/9 2620:52:0:2580:5054:ff:fe6b:6f1f 22 2620:52:0:2b11:2f5e:407d:b35d:4663 60396 SYN_RECV -> ESTABLISHED 0.010
|
|
||||||
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
|
|
||||||
callback(cpu, data, size)
|
|
||||||
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
|
|
||||||
journal.send(**journal_fields(event, AF_INET6))
|
|
||||||
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
|
|
||||||
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
|
|
||||||
struct.error: required argument is not an integer
|
|
||||||
|
|
||||||
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
---
|
|
||||||
tools/tcpstates.py | 55 +++++++++++++++++-----------------------------
|
|
||||||
1 file changed, 20 insertions(+), 35 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
|
|
||||||
index 9b2ccfa4..6c845c9b 100755
|
|
||||||
--- a/tools/tcpstates.py
|
|
||||||
+++ b/tools/tcpstates.py
|
|
||||||
@@ -19,7 +19,6 @@ from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
import argparse
|
|
||||||
from socket import inet_ntop, AF_INET, AF_INET6
|
|
||||||
-from struct import pack
|
|
||||||
from time import strftime, time
|
|
||||||
from os import getuid
|
|
||||||
|
|
||||||
@@ -78,8 +77,8 @@ BPF_HASH(last, struct sock *, u64);
|
|
||||||
struct ipv4_data_t {
|
|
||||||
u64 ts_us;
|
|
||||||
u64 skaddr;
|
|
||||||
- u32 saddr;
|
|
||||||
- u32 daddr;
|
|
||||||
+ u32 saddr[1];
|
|
||||||
+ u32 daddr[1];
|
|
||||||
u64 span_us;
|
|
||||||
u32 pid;
|
|
||||||
u16 lport;
|
|
||||||
@@ -93,8 +92,8 @@ BPF_PERF_OUTPUT(ipv4_events);
|
|
||||||
struct ipv6_data_t {
|
|
||||||
u64 ts_us;
|
|
||||||
u64 skaddr;
|
|
||||||
- unsigned __int128 saddr;
|
|
||||||
- unsigned __int128 daddr;
|
|
||||||
+ u32 saddr[4];
|
|
||||||
+ u32 daddr[4];
|
|
||||||
u64 span_us;
|
|
||||||
u32 pid;
|
|
||||||
u16 lport;
|
|
||||||
@@ -350,9 +349,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
'OBJECT_PID': str(event.pid),
|
|
||||||
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
|
|
||||||
# Custom fields, aka "stuff we sort of made up".
|
|
||||||
- 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
|
|
||||||
+ 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, event.saddr),
|
|
||||||
'OBJECT_TCP_SOURCE_PORT': str(event.lport),
|
|
||||||
- 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
|
|
||||||
+ 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, event.daddr),
|
|
||||||
'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
|
|
||||||
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
|
|
||||||
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
|
|
||||||
@@ -373,8 +372,7 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
return fields
|
|
||||||
|
|
||||||
# process event
|
|
||||||
-def print_ipv4_event(cpu, data, size):
|
|
||||||
- event = b["ipv4_events"].event(data)
|
|
||||||
+def print_event(event, addr_family):
|
|
||||||
global start_ts
|
|
||||||
if args.time:
|
|
||||||
if args.csv:
|
|
||||||
@@ -389,39 +387,26 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
print("%.6f," % delta_s, end="")
|
|
||||||
else:
|
|
||||||
print("%-9.6f " % delta_s, end="")
|
|
||||||
+ if addr_family == AF_INET:
|
|
||||||
+ version = "4"
|
|
||||||
+ else:
|
|
||||||
+ version = "6"
|
|
||||||
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
|
|
||||||
- "4" if args.wide or args.csv else "",
|
|
||||||
- inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
|
|
||||||
- inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
|
|
||||||
+ version if args.wide or args.csv else "",
|
|
||||||
+ inet_ntop(addr_family, event.saddr), event.lport,
|
|
||||||
+ inet_ntop(addr_family, event.daddr), event.dport,
|
|
||||||
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
|
|
||||||
float(event.span_us) / 1000))
|
|
||||||
if args.journal:
|
|
||||||
- journal.send(**journal_fields(event, AF_INET))
|
|
||||||
+ journal.send(**journal_fields(event, addr_family))
|
|
||||||
+
|
|
||||||
+def print_ipv4_event(cpu, data, size):
|
|
||||||
+ event = b["ipv4_events"].event(data)
|
|
||||||
+ print_event(event, AF_INET)
|
|
||||||
|
|
||||||
def print_ipv6_event(cpu, data, size):
|
|
||||||
event = b["ipv6_events"].event(data)
|
|
||||||
- global start_ts
|
|
||||||
- if args.time:
|
|
||||||
- if args.csv:
|
|
||||||
- print("%s," % strftime("%H:%M:%S"), end="")
|
|
||||||
- else:
|
|
||||||
- print("%-8s " % strftime("%H:%M:%S"), end="")
|
|
||||||
- if args.timestamp:
|
|
||||||
- if start_ts == 0:
|
|
||||||
- start_ts = event.ts_us
|
|
||||||
- delta_s = (float(event.ts_us) - start_ts) / 1000000
|
|
||||||
- if args.csv:
|
|
||||||
- print("%.6f," % delta_s, end="")
|
|
||||||
- else:
|
|
||||||
- print("%-9.6f " % delta_s, end="")
|
|
||||||
- print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
|
|
||||||
- "6" if args.wide or args.csv else "",
|
|
||||||
- inet_ntop(AF_INET6, event.saddr), event.lport,
|
|
||||||
- inet_ntop(AF_INET6, event.daddr), event.dport,
|
|
||||||
- tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
|
|
||||||
- float(event.span_us) / 1000))
|
|
||||||
- if args.journal:
|
|
||||||
- journal.send(**journal_fields(event, AF_INET6))
|
|
||||||
+ print_event(event, AF_INET6)
|
|
||||||
|
|
||||||
# initialize BPF
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -1,144 +0,0 @@
|
|||||||
From 53b89f35e8970beef55046c1bf035264f110f06d Mon Sep 17 00:00:00 2001
|
|
||||||
From: hejun01 <hejun01@corp.netease.com>
|
|
||||||
Date: Thu, 29 Jun 2023 20:24:07 +0800
|
|
||||||
Subject: [PATCH 1/2] tools/tcpstates: fix context ptr modified error
|
|
||||||
|
|
||||||
Introduce local variable tcp_new_state,
|
|
||||||
to avoid llvm optimization of args->newstate,
|
|
||||||
which would cause the context ptr args to be modified.
|
|
||||||
split event.ports into lport and dport.
|
|
||||||
switch type of TCP state from unsigned int to int.
|
|
||||||
---
|
|
||||||
tools/tcpstates.py | 47 +++++++++++++++++++++++++---------------------
|
|
||||||
1 file changed, 26 insertions(+), 21 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
|
|
||||||
index 89f3638c..9b2ccfa4 100755
|
|
||||||
--- a/tools/tcpstates.py
|
|
||||||
+++ b/tools/tcpstates.py
|
|
||||||
@@ -82,9 +82,10 @@ struct ipv4_data_t {
|
|
||||||
u32 daddr;
|
|
||||||
u64 span_us;
|
|
||||||
u32 pid;
|
|
||||||
- u32 ports;
|
|
||||||
- u32 oldstate;
|
|
||||||
- u32 newstate;
|
|
||||||
+ u16 lport;
|
|
||||||
+ u16 dport;
|
|
||||||
+ int oldstate;
|
|
||||||
+ int newstate;
|
|
||||||
char task[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
BPF_PERF_OUTPUT(ipv4_events);
|
|
||||||
@@ -96,9 +97,10 @@ struct ipv6_data_t {
|
|
||||||
unsigned __int128 daddr;
|
|
||||||
u64 span_us;
|
|
||||||
u32 pid;
|
|
||||||
- u32 ports;
|
|
||||||
- u32 oldstate;
|
|
||||||
- u32 newstate;
|
|
||||||
+ u16 lport;
|
|
||||||
+ u16 dport;
|
|
||||||
+ int oldstate;
|
|
||||||
+ int newstate;
|
|
||||||
char task[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
BPF_PERF_OUTPUT(ipv6_events);
|
|
||||||
@@ -132,6 +134,9 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
|
|
||||||
u16 family = args->family;
|
|
||||||
FILTER_FAMILY
|
|
||||||
|
|
||||||
+ // workaround to avoid llvm optimization which will cause context ptr args modified
|
|
||||||
+ int tcp_newstate = args->newstate;
|
|
||||||
+
|
|
||||||
if (args->family == AF_INET) {
|
|
||||||
struct ipv4_data_t data4 = {
|
|
||||||
.span_us = delta_us,
|
|
||||||
@@ -141,8 +146,8 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
|
|
||||||
data4.ts_us = bpf_ktime_get_ns() / 1000;
|
|
||||||
__builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
|
|
||||||
__builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
|
|
||||||
- // a workaround until data4 compiles with separate lport/dport
|
|
||||||
- data4.ports = dport + ((0ULL + lport) << 16);
|
|
||||||
+ data4.lport = lport;
|
|
||||||
+ data4.dport = dport;
|
|
||||||
data4.pid = pid;
|
|
||||||
|
|
||||||
bpf_get_current_comm(&data4.task, sizeof(data4.task));
|
|
||||||
@@ -157,14 +162,14 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
|
|
||||||
data6.ts_us = bpf_ktime_get_ns() / 1000;
|
|
||||||
__builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
|
|
||||||
__builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
|
|
||||||
- // a workaround until data6 compiles with separate lport/dport
|
|
||||||
- data6.ports = dport + ((0ULL + lport) << 16);
|
|
||||||
+ data6.lport = lport;
|
|
||||||
+ data6.dport = dport;
|
|
||||||
data6.pid = pid;
|
|
||||||
bpf_get_current_comm(&data6.task, sizeof(data6.task));
|
|
||||||
ipv6_events.perf_submit(args, &data6, sizeof(data6));
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (args->newstate == TCP_CLOSE) {
|
|
||||||
+ if (tcp_newstate == TCP_CLOSE) {
|
|
||||||
last.delete(&sk);
|
|
||||||
} else {
|
|
||||||
u64 ts = bpf_ktime_get_ns();
|
|
||||||
@@ -210,8 +215,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
|
|
||||||
data4.ts_us = bpf_ktime_get_ns() / 1000;
|
|
||||||
data4.saddr = sk->__sk_common.skc_rcv_saddr;
|
|
||||||
data4.daddr = sk->__sk_common.skc_daddr;
|
|
||||||
- // a workaround until data4 compiles with separate lport/dport
|
|
||||||
- data4.ports = dport + ((0ULL + lport) << 16);
|
|
||||||
+ data4.lport = lport;
|
|
||||||
+ data4.dport = dport;
|
|
||||||
data4.pid = pid;
|
|
||||||
|
|
||||||
bpf_get_current_comm(&data4.task, sizeof(data4.task));
|
|
||||||
@@ -228,8 +233,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
|
|
||||||
sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
|
|
||||||
bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
|
|
||||||
sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
|
|
||||||
- // a workaround until data6 compiles with separate lport/dport
|
|
||||||
- data6.ports = dport + ((0ULL + lport) << 16);
|
|
||||||
+ data6.lport = lport;
|
|
||||||
+ data6.dport = dport;
|
|
||||||
data6.pid = pid;
|
|
||||||
bpf_get_current_comm(&data6.task, sizeof(data6.task));
|
|
||||||
ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
|
|
||||||
@@ -346,9 +351,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
|
|
||||||
# Custom fields, aka "stuff we sort of made up".
|
|
||||||
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
|
|
||||||
- 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 16),
|
|
||||||
+ 'OBJECT_TCP_SOURCE_PORT': str(event.lport),
|
|
||||||
'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
|
|
||||||
- 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffff),
|
|
||||||
+ 'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
|
|
||||||
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
|
|
||||||
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
|
|
||||||
'OBJECT_TCP_SPAN_TIME': str(event.span_us)
|
|
||||||
@@ -386,8 +391,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
print("%-9.6f " % delta_s, end="")
|
|
||||||
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
|
|
||||||
"4" if args.wide or args.csv else "",
|
|
||||||
- inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 16,
|
|
||||||
- inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffff,
|
|
||||||
+ inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
|
|
||||||
+ inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
|
|
||||||
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
|
|
||||||
float(event.span_us) / 1000))
|
|
||||||
if args.journal:
|
|
||||||
@@ -411,8 +416,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
|
|
||||||
print("%-9.6f " % delta_s, end="")
|
|
||||||
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
|
|
||||||
"6" if args.wide or args.csv else "",
|
|
||||||
- inet_ntop(AF_INET6, event.saddr), event.ports >> 16,
|
|
||||||
- inet_ntop(AF_INET6, event.daddr), event.ports & 0xffff,
|
|
||||||
+ inet_ntop(AF_INET6, event.saddr), event.lport,
|
|
||||||
+ inet_ntop(AF_INET6, event.daddr), event.dport,
|
|
||||||
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
|
|
||||||
float(event.span_us) / 1000))
|
|
||||||
if args.journal:
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
|||||||
From 88274e83ca1a61699741d5b1d5499beb64cac753 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Mon, 16 Oct 2023 19:41:29 +0200
|
|
||||||
Subject: [PATCH] tools/trace: don't raise an exception in a ctype callback
|
|
||||||
|
|
||||||
To exit the tool when the maximal number of events is reached (-M
|
|
||||||
option), the tool currently calls exit(), which raises a SystemExit
|
|
||||||
exception. The handling of exceptions from ctypes callbacks doesn't seem
|
|
||||||
straightforward and is dependent on the Python version.
|
|
||||||
|
|
||||||
This patch avoids the issue altogether by using a global variable
|
|
||||||
instead.
|
|
||||||
|
|
||||||
Closes #3049
|
|
||||||
|
|
||||||
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
---
|
|
||||||
tools/trace.py | 5 +++--
|
|
||||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/trace.py b/tools/trace.py
|
|
||||||
index 9c7cca71..2aa096fa 100755
|
|
||||||
--- a/tools/trace.py
|
|
||||||
+++ b/tools/trace.py
|
|
||||||
@@ -43,6 +43,7 @@ import sys
|
|
||||||
build_id_enabled = False
|
|
||||||
aggregate = False
|
|
||||||
symcount = {}
|
|
||||||
+ done = False
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def configure(cls, args):
|
|
||||||
@@ -635,7 +636,7 @@ BPF_PERF_OUTPUT(%s);
|
|
||||||
if self.aggregate:
|
|
||||||
self.print_aggregate_events()
|
|
||||||
sys.stdout.flush()
|
|
||||||
- exit()
|
|
||||||
+ Probe.done = True;
|
|
||||||
|
|
||||||
def attach(self, bpf, verbose):
|
|
||||||
if len(self.library) == 0:
|
|
||||||
@@ -895,7 +896,7 @@ trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
|
|
||||||
"-" if not all_probes_trivial else ""))
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
- while True:
|
|
||||||
+ while not Probe.done:
|
|
||||||
self.bpf.perf_buffer_poll()
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
|||||||
|
From 5bc97bbc50b1ccf0c63f320ee73a2c0abe84b596 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jerome Marchand <jmarchan@redhat.com>
|
||||||
|
Date: Fri, 17 May 2024 15:36:07 +0200
|
||||||
|
Subject: [PATCH] clang: fail when the kheaders ownership is wrong (#4928)
|
||||||
|
(#4985)
|
||||||
|
|
||||||
|
file_exists_and_ownedby() returns -1 when the file exists but its
|
||||||
|
ownership is unexpected, which is very misleading since anything non
|
||||||
|
zero is interpreted as true and a function with such a name is
|
||||||
|
expected to return a boolean. So currently all this does, is write a
|
||||||
|
warning message, and continues as if nothing is wrong.
|
||||||
|
|
||||||
|
Make file_exists_and_ownedby() return false when the ownership is
|
||||||
|
wrong and have get_proc_kheaders() fail when this happens. Also have
|
||||||
|
all the *exists* functions return bool to avoid such issues in the
|
||||||
|
future.
|
||||||
|
|
||||||
|
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
|
||||||
|
---
|
||||||
|
src/cc/frontends/clang/kbuild_helper.cc | 22 +++++++++++++++++-----
|
||||||
|
1 file changed, 17 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/cc/frontends/clang/kbuild_helper.cc b/src/cc/frontends/clang/kbuild_helper.cc
|
||||||
|
index 9409e4cc..5d3ad9c2 100644
|
||||||
|
--- a/src/cc/frontends/clang/kbuild_helper.cc
|
||||||
|
+++ b/src/cc/frontends/clang/kbuild_helper.cc
|
||||||
|
@@ -140,20 +140,26 @@ int KBuildHelper::get_flags(const char *uname_machine, vector<string> *cflags) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline int file_exists_and_ownedby(const char *f, uid_t uid)
|
||||||
|
+static inline bool file_exists(const char *f)
|
||||||
|
+{
|
||||||
|
+ struct stat buffer;
|
||||||
|
+ return (stat(f, &buffer) == 0);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline bool file_exists_and_ownedby(const char *f, uid_t uid)
|
||||||
|
{
|
||||||
|
struct stat buffer;
|
||||||
|
int ret = stat(f, &buffer) == 0;
|
||||||
|
if (ret) {
|
||||||
|
if (buffer.st_uid != uid) {
|
||||||
|
std::cout << "ERROR: header file ownership unexpected: " << std::string(f) << "\n";
|
||||||
|
- return -1;
|
||||||
|
+ return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static inline int proc_kheaders_exists(void)
|
||||||
|
+static inline bool proc_kheaders_exists(void)
|
||||||
|
{
|
||||||
|
return file_exists_and_ownedby(PROC_KHEADERS_PATH, 0);
|
||||||
|
}
|
||||||
|
@@ -231,8 +237,14 @@ int get_proc_kheaders(std::string &dirpath)
|
||||||
|
uname_data.release);
|
||||||
|
dirpath = std::string(dirpath_tmp);
|
||||||
|
|
||||||
|
- if (file_exists_and_ownedby(dirpath_tmp, 0))
|
||||||
|
- return 0;
|
||||||
|
+ if (file_exists(dirpath_tmp)) {
|
||||||
|
+ if (file_exists_and_ownedby(dirpath_tmp, 0))
|
||||||
|
+ return 0;
|
||||||
|
+ else
|
||||||
|
+ // The path exists, but is owned by a non-root user
|
||||||
|
+ // Something fishy is going on
|
||||||
|
+ return -EEXIST;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
// First time so extract it
|
||||||
|
return extract_kheaders(dirpath, uname_data);
|
||||||
|
--
|
||||||
|
2.44.0
|
||||||
|
|
@ -24,21 +24,14 @@
|
|||||||
|
|
||||||
|
|
||||||
Name: bcc
|
Name: bcc
|
||||||
Version: 0.28.0
|
Version: 0.30.0
|
||||||
Release: 5%{?dist}
|
Release: 6%{?dist}
|
||||||
Summary: BPF Compiler Collection (BCC)
|
Summary: BPF Compiler Collection (BCC)
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
URL: https://github.com/iovisor/bcc
|
URL: https://github.com/iovisor/bcc
|
||||||
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
|
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
|
||||||
Patch0: %%{name}-%%{version}-tools-tcpstates-fix-context-ptr-modified-error.patch
|
Patch0: %%{name}-%%{version}-clang-fail-when-the-kheaders-ownership-is-wrong-4928.patch
|
||||||
Patch1: %%{name}-%%{version}-tools-tcpstates-fix-IPv6-journal.patch
|
|
||||||
Patch2: %%{name}-%%{version}-tools-Add-support-for-the-new-block_io_-tracepoints.patch
|
|
||||||
Patch3: %%{name}-%%{version}-tools-trace-don-t-raise-an-exception-in-a-ctype-call.patch
|
|
||||||
Patch4: %%{name}-%%{version}-libbpf-tools-add-block_io_-start-done-tracepoints-su.patch
|
|
||||||
Patch5: %%{name}-%%{version}-libbpf-tools-Add-s390x-support.patch
|
|
||||||
Patch6: %%{name}-%%{version}-Fixing-pvalloc-memleak-test.patch
|
|
||||||
Patch7: %%{name}-%%{version}-Skipping-USDT-tests-for-Power-processor.patch
|
|
||||||
Patch8: %%{name}-%%{version}-Adding-memory-zones-for-Power-server.patch
|
|
||||||
|
|
||||||
# Arches will be included as upstream support is added and dependencies are
|
# Arches will be included as upstream support is added and dependencies are
|
||||||
# satisfied in the respective arches
|
# satisfied in the respective arches
|
||||||
@ -122,6 +115,9 @@ Summary: Command line tools for BPF Compiler Collection (BCC)
|
|||||||
Requires: bcc = %{version}-%{release}
|
Requires: bcc = %{version}-%{release}
|
||||||
Requires: python3-%{name} = %{version}-%{release}
|
Requires: python3-%{name} = %{version}-%{release}
|
||||||
Requires: python3-netaddr
|
Requires: python3-netaddr
|
||||||
|
%ifnarch s390x
|
||||||
|
Requires: python3-pyelftools
|
||||||
|
%endif
|
||||||
|
|
||||||
%description tools
|
%description tools
|
||||||
Command line tools for BPF Compiler Collection (BCC)
|
Command line tools for BPF Compiler Collection (BCC)
|
||||||
@ -258,10 +254,36 @@ cp -a libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}/
|
|||||||
|
|
||||||
%if %{with libbpf_tools}
|
%if %{with libbpf_tools}
|
||||||
%files -n libbpf-tools
|
%files -n libbpf-tools
|
||||||
|
%ifarch s390x
|
||||||
|
%exclude %{_sbindir}/bpf-numamove
|
||||||
|
%endif
|
||||||
|
# RHEL doesn't provide btrfs or f2fs
|
||||||
|
%exclude %{_sbindir}/bpf-btrfs*
|
||||||
|
%exclude %{_sbindir}/bpf-f2fs*
|
||||||
%{_sbindir}/bpf-*
|
%{_sbindir}/bpf-*
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Jul 04 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-6
|
||||||
|
- Rebuild with LLVM 18 (RHEL-28684)
|
||||||
|
|
||||||
|
* Fri May 31 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-5
|
||||||
|
- Drop python3-pyelftools dependency on s390x until it is available
|
||||||
|
|
||||||
|
* Tue May 21 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-4
|
||||||
|
- Exclude btrfs and f2fs libbpf tools (RHEL-36579)
|
||||||
|
|
||||||
|
* Mon May 20 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-3
|
||||||
|
- Really prevent the loading of compromised headers (RHEL-28769, CVE-2024-2314)
|
||||||
|
- Add python3-pyelftools dependency (RHEL-36583)
|
||||||
|
|
||||||
|
* Fri May 03 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-2
|
||||||
|
- Rebuild (distrobaker didn't take last build)
|
||||||
|
|
||||||
|
* Wed Apr 10 2024 Jerome Marchand <jmarchan@redhat.com> - 0.30.0-1
|
||||||
|
- Rebase bcc to 0.30.0 (RHEL-29031)
|
||||||
|
- Exclude bpf-numamove on s390x (RHEL-32327)
|
||||||
|
|
||||||
* Wed Dec 13 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-5
|
* Wed Dec 13 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-5
|
||||||
- Fix libbpf bio tools (RHEL-19368)
|
- Fix libbpf bio tools (RHEL-19368)
|
||||||
- Add S390x support to libbpf-tools (RHEL-16325)
|
- Add S390x support to libbpf-tools (RHEL-16325)
|
||||||
|
Loading…
Reference in New Issue
Block a user