Import from AlmaLinux stable repository

This commit is contained in:
eabdullin 2024-05-15 07:08:36 +00:00
parent 5157f4fad1
commit e24d4b1193
17 changed files with 122865 additions and 569 deletions

View File

@ -1 +1 @@
059187f62e915eb74ea7b18e19fcb185f9d18255 SOURCES/bcc-0.25.0.tar.gz 8ce0ccb0724da475f127d62acc10a88569956474 SOURCES/bcc-0.28.0.tar.gz

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/bcc-0.25.0.tar.gz SOURCES/bcc-0.28.0.tar.gz

View File

@ -1,77 +0,0 @@
From c17a12ac030c5d9c812e611f8132570af0e795af Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 13 Aug 2022 17:50:07 -0700
Subject: [PATCH 1/2] Fix bpf_pseudo_fd() type conversion error
With llvm15 and llvm16, the following command line
sudo ./trace.py 'smp_call_function_single "%K", arg1'
will cause error:
/virtual/main.c:60:36: error: incompatible integer to pointer conversion passing 'u64'
(aka 'unsigned long long') to parameter of type 'void *' [-Wint-conversion]
bpf_perf_event_output(ctx, bpf_pseudo_fd(1, -1), CUR_CPU_IDENTIFIER, &__data, sizeof(__data));
^~~~~~~~~~~~~~~~~~~~
1 error generated.
Failed to compile BPF module <text>
In helpers.h, we have
u64 bpf_pseudo_fd(u64, u64) asm("llvm.bpf.pseudo");
Apparently, <= llvm14 can tolerate u64 -> 'void *' conversion, but
llvm15 by default will cause an error.
Let us explicitly convert bpf_pseudo_fd to 'void *' to avoid
such errors.
Signed-off-by: Yonghong Song <yhs@fb.com>
---
src/cc/frontends/clang/b_frontend_action.cc | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index a4e05b16..dbeba3e4 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -957,7 +957,7 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
string args_other = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(1)),
GET_ENDLOC(Call->getArg(2)))));
- txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")";
+ txt = "bpf_perf_event_output(" + arg0 + ", (void *)bpf_pseudo_fd(1, " + fd + ")";
txt += ", CUR_CPU_IDENTIFIER, " + args_other + ")";
// e.g.
@@ -986,7 +986,7 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
string meta_len = rewriter_.getRewrittenText(expansionRange(Call->getArg(3)->getSourceRange()));
txt = "bpf_perf_event_output(" +
skb + ", " +
- "bpf_pseudo_fd(1, " + fd + "), " +
+ "(void *)bpf_pseudo_fd(1, " + fd + "), " +
"((__u64)" + skb_len + " << 32) | BPF_F_CURRENT_CPU, " +
meta + ", " +
meta_len + ");";
@@ -1006,12 +1006,12 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
string keyp = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
string flag = rewriter_.getRewrittenText(expansionRange(Call->getArg(2)->getSourceRange()));
txt = "bpf_" + string(memb_name) + "(" + ctx + ", " +
- "bpf_pseudo_fd(1, " + fd + "), " + keyp + ", " + flag + ");";
+ "(void *)bpf_pseudo_fd(1, " + fd + "), " + keyp + ", " + flag + ");";
} else if (memb_name == "ringbuf_output") {
string name = string(Ref->getDecl()->getName());
string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
GET_ENDLOC(Call->getArg(2)))));
- txt = "bpf_ringbuf_output(bpf_pseudo_fd(1, " + fd + ")";
+ txt = "bpf_ringbuf_output((void *)bpf_pseudo_fd(1, " + fd + ")";
txt += ", " + args + ")";
// e.g.
@@ -1033,7 +1033,7 @@ bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
} else if (memb_name == "ringbuf_reserve") {
string name = string(Ref->getDecl()->getName());
string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
- txt = "bpf_ringbuf_reserve(bpf_pseudo_fd(1, " + fd + ")";
+ txt = "bpf_ringbuf_reserve((void *)bpf_pseudo_fd(1, " + fd + ")";
txt += ", " + arg0 + ", 0)"; // Flags in reserve are meaningless
} else if (memb_name == "ringbuf_discard") {
string name = string(Ref->getDecl()->getName());
--
2.38.1

View File

@ -1,96 +0,0 @@
From 9ae3908ae38b3e8d8e36a52c0e5664c453d4c015 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 26 Oct 2022 14:41:54 +0200
Subject: [PATCH 2/2] Fix clang 15 int to pointer conversion errors
Since version 15, clang issues an error for implicit conversion of
integer to pointer. Several tools are broken. This patch adds explicit
pointer casts where needed.
Fixes the following errors:
/virtual/main.c:37:18: error: incompatible integer to pointer conversion initializing 'struct request *' with an expression of type 'unsigned long' [-Wint-conversion]
struct request *req = ctx->di;
^ ~~~~~~~
/virtual/main.c:49:18: error: incompatible integer to pointer conversion initializing 'struct request *' with an expression of type 'unsigned long' [-Wint-conversion]
struct request *req = ctx->di;
^ ~~~~~~~
2 errors generated.
/virtual/main.c:73:19: error: incompatible integer to pointer conversion initializing 'struct pt_regs *' with an expression of type 'unsigned long' [-Wint-conversion]
struct pt_regs * __ctx = ctx->di;
^ ~~~~~~~
/virtual/main.c:100:240: error: incompatible integer to pointer conversion passing 'u64' (aka 'unsigned long long') to parameter of type 'const void *' [-Wint-conversion]
data.ppid = ({ typeof(pid_t) _val; __builtin_memset(&_val, 0, sizeof(_val)); bpf_probe_read(&_val, sizeof(_val), (u64)&({ typeof(struct task_struct *) _val; __builtin_memset(&_val, 0, sizeof(_val)); bpf_probe_read(&_val, sizeof(_val), (u64)&task->real_parent); _val; })->tgid); _val; });
^~~~~~~~~~~~~~~~~~~~~~~
/virtual/main.c:100:118: error: incompatible integer to pointer conversion passing 'u64' (aka 'unsigned long long') to parameter of type 'const void *' [-Wint-conversion]
data.ppid = ({ typeof(pid_t) _val; __builtin_memset(&_val, 0, sizeof(_val)); bpf_probe_read(&_val, sizeof(_val), (u64)&({ typeof(struct task_struct *) _val; __builtin_memset(&_val, 0, sizeof(_val)); bpf_probe_read(&_val, sizeof(_val), (u64)&task->real_parent); _val; })->tgid); _val; });
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
src/cc/frontends/clang/b_frontend_action.cc | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index dbeba3e4..c0582464 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -517,9 +517,9 @@ bool ProbeVisitor::VisitUnaryOperator(UnaryOperator *E) {
string pre, post;
pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
if (cannot_fall_back_safely)
- pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)";
+ pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)";
else
- pre += " bpf_probe_read(&_val, sizeof(_val), (u64)";
+ pre += " bpf_probe_read(&_val, sizeof(_val), (void *)";
post = "); _val; })";
rewriter_.ReplaceText(expansionLoc(E->getOperatorLoc()), 1, pre);
rewriter_.InsertTextAfterToken(expansionLoc(GET_ENDLOC(sub)), post);
@@ -581,9 +581,9 @@ bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
string pre, post;
pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
if (cannot_fall_back_safely)
- pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)&";
+ pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)&";
else
- pre += " bpf_probe_read(&_val, sizeof(_val), (u64)&";
+ pre += " bpf_probe_read(&_val, sizeof(_val), (void *)&";
post = rhs + "); _val; })";
rewriter_.InsertText(expansionLoc(GET_BEGINLOC(E)), pre);
rewriter_.ReplaceText(expansionRange(SourceRange(member, GET_ENDLOC(E))), post);
@@ -635,9 +635,9 @@ bool ProbeVisitor::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
if (cannot_fall_back_safely)
- pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)((";
+ pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)((";
else
- pre += " bpf_probe_read(&_val, sizeof(_val), (u64)((";
+ pre += " bpf_probe_read(&_val, sizeof(_val), (void *)((";
if (isMemberDereference(base)) {
pre += "&";
// If the base of the array subscript is a member dereference, we'll rewrite
@@ -747,8 +747,8 @@ void BTypeVisitor::genParamDirectAssign(FunctionDecl *D, string& preamble,
arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs"));
size_t d = idx - 1;
const char *reg = calling_conv_regs[d];
- preamble += " " + text + " = " + fn_args_[0]->getName().str() + "->" +
- string(reg) + ";";
+ preamble += " " + text + " = (" + arg->getType().getAsString() + ")" +
+ fn_args_[0]->getName().str() + "->" + string(reg) + ";";
}
}
}
@@ -762,7 +762,7 @@ void BTypeVisitor::genParamIndirectAssign(FunctionDecl *D, string& preamble,
if (idx == 0) {
new_ctx = "__" + arg->getName().str();
- preamble += " struct pt_regs * " + new_ctx + " = " +
+ preamble += " struct pt_regs * " + new_ctx + " = (void *)" +
arg->getName().str() + "->" +
string(calling_conv_regs[0]) + ";";
} else {
--
2.38.1

View File

@ -1,222 +0,0 @@
From 2f6565681e627d11dde0177503100669df020684 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Sun, 28 Aug 2022 07:44:01 +0200
Subject: [PATCH] Fix some documentation issues (#4197)
* compactsnoop-manpage: fix the name of the tool in the NAME section
In its manpage, the compactsnoop tool is called compactstall in the NAME
section. I don't know where that name comes from, but it should be
compactsnoop.
* dirtop-manpage: use '-d' option in the EXAMPLES section
The mandatory '-d' option of dirtop is missing in the EXAMPLES
section. Copy it from the usage message. Also remove '.py' suffixes.
* funclatency-manpage: fix typo in one of the examples
There is a spurious colon in one of the manpage examples. Remove it.
* tools/killsnoop: add '-s' option in the synopsis of the example file
Commit 33c8b1ac ("Update man page and example file") added '-s' option
to the manpage and an example in the example file, but missed the
synopsis in that latter case.
* trace-manpage: add missing options (-c,-n,-f and -B) to the synopsis
Copy the full synopsis from the usage message.
* tcptracer-manpage: add missing '-t' option in the manpage
Add '-t' option to the synopsis and description.
* tcpsubnet-manpage: remove '--ebpf' option from the manpage
This option is explicitly suppressed in argparse and no manpage of
other tools mentions it.
* manpages: remove '.py' suffix from the synopsis of some *snoop tools
Other manpages don't show the suffix, nor do the usage messages.
---
man/man8/bindsnoop.8 | 2 +-
man/man8/compactsnoop.8 | 4 ++--
man/man8/dirtop.8 | 8 ++++----
man/man8/drsnoop.8 | 2 +-
man/man8/funclatency.8 | 2 +-
man/man8/opensnoop.8 | 2 +-
man/man8/tcpsubnet.8 | 5 +----
man/man8/tcptracer.8 | 5 ++++-
man/man8/trace.8 | 6 ++++--
tools/killsnoop_example.txt | 2 ++
10 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/man/man8/bindsnoop.8 b/man/man8/bindsnoop.8
index f8fa1850..0eb42ccb 100644
--- a/man/man8/bindsnoop.8
+++ b/man/man8/bindsnoop.8
@@ -2,7 +2,7 @@
.SH NAME
bindsnoop \- Trace bind() system calls.
.SH SYNOPSIS
-.B bindsnoop.py [\fB-h\fP] [\fB-w\fP] [\fB-t\fP] [\fB-p\fP PID] [\fB-P\fP PORT] [\fB-E\fP] [\fB-U\fP] [\fB-u\fP UID] [\fB--count\fP] [\fB--cgroupmap MAP\fP] [\fB--mntnsmap MNTNSMAP\fP]
+.B bindsnoop [\fB-h\fP] [\fB-w\fP] [\fB-t\fP] [\fB-p\fP PID] [\fB-P\fP PORT] [\fB-E\fP] [\fB-U\fP] [\fB-u\fP UID] [\fB--count\fP] [\fB--cgroupmap MAP\fP] [\fB--mntnsmap MNTNSMAP\fP]
.SH DESCRIPTION
bindsnoop reports socket options set before the bind call that would impact this system call behavior.
.PP
diff --git a/man/man8/compactsnoop.8 b/man/man8/compactsnoop.8
index a2933d7a..e9cde0ce 100644
--- a/man/man8/compactsnoop.8
+++ b/man/man8/compactsnoop.8
@@ -1,8 +1,8 @@
.TH compactsnoop 8 "2019-11-1" "USER COMMANDS"
.SH NAME
-compactstall \- Trace compact zone events. Uses Linux eBPF/bcc.
+compactsnoop \- Trace compact zone events. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B compactsnoop.py [\-h] [\-T] [\-p PID] [\-d DURATION] [\-K] [\-e]
+.B compactsnoop [\-h] [\-T] [\-p PID] [\-d DURATION] [\-K] [\-e]
.SH DESCRIPTION
compactsnoop traces the compact zone events, showing which processes are
allocing pages with memory compaction. This can be useful for discovering
diff --git a/man/man8/dirtop.8 b/man/man8/dirtop.8
index cc61a676..eaa0c0c4 100644
--- a/man/man8/dirtop.8
+++ b/man/man8/dirtop.8
@@ -55,19 +55,19 @@ Number of interval summaries.
.TP
Summarize block device I/O by directory, 1 second screen refresh:
#
-.B dirtop.py
+.B dirtop -d '/hdfs/uuid/*/yarn'
.TP
Don't clear the screen, and top 8 rows only:
#
-.B dirtop.py -Cr 8
+.B dirtop -d '/hdfs/uuid/*/yarn' -Cr 8
.TP
5 second summaries, 10 times only:
#
-.B dirtop.py 5 10
+.B dirtop -d '/hdfs/uuid/*/yarn' 5 10
.TP
Report read & write IOs generated in mutliple yarn and data directories:
#
-.B dirtop.py -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data'
+.B dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data'
.SH FIELDS
.TP
loadavg:
diff --git a/man/man8/drsnoop.8 b/man/man8/drsnoop.8
index 90ca901f..8fb3789a 100644
--- a/man/man8/drsnoop.8
+++ b/man/man8/drsnoop.8
@@ -2,7 +2,7 @@
.SH NAME
drsnoop \- Trace direct reclaim events. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B drsnoop.py [\-h] [\-T] [\-U] [\-p PID] [\-t TID] [\-u UID] [\-d DURATION] [-n name] [-v]
+.B drsnoop [\-h] [\-T] [\-U] [\-p PID] [\-t TID] [\-u UID] [\-d DURATION] [-n name] [-v]
.SH DESCRIPTION
drsnoop trace direct reclaim events, showing which processes are allocing pages
with direct reclaiming. This can be useful for discovering when allocstall (/p-
diff --git a/man/man8/funclatency.8 b/man/man8/funclatency.8
index 9012b832..f96f6098 100644
--- a/man/man8/funclatency.8
+++ b/man/man8/funclatency.8
@@ -89,7 +89,7 @@ Print the BPF program (for debugging purposes).
.TP
Time vfs_read() for process ID 181 only:
#
-.B funclatency \-p 181 vfs_read:
+.B funclatency \-p 181 vfs_read
.TP
Time both vfs_fstat() and vfs_fstatat() calls, by use of a wildcard:
#
diff --git a/man/man8/opensnoop.8 b/man/man8/opensnoop.8
index fee83263..d1888772 100644
--- a/man/man8/opensnoop.8
+++ b/man/man8/opensnoop.8
@@ -2,7 +2,7 @@
.SH NAME
opensnoop \- Trace open() syscalls. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B opensnoop.py [\-h] [\-T] [\-U] [\-x] [\-p PID] [\-t TID] [\-u UID]
+.B opensnoop [\-h] [\-T] [\-U] [\-x] [\-p PID] [\-t TID] [\-u UID]
[\-d DURATION] [\-n NAME] [\-e] [\-f FLAG_FILTER]
[--cgroupmap MAPPATH] [--mntnsmap MAPPATH]
.SH DESCRIPTION
diff --git a/man/man8/tcpsubnet.8 b/man/man8/tcpsubnet.8
index 525b8082..ad5f1be1 100644
--- a/man/man8/tcpsubnet.8
+++ b/man/man8/tcpsubnet.8
@@ -2,7 +2,7 @@
.SH NAME
tcpsubnet \- Summarize and aggregate IPv4 TCP traffic by subnet.
.SH SYNOPSIS
-.B tcpsubnet [\-h] [\-v] [\--ebpf] [\-J] [\-f FORMAT] [\-i INTERVAL] [subnets]
+.B tcpsubnet [\-h] [\-v] [\-J] [\-f FORMAT] [\-i INTERVAL] [subnets]
.SH DESCRIPTION
This tool summarizes and aggregates IPv4 TCP sent to the subnets
passed in argument and prints to stdout on a fixed interval.
@@ -35,9 +35,6 @@ Interval between updates, seconds (default 1).
Format output units. Supported values are bkmBKM. When using
kmKM the output will be rounded to floor.
.TP
-\--ebpf
-Prints the BPF program.
-.TP
subnets
Comma separated list of subnets. Traffic will be categorized
in theses subnets. Order matters.
diff --git a/man/man8/tcptracer.8 b/man/man8/tcptracer.8
index 59240f4b..19a6164d 100644
--- a/man/man8/tcptracer.8
+++ b/man/man8/tcptracer.8
@@ -2,7 +2,7 @@
.SH NAME
tcptracer \- Trace TCP established connections. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B tcptracer [\-h] [\-v] [\-p PID] [\-N NETNS] [\-\-cgroupmap MAPPATH] [--mntnsmap MAPPATH] [\-4 | \-6]
+.B tcptracer [\-h] [\-v] [-t] [\-p PID] [\-N NETNS] [\-\-cgroupmap MAPPATH] [--mntnsmap MAPPATH] [\-4 | \-6]
.SH DESCRIPTION
This tool traces established TCP connections that open and close while tracing,
and prints a line of output per connect, accept and close events. This includes
@@ -23,6 +23,9 @@ Print usage message.
\-v
Print full lines, with long event type names and network namespace numbers.
.TP
+\-t
+Include timestamp on output
+.TP
\-p PID
Trace this process ID only (filtered in-kernel).
.TP
diff --git a/man/man8/trace.8 b/man/man8/trace.8
index c4417e5f..64a5e799 100644
--- a/man/man8/trace.8
+++ b/man/man8/trace.8
@@ -2,9 +2,11 @@
.SH NAME
trace \- Trace a function and print its arguments or return value, optionally evaluating a filter. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B trace [-h] [-b BUFFER_PAGES] [-p PID] [-L TID] [--uid UID] [-v] [-Z STRING_SIZE] [-S] [-s SYM_FILE_LIST]
- [-M MAX_EVENTS] [-t] [-u] [-T] [-C] [-K] [-U] [-a] [-I header] [-A]
+.B trace [-h] [-b BUFFER_PAGES] [-p PID] [-L TID] [--uid UID] [-v] [-Z STRING_SIZE] [-S] [-M MAX_EVENTS] [-t]
+ [-u] [-T] [-C] [-c CGROUP_PATH] [-n NAME] [-f MSG_FILTER] [-B] [-s SYM_FILE_LIST] [-K] [-U] [-a]
+ [-I header] [-A]
probe [probe ...]
+
.SH DESCRIPTION
trace probes functions you specify and displays trace messages if a particular
condition is met. You can control the message format to display function
diff --git a/tools/killsnoop_example.txt b/tools/killsnoop_example.txt
index 7746f2a0..038d09c6 100644
--- a/tools/killsnoop_example.txt
+++ b/tools/killsnoop_example.txt
@@ -27,6 +27,8 @@ Trace signals issued by the kill() syscall
-h, --help show this help message and exit
-x, --failed only show failed kill syscalls
-p PID, --pid PID trace this PID only
+ -s SIGNAL, --signal SIGNAL
+ trace this signal only
examples:
./killsnoop # trace all kill() signals
--
2.38.1

View File

@ -1,100 +0,0 @@
From 2e14fbaf9105e0b504f243ffc6d7d5a16e13a2a7 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 14 Oct 2022 13:01:58 +0000
Subject: [PATCH] bcc: support building with external libbpf package and older
uapi linux/bpf.h
When building bcc with a relatively new packaged libbpf (0.8.1)
and -DCMAKE_USE_LIBBPF_PACKAGE:BOOL=TRUE, multiple compilation
failures are encountered due to the fact that the system uapi header
in /usr/include/linux/bpf.h is not very recent (this is often
the case for distros, which sync it via a kernel headers
package quite conservatively due to use by glibc).
With libbpf built via git submodule, the uapi header included in
the libbpf package is used, so here a similar approach is proposed
for the external package build. Instead of having to sync
another file the already present compat/linux/virtual_bpf.h
is used; we copy it to compat/linux/bpf.h (eliminating the
string prefix/suffix on first/last lines).
From there, we ensure that places that assume the presence of
the libbpf git submodule point at compat/ as a location to
find the uapi header.
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
examples/cpp/CMakeLists.txt | 4 ++++
introspection/CMakeLists.txt | 4 ++++
src/cc/CMakeLists.txt | 6 ++++++
tests/cc/CMakeLists.txt | 4 ++++
4 files changed, 18 insertions(+)
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 801e6bad..8d09ae11 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -4,7 +4,11 @@
include_directories(${PROJECT_BINARY_DIR}/src/cc)
include_directories(${PROJECT_SOURCE_DIR}/src/cc)
include_directories(${PROJECT_SOURCE_DIR}/src/cc/api)
+if (CMAKE_USE_LIBBPF_PACKAGE AND LIBBPF_FOUND)
+include_directories(${PROJECT_SOURCE_DIR}/src/cc/compat)
+else()
include_directories(${PROJECT_SOURCE_DIR}/src/cc/libbpf/include/uapi)
+endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
diff --git a/introspection/CMakeLists.txt b/introspection/CMakeLists.txt
index dcbe69a3..ce2d03dc 100644
--- a/introspection/CMakeLists.txt
+++ b/introspection/CMakeLists.txt
@@ -3,7 +3,11 @@
include_directories(${PROJECT_SOURCE_DIR}/src/cc)
include_directories(${PROJECT_SOURCE_DIR}/src/cc/api)
+if (CMAKE_USE_LIBBPF_PACKAGE AND LIBBPF_FOUND)
+include_directories(${PROJECT_SOURCE_DIR}/src/cc/compat)
+else()
include_directories(${PROJECT_SOURCE_DIR}/src/cc/libbpf/include/uapi)
+endif()
option(INSTALL_INTROSPECTION "Install BPF introspection tools" ON)
option(BPS_LINK_RT "Pass -lrt to linker when linking bps tool" ON)
diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt
index ffe8feec..c7f53530 100644
--- a/src/cc/CMakeLists.txt
+++ b/src/cc/CMakeLists.txt
@@ -15,6 +15,12 @@ endif (LIBDEBUGINFOD_FOUND)
# todo: if check for kernel version
if (CMAKE_USE_LIBBPF_PACKAGE AND LIBBPF_FOUND)
include_directories(${LIBBPF_INCLUDE_DIRS})
+ # create up-to-date linux/bpf.h from virtual_bpf.h (remove string wrapper);
+ # when libbpf is built as a submodule we use its version of linux/bpf.h
+ # so this does similar for the libbpf package, removing reliance on the
+ # system uapi header which can be out of date.
+ execute_process(COMMAND sh -c "cd ${CMAKE_CURRENT_SOURCE_DIR}/compat/linux && grep -ve '\\*\\*\\*\\*' virtual_bpf.h > bpf.h")
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/compat)
else()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libbpf/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libbpf/include/uapi)
diff --git a/tests/cc/CMakeLists.txt b/tests/cc/CMakeLists.txt
index 677867d7..47056455 100644
--- a/tests/cc/CMakeLists.txt
+++ b/tests/cc/CMakeLists.txt
@@ -3,7 +3,11 @@
include_directories(${PROJECT_SOURCE_DIR}/src/cc)
include_directories(${PROJECT_SOURCE_DIR}/src/cc/api)
+if (CMAKE_USE_LIBBPF_PACKAGE AND LIBBPF_FOUND)
+include_directories(${PROJECT_SOURCE_DIR}/src/cc/compat)
+else()
include_directories(${PROJECT_SOURCE_DIR}/src/cc/libbpf/include/uapi)
+endif()
include_directories(${PROJECT_SOURCE_DIR}/tests/python/include)
add_executable(test_static test_static.c)
--
2.37.3

View File

@ -1,64 +0,0 @@
From acc8800b6f4380b6f4c7f04ee9a1263cf11deb35 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Tue, 20 Dec 2022 11:33:51 +0100
Subject: [PATCH] tools: nfsslower: fix an uninitialized struct error
Fixes the following error:
bpf: Failed to load program: Permission denied
reg type unsupported for arg#0 function trace_read_return#22
0: R1=ctx(off=0,imm=0) R10=fp0
; int trace_read_return(struct pt_regs *ctx)
0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0)
[...]
; bpf_probe_read_kernel(&data.file, sizeof(data.file), (void *)qs.name);
75: (b7) r2 = 32 ; R2_w=32
76: (85) call bpf_probe_read_kernel#113 ; R0_w=scalar() fp-16=????mmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm
; bpf_perf_event_output(ctx, (void *)bpf_pseudo_fd(1, -2), CUR_CPU_IDENTIFIER, &data, sizeof(data));
77: (18) r2 = 0xffff9a0a42177200 ; R2_w=map_ptr(off=0,ks=4,vs=4,imm=0)
79: (bf) r4 = r10 ; R4_w=fp0 R10=fp0
; bpf_probe_read_kernel(&data.file, sizeof(data.file), (void *)qs.name);
80: (07) r4 += -104 ; R4_w=fp-104
; bpf_perf_event_output(ctx, (void *)bpf_pseudo_fd(1, -2), CUR_CPU_IDENTIFIER, &data, sizeof(data));
81: (bf) r1 = r6 ; R1_w=ctx(off=0,imm=0) R6=ctx(off=0,imm=0)
82: (18) r3 = 0xffffffff ; R3_w=4294967295
84: (b7) r5 = 96 ; R5_w=96
85: (85) call bpf_perf_event_output#25
invalid indirect read from stack R4 off -104+92 size 96
processed 82 insns (limit 1000000) max_states_per_insn 0 total_states 4 peak_states 4 mark_read 3
Traceback (most recent call last):
File "/usr/share/bcc/tools/nfsslower", line 283, in <module>
b.attach_kretprobe(event="nfs_file_read", fn_name="trace_read_return")
File "/usr/lib/python3.9/site-packages/bcc/__init__.py", line 872, in attach_kretprobe
fn = self.load_func(fn_name, BPF.KPROBE)
File "/usr/lib/python3.9/site-packages/bcc/__init__.py", line 523, in load_func
raise Exception("Failed to load BPF program %s: %s" %
Exception: Failed to load BPF program b'trace_read_return': Permission denied
---
tools/nfsslower.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/tools/nfsslower.py b/tools/nfsslower.py
index b5df8f19..682a3fb7 100755
--- a/tools/nfsslower.py
+++ b/tools/nfsslower.py
@@ -179,8 +179,12 @@ static int trace_exit(struct pt_regs *ctx, int type)
// populate output struct
u32 size = PT_REGS_RC(ctx);
- struct data_t data = {.type = type, .size = size, .delta_us = delta_us,
- .pid = pid};
+ struct data_t data;
+ __builtin_memset(&data, 0, sizeof(data));
+ data.type = type;
+ data.size = size;
+ data.delta_us = delta_us;
+ data.pid = pid;
data.ts_us = ts / 1000;
data.offset = valp->offset;
bpf_get_current_comm(&data.task, sizeof(data.task));
--
2.38.1

View File

@ -0,0 +1,66 @@
From 63808fbdcb70ce2e858db0a42e7e3eeec153d5b6 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Wed, 20 Sep 2023 10:37:38 -0400
Subject: [PATCH 4/4] Adding memory zones for Power server
config PPC_BOOK3S_64 skips setting ZONE_DMA for
server processor. NORMAL and MOVABLE zones are
available on Power.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/compactsnoop.py | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/tools/compactsnoop.py b/tools/compactsnoop.py
index 2b395dec..1a476aad 100755
--- a/tools/compactsnoop.py
+++ b/tools/compactsnoop.py
@@ -260,11 +260,12 @@ TRACEPOINT_PROBE(compaction, mm_compaction_end)
}
"""
-if platform.machine() != 'x86_64':
+if platform.machine() != 'x86_64' and platform.machine() != 'ppc64le':
print("""
- Currently only support x86_64 servers, if you want to use it on
- other platforms, please refer include/linux/mmzone.h to modify
- zone_idex_to_str to get the right zone type
+ Currently only support x86_64 and power servers, if you want
+ to use it on other platforms(including power embedded processors),
+ please refer include/linux/mmzone.h to modify zone_idex_to_str to
+ get the right zone type
""")
exit()
@@ -296,13 +297,22 @@ initial_ts = 0
# from include/linux/mmzone.h
# NOTICE: consider only x86_64 servers
zone_type = {
- 0: "ZONE_DMA",
- 1: "ZONE_DMA32",
- 2: "ZONE_NORMAL",
+ 'x86_64':
+ {
+ 0: "ZONE_DMA",
+ 1: "ZONE_DMA32",
+ 2: "ZONE_NORMAL"
+ },
+ # Zones in Power server only
+ 'ppc64le':
+ {
+ 0: "ZONE_NORMAL",
+ 1: "ZONE_MOVABLE"
+ }
}
- if idx in zone_type:
- return zone_type[idx]
+ if idx in zone_type[platform.machine()]:
+ return zone_type[platform.machine()][idx]
else:
return str(idx)
--
2.43.0

View File

@ -0,0 +1,45 @@
From e6493835a28c08c45fd374e70dba7aa66f700d08 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Tue, 14 Nov 2023 03:54:19 -0500
Subject: [PATCH 2/4] Fixing pvalloc memleak test
Request to allocate 30K bytes using pvalloc(), results
in allocating 3*64Kb(on 64Kb pagesize system). The assertion
expects leak to be 30Kb, whereas leaked memory is much more
due to pvalloc's implementation for power.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tests/python/test_tools_memleak.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tests/python/test_tools_memleak.py b/tests/python/test_tools_memleak.py
index cae7e35d..4e921a0c 100755
--- a/tests/python/test_tools_memleak.py
+++ b/tests/python/test_tools_memleak.py
@@ -3,6 +3,7 @@
from unittest import main, skipUnless, TestCase
from utils import kernel_version_ge
import os
+import platform
import subprocess
import sys
import tempfile
@@ -102,7 +103,13 @@ TOOLS_DIR = "/bcc/tools/"
self.assertEqual(cfg.leaking_amount, self.run_leaker("memalign"))
def test_pvalloc(self):
- self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
+ # pvalloc's implementation for power invokes mmap(), which adjusts the
+ # allocated size to meet pvalloc's constraints. Actual leaked memory
+ # could be more than requested, hence assertLessEqual.
+ if platform.machine() == 'ppc64le':
+ self.assertLessEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
+ else:
+ self.assertEqual(cfg.leaking_amount, self.run_leaker("pvalloc"))
def test_aligned_alloc(self):
self.assertEqual(cfg.leaking_amount, self.run_leaker("aligned_alloc"))
--
2.43.0

View File

@ -0,0 +1,41 @@
From a5d86850e3bfeaa23ef4c82dccb9288a2cd42a27 Mon Sep 17 00:00:00 2001
From: Abhishek Dubey <adubey@linux.ibm.com>
Date: Mon, 11 Sep 2023 05:10:36 -0400
Subject: [PATCH 3/4] Skipping USDT tests for Power processor
Support for Power processor in folly package is absent,
so skipping USDT tests having dependency on folly.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tests/python/CMakeLists.txt | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt
index a42a16ce..81a547f0 100644
--- a/tests/python/CMakeLists.txt
+++ b/tests/python/CMakeLists.txt
@@ -71,12 +71,14 @@ add_test(NAME py_test_tools_smoke WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_tools_smoke sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_smoke.py)
add_test(NAME py_test_tools_memleak WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_tools_memleak sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_tools_memleak.py)
-add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
-add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
-add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
+if(NOT(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64"))
+ add_test(NAME py_test_usdt WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt.py)
+ add_test(NAME py_test_usdt2 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt2 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt2.py)
+ add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
+endif()
add_test(NAME py_test_license WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_license sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_license.py)
add_test(NAME py_test_free_bcc_memory WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
--
2.43.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,476 @@
From 60860bf3a400dcf72b4026fb2973803cfb12ccf1 Mon Sep 17 00:00:00 2001
From: mickey_zhu <mickey_zhu@realsil.com.cn>
Date: Tue, 27 Jun 2023 16:32:44 +0800
Subject: [PATCH] libbpf-tools: add block_io_{start,done} tracepoints support
to bio tools
Some bio tools fail to kprobe blk_account_io_{start,done} after v5.17,
because they become inlined, see [0]. To fix this issue, tracepoints
block_io_{start,done} are introduced in the kernel, see [1].
Update related bio tools to support new tracepoints, and also simplify
attach.
[0] Kernel commit 450b7879e345 (block: move blk_account_io_{start,done} to blk-mq.c)
[1] Kernel commit 5a80bd075f3b (block: introduce block_io_start/block_io_done tracepoints)
Change-Id: I62b957abd7ce2901eb114bd57c78938e4f083e4d
Signed-off-by: Mickey Zhu <mickey_zhu@realsil.com.cn>
---
libbpf-tools/biosnoop.bpf.c | 9 ++++
libbpf-tools/biosnoop.c | 78 +++++++++++++--------------------
libbpf-tools/biostacks.bpf.c | 46 +++++++++++++------
libbpf-tools/biostacks.c | 85 +++++++++++++++++++++---------------
libbpf-tools/biotop.bpf.c | 44 +++++++++++++++++--
libbpf-tools/biotop.c | 59 ++++++++++++++++---------
6 files changed, 199 insertions(+), 122 deletions(-)
diff --git a/libbpf-tools/biosnoop.bpf.c b/libbpf-tools/biosnoop.bpf.c
index b791555f..fcc5c5ce 100644
--- a/libbpf-tools/biosnoop.bpf.c
+++ b/libbpf-tools/biosnoop.bpf.c
@@ -76,6 +76,15 @@ int BPF_PROG(blk_account_io_start, struct request *rq)
return trace_pid(rq);
}
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *rq)
+{
+ if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
+ return 0;
+
+ return trace_pid(rq);
+}
+
SEC("kprobe/blk_account_io_merge_bio")
int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
{
diff --git a/libbpf-tools/biosnoop.c b/libbpf-tools/biosnoop.c
index 21773729..f9468900 100644
--- a/libbpf-tools/biosnoop.c
+++ b/libbpf-tools/biosnoop.c
@@ -212,6 +212,16 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
}
+static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
+{
+ if (fentry_can_attach("blk_account_io_start", NULL))
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "blk_account_io_start");
+ else
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "__blk_account_io_start");
+}
+
int main(int argc, char **argv)
{
const struct partition *partition;
@@ -260,12 +270,23 @@ int main(int argc, char **argv)
obj->rodata->filter_cg = env.cg;
obj->rodata->min_ns = env.min_lat_ms * 1000000;
- if (fentry_can_attach("blk_account_io_start", NULL))
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "blk_account_io_start");
- else
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "__blk_account_io_start");
+ if (tracepoint_exists("block", "block_io_start"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ else {
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ blk_account_io_set_attach_target(obj);
+ }
+
+ ksyms = ksyms__load();
+ if (!ksyms) {
+ fprintf(stderr, "failed to load kallsyms\n");
+ goto cleanup;
+ }
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
+
+ if (!env.queued)
+ bpf_program__set_autoload(obj->progs.block_rq_insert, false);
err = biosnoop_bpf__load(obj);
if (err) {
@@ -288,48 +309,9 @@ int main(int argc, char **argv)
}
}
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
- if (!obj->links.blk_account_io_start) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
- strerror(-err));
- goto cleanup;
- }
- ksyms = ksyms__load();
- if (!ksyms) {
- err = -ENOMEM;
- fprintf(stderr, "failed to load kallsyms\n");
- goto cleanup;
- }
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
- obj->links.blk_account_io_merge_bio =
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
- if (!obj->links.blk_account_io_merge_bio) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
- strerror(-err));
- goto cleanup;
- }
- }
- if (env.queued) {
- obj->links.block_rq_insert =
- bpf_program__attach(obj->progs.block_rq_insert);
- if (!obj->links.block_rq_insert) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
- goto cleanup;
- }
- }
- obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
- if (!obj->links.block_rq_issue) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
- goto cleanup;
- }
- obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
- if (!obj->links.block_rq_complete) {
- err = -errno;
- fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
+ err = biosnoop_bpf__attach(obj);
+ if (err) {
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
goto cleanup;
}
diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
index c3950910..0ca69880 100644
--- a/libbpf-tools/biostacks.bpf.c
+++ b/libbpf-tools/biostacks.bpf.c
@@ -67,20 +67,8 @@ int trace_start(void *ctx, struct request *rq, bool merge_bio)
return 0;
}
-SEC("fentry/blk_account_io_start")
-int BPF_PROG(blk_account_io_start, struct request *rq)
-{
- return trace_start(ctx, rq, false);
-}
-
-SEC("kprobe/blk_account_io_merge_bio")
-int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
-{
- return trace_start(ctx, rq, true);
-}
-
-SEC("fentry/blk_account_io_done")
-int BPF_PROG(blk_account_io_done, struct request *rq)
+static __always_inline
+int trace_done(void *ctx, struct request *rq)
{
u64 slot, ts = bpf_ktime_get_ns();
struct internal_rqinfo *i_rqinfop;
@@ -110,4 +98,34 @@ int BPF_PROG(blk_account_io_done, struct request *rq)
return 0;
}
+SEC("kprobe/blk_account_io_merge_bio")
+int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
+{
+ return trace_start(ctx, rq, true);
+}
+
+SEC("fentry/blk_account_io_start")
+int BPF_PROG(blk_account_io_start, struct request *rq)
+{
+ return trace_start(ctx, rq, false);
+}
+
+SEC("fentry/blk_account_io_done")
+int BPF_PROG(blk_account_io_done, struct request *rq)
+{
+ return trace_done(ctx, rq);
+}
+
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *rq)
+{
+ return trace_start(ctx, rq, false);
+}
+
+SEC("tp_btf/block_io_done")
+int BPF_PROG(block_io_done, struct request *rq)
+{
+ return trace_done(ctx, rq);
+}
+
char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
index e1878d1f..e7875f76 100644
--- a/libbpf-tools/biostacks.c
+++ b/libbpf-tools/biostacks.c
@@ -128,6 +128,39 @@ void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
return;
}
+static bool has_block_io_tracepoints(void)
+{
+ return tracepoint_exists("block", "block_io_start") &&
+ tracepoint_exists("block", "block_io_done");
+}
+
+static void disable_block_io_tracepoints(struct biostacks_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
+}
+
+static void disable_blk_account_io_fentry(struct biostacks_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+}
+
+static void blk_account_io_set_attach_target(struct biostacks_bpf *obj)
+{
+ if (fentry_can_attach("blk_account_io_start", NULL)) {
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "blk_account_io_start");
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
+ 0, "blk_account_io_done");
+ } else {
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
+ 0, "__blk_account_io_start");
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
+ 0, "__blk_account_io_done");
+ }
+}
+
int main(int argc, char **argv)
{
struct partitions *partitions = NULL;
@@ -172,50 +205,30 @@ int main(int argc, char **argv)
obj->rodata->targ_ms = env.milliseconds;
- if (fentry_can_attach("blk_account_io_start", NULL)) {
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "blk_account_io_start");
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
- "blk_account_io_done");
- } else {
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
- "__blk_account_io_start");
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
- "__blk_account_io_done");
- }
-
- err = biostacks_bpf__load(obj);
- if (err) {
- fprintf(stderr, "failed to load BPF object: %d\n", err);
- goto cleanup;
+ if (has_block_io_tracepoints())
+ disable_blk_account_io_fentry(obj);
+ else {
+ disable_block_io_tracepoints(obj);
+ blk_account_io_set_attach_target(obj);
}
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
- if (!obj->links.blk_account_io_start) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(-err));
- goto cleanup;
- }
ksyms = ksyms__load();
if (!ksyms) {
fprintf(stderr, "failed to load kallsyms\n");
goto cleanup;
}
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
- obj->links.blk_account_io_merge_bio =
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
- if (!obj->links.blk_account_io_merge_bio) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
- strerror(-err));
- goto cleanup;
- }
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
+
+ err = biostacks_bpf__load(obj);
+ if (err) {
+ fprintf(stderr, "failed to load BPF object: %d\n", err);
+ goto cleanup;
}
- obj->links.blk_account_io_done = bpf_program__attach(obj->progs.blk_account_io_done);
- if (!obj->links.blk_account_io_done) {
- err = -errno;
- fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
- strerror(-err));
+
+ err = biostacks_bpf__attach(obj);
+ if (err) {
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
goto cleanup;
}
diff --git a/libbpf-tools/biotop.bpf.c b/libbpf-tools/biotop.bpf.c
index 226e32d3..07631378 100644
--- a/libbpf-tools/biotop.bpf.c
+++ b/libbpf-tools/biotop.bpf.c
@@ -30,8 +30,8 @@ struct {
__type(value, struct val_t);
} counts SEC(".maps");
-SEC("kprobe")
-int BPF_KPROBE(blk_account_io_start, struct request *req)
+static __always_inline
+int trace_start(struct request *req)
{
struct who_t who = {};
@@ -56,8 +56,8 @@ int BPF_KPROBE(blk_mq_start_request, struct request *req)
return 0;
}
-SEC("kprobe")
-int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
+static __always_inline
+int trace_done(struct request *req)
{
struct val_t *valp, zero = {};
struct info_t info = {};
@@ -103,4 +103,40 @@ int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
return 0;
}
+SEC("kprobe/blk_account_io_start")
+int BPF_KPROBE(blk_account_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("kprobe/blk_account_io_done")
+int BPF_KPROBE(blk_account_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
+SEC("kprobe/__blk_account_io_start")
+int BPF_KPROBE(__blk_account_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("kprobe/__blk_account_io_done")
+int BPF_KPROBE(__blk_account_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
+SEC("tp_btf/block_io_start")
+int BPF_PROG(block_io_start, struct request *req)
+{
+ return trace_start(req);
+}
+
+SEC("tp_btf/block_io_done")
+int BPF_PROG(block_io_done, struct request *req)
+{
+ return trace_done(req);
+}
+
char LICENSE[] SEC("license") = "GPL";
diff --git a/libbpf-tools/biotop.c b/libbpf-tools/biotop.c
index 75484281..5b3a7cf3 100644
--- a/libbpf-tools/biotop.c
+++ b/libbpf-tools/biotop.c
@@ -354,6 +354,38 @@ static int print_stat(struct biotop_bpf *obj)
return err;
}
+static bool has_block_io_tracepoints(void)
+{
+ return tracepoint_exists("block", "block_io_start") &&
+ tracepoint_exists("block", "block_io_done");
+}
+
+static void disable_block_io_tracepoints(struct biotop_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
+}
+
+static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
+}
+
+static void blk_account_io_set_autoload(struct biotop_bpf *obj,
+ struct ksyms *ksyms)
+{
+ if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
+ } else {
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
+ }
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -386,32 +418,19 @@ int main(int argc, char **argv)
goto cleanup;
}
+ if (has_block_io_tracepoints())
+ disable_blk_account_io_kprobes(obj);
+ else {
+ disable_block_io_tracepoints(obj);
+ blk_account_io_set_autoload(obj, ksyms);
+ }
+
err = biotop_bpf__load(obj);
if (err) {
warn("failed to load BPF object: %d\n", err);
goto cleanup;
}
- if (ksyms__get_symbol(ksyms, "__blk_account_io_start"))
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "__blk_account_io_start");
- else
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "blk_account_io_start");
-
- if (!obj->links.blk_account_io_start) {
- warn("failed to load attach blk_account_io_start\n");
- goto cleanup;
- }
-
- if (ksyms__get_symbol(ksyms, "__blk_account_io_done"))
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "__blk_account_io_done");
- else
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "blk_account_io_done");
-
- if (!obj->links.blk_account_io_done) {
- warn("failed to load attach blk_account_io_done\n");
- goto cleanup;
- }
-
err = biotop_bpf__attach(obj);
if (err) {
warn("failed to attach BPF programs: %d\n", err);
--
2.41.0

View File

@ -0,0 +1,855 @@
From 2e758b65231f976c67a0aad791aabc7927ea7086 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Thu, 27 Jul 2023 18:19:18 +0200
Subject: [PATCH] tools: Add support for the new block_io_* tracepoints
The bio tools currently depend on blk_account_io_done/start functions
that can be inlined. To fix that, a couple of tracepoints have been
added upstream (block:block_io_start/done). This patch adds support
for those tracepoints when they are available.
Unfortunately, the bio tools rely on data that is not available to
the tracepoints (mostly the struct request). So the tracepoints can't
be used as a drop-in replacement for blk_account_io_*. The main difference
is that we can't use the struct request as the hash key anymore, so the
tools now use the pair (dev_t, sector) for that purpose.
For the biolatency tool, the -F option is disabled when only the
tracepoints are available because the flags are not all accessible
from the tracepoints. Otherwise, all features of the tools should
remain.
Closes #4261
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/biolatency.py | 166 ++++++++++++++++++++++++++++--------
tools/biosnoop.py | 200 +++++++++++++++++++++++++++++++++-----------
tools/biotop.py | 108 +++++++++++++++++++-----
3 files changed, 371 insertions(+), 103 deletions(-)
diff --git a/tools/biolatency.py b/tools/biolatency.py
index 8fe43a7c..03b48a4c 100755
--- a/tools/biolatency.py
+++ b/tools/biolatency.py
@@ -11,6 +11,7 @@
#
# 20-Sep-2015 Brendan Gregg Created this.
# 31-Mar-2022 Rocky Xing Added disk filter support.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -72,7 +73,7 @@ bpf_text = """
#include <linux/blk-mq.h>
typedef struct disk_key {
- char disk[DISK_NAME_LEN];
+ dev_t dev;
u64 slot;
} disk_key_t;
@@ -86,26 +87,70 @@ typedef struct ext_val {
u64 count;
} ext_val_t;
-BPF_HASH(start, struct request *);
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct start_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
+ CMD_FLAGS
+};
+
+BPF_HASH(start, struct start_key);
STORAGE
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
// time block I/O
-int trace_req_start(struct pt_regs *ctx, struct request *req)
+static int __trace_req_start(struct start_key key)
{
DISK_FILTER
u64 ts = bpf_ktime_get_ns();
- start.update(&req, &ts);
+ start.update(&key, &ts);
return 0;
}
+int trace_req_start(struct pt_regs *ctx, struct request *req)
+{
+ struct start_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ SET_FLAGS
+
+ return __trace_req_start(key);
+}
+
+int trace_req_start_tp(struct tp_args *args)
+{
+ struct start_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_start(key);
+}
+
// output
-int trace_req_done(struct pt_regs *ctx, struct request *req)
+static int __trace_req_done(struct start_key key)
{
u64 *tsp, delta;
// fetch timestamp and calculate delta
- tsp = start.lookup(&req);
+ tsp = start.lookup(&key);
if (tsp == 0) {
return 0; // missed issue
}
@@ -116,9 +161,31 @@ int trace_req_done(struct pt_regs *ctx, struct request *req)
// store as histogram
STORE
- start.delete(&req);
+ start.delete(&key);
return 0;
}
+
+int trace_req_done(struct pt_regs *ctx, struct request *req)
+{
+ struct start_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ SET_FLAGS
+
+ return __trace_req_done(key);
+}
+
+int trace_req_done_tp(struct tp_args *args)
+{
+ struct start_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_done(key);
+}
"""
# code substitutions
@@ -134,21 +201,18 @@ store_str = ""
if args.disks:
storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
disks_str = """
- disk_key_t key = {.slot = bpf_log2l(delta)};
- void *__tmp = (void *)req->__RQ_DISK__->disk_name;
- bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
- dist.atomic_increment(key);
+ disk_key_t dkey = {};
+ dkey.dev = key.dev;
+ dkey.slot = bpf_log2l(delta);
+ dist.atomic_increment(dkey);
"""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
- else:
- store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
+ store_str += disks_str
elif args.flags:
storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
store_str += """
- flag_key_t key = {.slot = bpf_log2l(delta)};
- key.flags = req->cmd_flags;
- dist.atomic_increment(key);
+ flag_key_t fkey = {.slot = bpf_log2l(delta)};
+ fkey.flags = key.flags;
+ dist.atomic_increment(fkey);
"""
else:
storage_str += "BPF_HISTOGRAM(dist);"
@@ -161,21 +225,13 @@ store_str = ""
exit(1)
stat_info = os.stat(disk_path)
- major = os.major(stat_info.st_rdev)
- minor = os.minor(stat_info.st_rdev)
-
- disk_field_str = ""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- disk_field_str = 'req->rq_disk'
- else:
- disk_field_str = 'req->q->disk'
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
disk_filter_str = """
- struct gendisk *disk = %s;
- if (!(disk->major == %d && disk->first_minor == %d)) {
+ if(key.dev != %s) {
return 0;
}
- """ % (disk_field_str, major, minor)
+ """ % (dev)
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
else:
@@ -194,6 +250,16 @@ store_str = ""
bpf_text = bpf_text.replace("STORAGE", storage_str)
bpf_text = bpf_text.replace("STORE", store_str)
+if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
+else:
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
+if args.flags:
+ bpf_text = bpf_text.replace('CMD_FLAGS', 'u64 flags;')
+ bpf_text = bpf_text.replace('SET_FLAGS', 'key.flags = req->cmd_flags;')
+else:
+ bpf_text = bpf_text.replace('CMD_FLAGS', '')
+ bpf_text = bpf_text.replace('SET_FLAGS', '')
if debug or args.ebpf:
print(bpf_text)
@@ -205,25 +271,53 @@ b = BPF(text=bpf_text)
if args.queued:
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
- else:
+ elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
+ else:
+ if args.flags:
+ # Some flags are accessible in the rwbs field (RAHEAD, SYNC and META)
+ # but other aren't. Disable the -F option for tracepoint for now.
+ print("ERROR: blk_account_io_start probe not available. Can't use -F.")
+ exit()
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_req_start_tp")
else:
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
+
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
+else:
+ if args.flags:
+ print("ERROR: blk_account_io_done probe not available. Can't use -F.")
+ exit()
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_done_tp")
+
if not args.json:
print("Tracing block device I/O... Hit Ctrl-C to end.")
-def disk_print(s):
- disk = s.decode('utf-8', 'replace')
- if not disk:
- disk = "<unknown>"
- return disk
+# cache disk major,minor -> diskname
+diskstats = "/proc/diskstats"
+disklookup = {}
+with open(diskstats) as stats:
+ for line in stats:
+ a = line.split()
+ disklookup[a[0] + "," + a[1]] = a[2]
+
+def disk_print(d):
+ major = d >> 20
+ minor = d & ((1 << 20) - 1)
+
+ disk = str(major) + "," + str(minor)
+ if disk in disklookup:
+ diskname = disklookup[disk]
+ else:
+ diskname = "?"
+
+ return diskname
# see blk_fill_rwbs():
req_opf = {
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
index 33703233..f0fef98b 100755
--- a/tools/biosnoop.py
+++ b/tools/biosnoop.py
@@ -14,6 +14,7 @@
# 11-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
# 21-Jun-2022 Rocky Xing Added disk filter support.
# 13-Oct-2022 Rocky Xing Added support for displaying block I/O pattern.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -64,6 +65,24 @@ struct val_t {
char name[TASK_COMM_LEN];
};
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct hash_key {
+ dev_t dev;
+ u32 rwflag;
+ sector_t sector;
+};
+
+
#ifdef INCLUDE_PATTERN
struct sector_key_t {
u32 dev_major;
@@ -79,6 +98,7 @@ enum bio_pattern {
struct data_t {
u32 pid;
+ u32 dev;
u64 rwflag;
u64 delta;
u64 qdelta;
@@ -88,7 +108,6 @@ struct data_t {
enum bio_pattern pattern;
#endif
u64 ts;
- char disk_name[DISK_NAME_LEN];
char name[TASK_COMM_LEN];
};
@@ -96,12 +115,45 @@ struct data_t {
BPF_HASH(last_sectors, struct sector_key_t, u64);
#endif
-BPF_HASH(start, struct request *, struct start_req_t);
-BPF_HASH(infobyreq, struct request *, struct val_t);
+BPF_HASH(start, struct hash_key, struct start_req_t);
+BPF_HASH(infobyreq, struct hash_key, struct val_t);
BPF_PERF_OUTPUT(events);
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
+/*
+ * The following deals with a kernel version change (in mainline 4.7, although
+ * it may be backported to earlier kernels) with how block request write flags
+ * are tested. We handle both pre- and post-change versions here. Please avoid
+ * kernel version tests like this as much as possible: they inflate the code,
+ * test, and maintenance burden.
+ */
+static int get_rwflag(u32 cmd_flags) {
+#ifdef REQ_WRITE
+ return !!(cmd_flags & REQ_WRITE);
+#elif defined(REQ_OP_SHIFT)
+ return !!((cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
+#else
+ return !!((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
+#endif
+}
+
+#define RWBS_LEN 8
+
+static int get_rwflag_tp(char *rwbs) {
+ for (int i = 0; i < RWBS_LEN; i++) {
+ if (rwbs[i] == 'W')
+ return 1;
+ if (rwbs[i] == '\\0')
+ return 0;
+ }
+ return 0;
+}
+
// cache PID and comm by-req
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
+static int __trace_pid_start(struct hash_key key)
{
DISK_FILTER
@@ -113,47 +165,76 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
if (##QUEUE##) {
val.ts = bpf_ktime_get_ns();
}
- infobyreq.update(&req, &val);
+ infobyreq.update(&key, &val);
}
return 0;
}
+
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
+ return __trace_pid_start(key);
+}
+
+int trace_pid_start_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .rwflag = get_rwflag_tp(args->rwbs),
+ .sector = args->sector
+ };
+
+ return __trace_pid_start(key);
+}
+
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
DISK_FILTER
struct start_req_t start_req = {
.ts = bpf_ktime_get_ns(),
.data_len = req->__data_len
};
- start.update(&req, &start_req);
+ start.update(&key, &start_req);
return 0;
}
// output
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
+static int __trace_req_completion(void *ctx, struct hash_key key)
{
struct start_req_t *startp;
struct val_t *valp;
struct data_t data = {};
- struct gendisk *rq_disk;
+ //struct gendisk *rq_disk;
u64 ts;
// fetch timestamp and calculate delta
- startp = start.lookup(&req);
+ startp = start.lookup(&key);
if (startp == 0) {
// missed tracing issue
return 0;
}
ts = bpf_ktime_get_ns();
- rq_disk = req->__RQ_DISK__;
+ //rq_disk = req->__RQ_DISK__;
data.delta = ts - startp->ts;
data.ts = ts / 1000;
data.qdelta = 0;
data.len = startp->data_len;
- valp = infobyreq.lookup(&req);
+ valp = infobyreq.lookup(&key);
if (valp == 0) {
data.name[0] = '?';
data.name[1] = 0;
@@ -162,10 +243,9 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
data.qdelta = startp->ts - valp->ts;
}
data.pid = valp->pid;
- data.sector = req->__sector;
+ data.sector = key.sector;
+ data.dev = key.dev;
bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
- bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
- rq_disk->disk_name);
}
#ifdef INCLUDE_PATTERN
@@ -174,8 +254,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
u64 *sector, last_sector;
struct sector_key_t sector_key = {
- .dev_major = rq_disk->major,
- .dev_minor = rq_disk->first_minor
+ .dev_major = key.dev >> 20,
+ .dev_minor = key.dev & ((1 << 20) - 1)
};
sector = last_sectors.lookup(&sector_key);
@@ -187,27 +267,36 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
last_sectors.update(&sector_key, &last_sector);
#endif
-/*
- * The following deals with a kernel version change (in mainline 4.7, although
- * it may be backported to earlier kernels) with how block request write flags
- * are tested. We handle both pre- and post-change versions here. Please avoid
- * kernel version tests like this as much as possible: they inflate the code,
- * test, and maintenance burden.
- */
-#ifdef REQ_WRITE
- data.rwflag = !!(req->cmd_flags & REQ_WRITE);
-#elif defined(REQ_OP_SHIFT)
- data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
-#else
- data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
-#endif
+ data.rwflag = key.rwflag;
events.perf_submit(ctx, &data, sizeof(data));
- start.delete(&req);
- infobyreq.delete(&req);
+ start.delete(&key);
+ infobyreq.delete(&key);
return 0;
}
+
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .rwflag = get_rwflag(req->cmd_flags),
+ .sector = req->__sector
+ };
+
+ return __trace_req_completion(ctx, key);
+}
+
+int trace_req_completion_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .rwflag = get_rwflag_tp(args->rwbs),
+ .sector = args->sector
+ };
+
+ return __trace_req_completion(args, key);
+}
"""
if args.queue:
bpf_text = bpf_text.replace('##QUEUE##', '1')
@@ -225,21 +314,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
exit(1)
stat_info = os.stat(disk_path)
- major = os.major(stat_info.st_rdev)
- minor = os.minor(stat_info.st_rdev)
-
- disk_field_str = ""
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
- disk_field_str = 'req->rq_disk'
- else:
- disk_field_str = 'req->q->disk'
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
disk_filter_str = """
- struct gendisk *disk = %s;
- if (!(disk->major == %d && disk->first_minor == %d)) {
+ if(key.dev != %s) {
return 0;
}
- """ % (disk_field_str, major, minor)
+ """ % (dev)
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
else:
@@ -254,15 +335,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
b = BPF(text=bpf_text)
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
+else:
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
+else:
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
# header
print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
@@ -273,6 +358,27 @@ print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
print("%7s " % ("QUE(ms)"), end="")
print("%7s" % "LAT(ms)")
+
+# cache disk major,minor -> diskname
+diskstats = "/proc/diskstats"
+disklookup = {}
+with open(diskstats) as stats:
+ for line in stats:
+ a = line.split()
+ disklookup[a[0] + "," + a[1]] = a[2]
+
+def disk_print(d):
+ major = d >> 20
+ minor = d & ((1 << 20) - 1)
+
+ disk = str(major) + "," + str(minor)
+ if disk in disklookup:
+ diskname = disklookup[disk]
+ else:
+ diskname = "<unknown>"
+
+ return diskname
+
rwflg = ""
pattern = ""
start_ts = 0
@@ -297,9 +403,7 @@ P_RANDOM = 2
delta = float(event.ts) - start_ts
- disk_name = event.disk_name.decode('utf-8', 'replace')
- if not disk_name:
- disk_name = '<unknown>'
+ disk_name = disk_print(event.dev)
print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % (
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
diff --git a/tools/biotop.py b/tools/biotop.py
index fcdd373f..2620983a 100755
--- a/tools/biotop.py
+++ b/tools/biotop.py
@@ -14,6 +14,7 @@
#
# 06-Feb-2016 Brendan Gregg Created this.
# 17-Mar-2022 Rocky Xing Added PID filter support.
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
from __future__ import print_function
from bcc import BPF
@@ -88,14 +89,35 @@ struct val_t {
u32 io;
};
-BPF_HASH(start, struct request *, struct start_req_t);
-BPF_HASH(whobyreq, struct request *, struct who_t);
+struct tp_args {
+ u64 __unused__;
+ dev_t dev;
+ sector_t sector;
+ unsigned int nr_sector;
+ unsigned int bytes;
+ char rwbs[8];
+ char comm[16];
+ char cmd[];
+};
+
+struct hash_key {
+ dev_t dev;
+ u32 _pad;
+ sector_t sector;
+};
+
+BPF_HASH(start, struct hash_key, struct start_req_t);
+BPF_HASH(whobyreq, struct hash_key, struct who_t);
BPF_HASH(counts, struct info_t, struct val_t);
+static dev_t ddevt(struct gendisk *disk) {
+ return (disk->major << 20) | disk->first_minor;
+}
+
// cache PID and comm by-req
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
+static int __trace_pid_start(struct hash_key key)
{
- struct who_t who = {};
+ struct who_t who;
u32 pid;
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
@@ -104,30 +126,54 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
return 0;
who.pid = pid;
- whobyreq.update(&req, &who);
+ whobyreq.update(&key, &who);
}
return 0;
}
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ return __trace_pid_start(key);
+}
+
+int trace_pid_start_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_pid_start(key);
+}
+
// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
struct start_req_t start_req = {
.ts = bpf_ktime_get_ns(),
.data_len = req->__data_len
};
- start.update(&req, &start_req);
+ start.update(&key, &start_req);
return 0;
}
// output
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
+static int __trace_req_completion(struct hash_key key)
{
struct start_req_t *startp;
// fetch timestamp and calculate delta
- startp = start.lookup(&req);
+ startp = start.lookup(&key);
if (startp == 0) {
return 0; // missed tracing issue
}
@@ -135,12 +181,12 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
struct who_t *whop;
u32 pid;
- whop = whobyreq.lookup(&req);
+ whop = whobyreq.lookup(&key);
pid = whop != 0 ? whop->pid : 0;
if (FILTER_PID) {
- start.delete(&req);
+ start.delete(&key);
if (whop != 0) {
- whobyreq.delete(&req);
+ whobyreq.delete(&key);
}
return 0;
}
@@ -150,8 +196,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
// setup info_t key
struct info_t info = {};
- info.major = req->__RQ_DISK__->major;
- info.minor = req->__RQ_DISK__->first_minor;
+ info.major = key.dev >> 20;
+ info.minor = key.dev & ((1 << 20) - 1);
/*
* The following deals with a kernel version change (in mainline 4.7, although
* it may be backported to earlier kernels) with how block request write flags
@@ -159,13 +205,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
* kernel version tests like this as much as possible: they inflate the code,
* test, and maintenance burden.
*/
-#ifdef REQ_WRITE
+/*#ifdef REQ_WRITE
info.rwflag = !!(req->cmd_flags & REQ_WRITE);
#elif defined(REQ_OP_SHIFT)
info.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
#else
info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
-#endif
+#endif*/
if (whop == 0) {
// missed pid who, save stats as pid 0
@@ -183,11 +229,31 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
valp->io++;
}
- start.delete(&req);
- whobyreq.delete(&req);
+ start.delete(&key);
+ whobyreq.delete(&key);
return 0;
}
+
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
+{
+ struct hash_key key = {
+ .dev = ddevt(req->__RQ_DISK__),
+ .sector = req->__sector
+ };
+
+ return __trace_req_completion(key);
+}
+
+int trace_req_completion_tp(struct tp_args *args)
+{
+ struct hash_key key = {
+ .dev = args->dev,
+ .sector = args->sector
+ };
+
+ return __trace_req_completion(key);
+}
"""
if args.ebpf:
@@ -207,15 +273,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
b = BPF(text=bpf_text)
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
+else:
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
if BPF.get_kprobe_functions(b'blk_start_request'):
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
-else:
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
+else:
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
--
2.41.0

View File

@ -0,0 +1,156 @@
From 0d1a67ba9490aabbb874819d8d07b1868c8c2b1d Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 1 Feb 2023 17:30:03 +0100
Subject: [PATCH 2/2] tools/tcpstates: fix IPv6 journal
When logging an IPv6 state change, journal_fields tries to pack
event.saddr and event.daddr, which are not integers in this case, to
present a bytes-like object to socket.inet_ntop. This can be fixed by
having a similar type for [sd]addr for IPv4 and IPv6. Making both an
array of u32 solves the issue by presenting a bytes-like object
directly to inet_ntop, without the need for the struct packing stage.
Also, the now-similar behavior makes it easier to factor out the code
common to IPv4 and IPv6.
It solves the following error:
/usr/share/bcc/tools/tcpstates -Y
SKADDR C-PID C-COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
ffff8b2e83e56180 0 swapper/9 :: 22 :: 0 LISTEN -> SYN_RECV 0.000
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
Traceback (most recent call last):
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
callback(cpu, data, size)
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
journal.send(**journal_fields(event, AF_INET6))
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
struct.error: required argument is not an integer
ffff8b2e83e56180 0 swapper/9 2620:52:0:2580:5054:ff:fe6b:6f1f 22 2620:52:0:2b11:2f5e:407d:b35d:4663 60396 SYN_RECV -> ESTABLISHED 0.010
Exception ignored on calling ctypes callback function: <function PerfEventArray._open_perf_buffer.<locals>.raw_cb_ at 0x7f894c8d7f70>
Traceback (most recent call last):
File "/usr/lib/python3.9/site-packages/bcc/table.py", line 982, in raw_cb_
callback(cpu, data, size)
File "/usr/share/bcc/tools/tcpstates", line 419, in print_ipv6_event
journal.send(**journal_fields(event, AF_INET6))
File "/usr/share/bcc/tools/tcpstates", line 348, in journal_fields
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
struct.error: required argument is not an integer
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/tcpstates.py | 55 +++++++++++++++++-----------------------------
1 file changed, 20 insertions(+), 35 deletions(-)
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
index 9b2ccfa4..6c845c9b 100755
--- a/tools/tcpstates.py
+++ b/tools/tcpstates.py
@@ -19,7 +19,6 @@ from __future__ import print_function
from bcc import BPF
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
-from struct import pack
from time import strftime, time
from os import getuid
@@ -78,8 +77,8 @@ BPF_HASH(last, struct sock *, u64);
struct ipv4_data_t {
u64 ts_us;
u64 skaddr;
- u32 saddr;
- u32 daddr;
+ u32 saddr[1];
+ u32 daddr[1];
u64 span_us;
u32 pid;
u16 lport;
@@ -93,8 +92,8 @@ BPF_PERF_OUTPUT(ipv4_events);
struct ipv6_data_t {
u64 ts_us;
u64 skaddr;
- unsigned __int128 saddr;
- unsigned __int128 daddr;
+ u32 saddr[4];
+ u32 daddr[4];
u64 span_us;
u32 pid;
u16 lport;
@@ -350,9 +349,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
'OBJECT_PID': str(event.pid),
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
# Custom fields, aka "stuff we sort of made up".
- 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
+ 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, event.saddr),
'OBJECT_TCP_SOURCE_PORT': str(event.lport),
- 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
+ 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, event.daddr),
'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
@@ -373,8 +372,7 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
return fields
# process event
-def print_ipv4_event(cpu, data, size):
- event = b["ipv4_events"].event(data)
+def print_event(event, addr_family):
global start_ts
if args.time:
if args.csv:
@@ -389,39 +387,26 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%.6f," % delta_s, end="")
else:
print("%-9.6f " % delta_s, end="")
+ if addr_family == AF_INET:
+ version = "4"
+ else:
+ version = "6"
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
- "4" if args.wide or args.csv else "",
- inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
- inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
+ version if args.wide or args.csv else "",
+ inet_ntop(addr_family, event.saddr), event.lport,
+ inet_ntop(addr_family, event.daddr), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
- journal.send(**journal_fields(event, AF_INET))
+ journal.send(**journal_fields(event, addr_family))
+
+def print_ipv4_event(cpu, data, size):
+ event = b["ipv4_events"].event(data)
+ print_event(event, AF_INET)
def print_ipv6_event(cpu, data, size):
event = b["ipv6_events"].event(data)
- global start_ts
- if args.time:
- if args.csv:
- print("%s," % strftime("%H:%M:%S"), end="")
- else:
- print("%-8s " % strftime("%H:%M:%S"), end="")
- if args.timestamp:
- if start_ts == 0:
- start_ts = event.ts_us
- delta_s = (float(event.ts_us) - start_ts) / 1000000
- if args.csv:
- print("%.6f," % delta_s, end="")
- else:
- print("%-9.6f " % delta_s, end="")
- print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
- "6" if args.wide or args.csv else "",
- inet_ntop(AF_INET6, event.saddr), event.lport,
- inet_ntop(AF_INET6, event.daddr), event.dport,
- tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
- float(event.span_us) / 1000))
- if args.journal:
- journal.send(**journal_fields(event, AF_INET6))
+ print_event(event, AF_INET6)
# initialize BPF
b = BPF(text=bpf_text)
--
2.41.0

View File

@ -0,0 +1,144 @@
From 53b89f35e8970beef55046c1bf035264f110f06d Mon Sep 17 00:00:00 2001
From: hejun01 <hejun01@corp.netease.com>
Date: Thu, 29 Jun 2023 20:24:07 +0800
Subject: [PATCH 1/2] tools/tcpstates: fix context ptr modified error
Introduce a local variable, tcp_newstate,
to avoid the llvm optimization of args->newstate,
which would cause the context ptr args to be modified.
Split event.ports into lport and dport.
Switch the type of the TCP state from unsigned int to int.
---
tools/tcpstates.py | 47 +++++++++++++++++++++++++---------------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
index 89f3638c..9b2ccfa4 100755
--- a/tools/tcpstates.py
+++ b/tools/tcpstates.py
@@ -82,9 +82,10 @@ struct ipv4_data_t {
u32 daddr;
u64 span_us;
u32 pid;
- u32 ports;
- u32 oldstate;
- u32 newstate;
+ u16 lport;
+ u16 dport;
+ int oldstate;
+ int newstate;
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);
@@ -96,9 +97,10 @@ struct ipv6_data_t {
unsigned __int128 daddr;
u64 span_us;
u32 pid;
- u32 ports;
- u32 oldstate;
- u32 newstate;
+ u16 lport;
+ u16 dport;
+ int oldstate;
+ int newstate;
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
@@ -132,6 +134,9 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
u16 family = args->family;
FILTER_FAMILY
+ // workaround to avoid llvm optimization which will cause context ptr args modified
+ int tcp_newstate = args->newstate;
+
if (args->family == AF_INET) {
struct ipv4_data_t data4 = {
.span_us = delta_us,
@@ -141,8 +146,8 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
data4.ts_us = bpf_ktime_get_ns() / 1000;
__builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
__builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
- // a workaround until data4 compiles with separate lport/dport
- data4.ports = dport + ((0ULL + lport) << 16);
+ data4.lport = lport;
+ data4.dport = dport;
data4.pid = pid;
bpf_get_current_comm(&data4.task, sizeof(data4.task));
@@ -157,14 +162,14 @@ TRACEPOINT_PROBE(sock, inet_sock_set_state)
data6.ts_us = bpf_ktime_get_ns() / 1000;
__builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
__builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
- // a workaround until data6 compiles with separate lport/dport
- data6.ports = dport + ((0ULL + lport) << 16);
+ data6.lport = lport;
+ data6.dport = dport;
data6.pid = pid;
bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(args, &data6, sizeof(data6));
}
- if (args->newstate == TCP_CLOSE) {
+ if (tcp_newstate == TCP_CLOSE) {
last.delete(&sk);
} else {
u64 ts = bpf_ktime_get_ns();
@@ -210,8 +215,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
data4.ts_us = bpf_ktime_get_ns() / 1000;
data4.saddr = sk->__sk_common.skc_rcv_saddr;
data4.daddr = sk->__sk_common.skc_daddr;
- // a workaround until data4 compiles with separate lport/dport
- data4.ports = dport + ((0ULL + lport) << 16);
+ data4.lport = lport;
+ data4.dport = dport;
data4.pid = pid;
bpf_get_current_comm(&data4.task, sizeof(data4.task));
@@ -228,8 +233,8 @@ int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state)
sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr),
sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
- // a workaround until data6 compiles with separate lport/dport
- data6.ports = dport + ((0ULL + lport) << 16);
+ data6.lport = lport;
+ data6.dport = dport;
data6.pid = pid;
bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
@@ -346,9 +351,9 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
'OBJECT_COMM': event.task.decode('utf-8', 'replace'),
# Custom fields, aka "stuff we sort of made up".
'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)),
- 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 16),
+ 'OBJECT_TCP_SOURCE_PORT': str(event.lport),
'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)),
- 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffff),
+ 'OBJECT_TCP_DESTINATION_PORT': str(event.dport),
'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate),
'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate),
'OBJECT_TCP_SPAN_TIME': str(event.span_us)
@@ -386,8 +391,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%-9.6f " % delta_s, end="")
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
"4" if args.wide or args.csv else "",
- inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 16,
- inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffff,
+ inet_ntop(AF_INET, pack("I", event.saddr)), event.lport,
+ inet_ntop(AF_INET, pack("I", event.daddr)), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
@@ -411,8 +416,8 @@ format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
print("%-9.6f " % delta_s, end="")
print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'),
"6" if args.wide or args.csv else "",
- inet_ntop(AF_INET6, event.saddr), event.ports >> 16,
- inet_ntop(AF_INET6, event.daddr), event.ports & 0xffff,
+ inet_ntop(AF_INET6, event.saddr), event.lport,
+ inet_ntop(AF_INET6, event.daddr), event.dport,
tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
float(event.span_us) / 1000))
if args.journal:
--
2.41.0

View File

@ -0,0 +1,53 @@
From 88274e83ca1a61699741d5b1d5499beb64cac753 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Mon, 16 Oct 2023 19:41:29 +0200
Subject: [PATCH] tools/trace: don't raise an exception in a ctype callback
To exit the tool when the maximal number of events is reached (-M
option), the tool currently calls exit(), which raises a SystemExit
exception. The handling of exceptions from a ctypes callback doesn't
seem straightforward and is dependent on the Python version.
This patch avoids the issue altogether by using a global variable
instead.
Closes #3049
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
---
tools/trace.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/trace.py b/tools/trace.py
index 9c7cca71..2aa096fa 100755
--- a/tools/trace.py
+++ b/tools/trace.py
@@ -43,6 +43,7 @@ import sys
build_id_enabled = False
aggregate = False
symcount = {}
+ done = False
@classmethod
def configure(cls, args):
@@ -635,7 +636,7 @@ BPF_PERF_OUTPUT(%s);
if self.aggregate:
self.print_aggregate_events()
sys.stdout.flush()
- exit()
+ Probe.done = True;
def attach(self, bpf, verbose):
if len(self.library) == 0:
@@ -895,7 +896,7 @@ trace -s /lib/x86_64-linux-gnu/libc.so.6,/bin/ping 'p:c:inet_pton' -U
"-" if not all_probes_trivial else ""))
sys.stdout.flush()
- while True:
+ while not Probe.done:
self.bpf.perf_buffer_poll()
def run(self):
--
2.41.0

View File

@ -10,7 +10,7 @@
%endif %endif
%endif %endif
%ifarch x86_64 ppc64 ppc64le aarch64 %ifarch x86_64 ppc64 ppc64le aarch64 s390x
%bcond_without libbpf_tools %bcond_without libbpf_tools
%else %else
%bcond_with libbpf_tools %bcond_with libbpf_tools
@ -24,17 +24,21 @@
Name: bcc Name: bcc
Version: 0.25.0 Version: 0.28.0
Release: 2%{?dist} Release: 5%{?dist}
Summary: BPF Compiler Collection (BCC) Summary: BPF Compiler Collection (BCC)
License: ASL 2.0 License: ASL 2.0
URL: https://github.com/iovisor/bcc URL: https://github.com/iovisor/bcc
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
Patch0: %%{name}-%%{version}-bcc-support-building-with-external-libbpf-package-an.patch Patch0: %%{name}-%%{version}-tools-tcpstates-fix-context-ptr-modified-error.patch
Patch2: %%{name}-%%{version}-Fix-bpf_pseudo_fd-type-conversion-error.patch Patch1: %%{name}-%%{version}-tools-tcpstates-fix-IPv6-journal.patch
Patch3: %%{name}-%%{version}-Fix-clang-15-int-to-pointer-conversion-errors.patch Patch2: %%{name}-%%{version}-tools-Add-support-for-the-new-block_io_-tracepoints.patch
Patch4: %%{name}-%%{version}-Fix-some-documentation-issues-4197.patch Patch3: %%{name}-%%{version}-tools-trace-don-t-raise-an-exception-in-a-ctype-call.patch
Patch5: %%{name}-%%{version}-tools-nfsslower-fix-an-uninitialized-struct-error.patch Patch4: %%{name}-%%{version}-libbpf-tools-add-block_io_-start-done-tracepoints-su.patch
Patch5: %%{name}-%%{version}-libbpf-tools-Add-s390x-support.patch
Patch6: %%{name}-%%{version}-Fixing-pvalloc-memleak-test.patch
Patch7: %%{name}-%%{version}-Skipping-USDT-tests-for-Power-processor.patch
Patch8: %%{name}-%%{version}-Adding-memory-zones-for-Power-server.patch
# Arches will be included as upstream support is added and dependencies are # Arches will be included as upstream support is added and dependencies are
# satisfied in the respective arches # satisfied in the respective arches
@ -258,6 +262,47 @@ cp -a libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}/
%endif %endif
%changelog %changelog
* Wed Dec 13 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-5
- Fix libbpf bio tools (RHEL-19368)
- Add S390x support to libbpf-tools (RHEL-16325)
- Power enhancements (RHEL-11477)
* Tue Nov 21 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-4
- Rebuild with LLVM 17 in the side tag (RHEL-10591)
* Tue Nov 21 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-3
- Rebuild with LLVM 17 (RHEL-10591)
* Mon Nov 06 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-2
- Fix trace tool (RHEL-8605)
* Mon Oct 23 2023 Jerome Marchand <jmarchan@redhat.com> - 0.28.0-1
- Rebase to v0.28.0 (RHEL-9976)
- Rebuild with LLVM 17 (RHEL-10591)
- Fix bpf-biosnoop out of bound access (RHEL-8664)
- Fix kvmexit missing VM exit reasons and statistics (RHEL-8702)
- Fix multi-word array type handling (RHEL-8674)
- Fix tcpstates -Y (RHEL-8490)
- Fix bio tools (RHEL-8553)
* Wed Aug 09 2023 Jerome Marchand <jmarchan@redhat.com> - 0.26.0-4
- Fix tcpretrans (rhbz#2226967)
* Fri May 12 2023 Jerome Marchand <jmarchan@redhat.com> - 0.26.0-3
- Rebuild with LLVM 16 (rhbz#2050112)
- Fix compactsnoop (rhbz#2042236)
- Fix killsnoop documentation (rhbz#2075500)
- Fix funcslower (rhbz#2075415)
- Fix deadlock memory usage issue (rhbz#2050112)
- Fix nfsslower (rhbz#2180934)
- Use upstream fix for nfsslower uninitialized struct issue
* Wed Mar 15 2023 Jerome Marchand <jmarchan@redhat.com> - 0.26.0-2
- Rebuild with the right rhel-target
* Fri Mar 10 2023 Jerome Marchand <jmarchan@redhat.com> - 0.26.0-1
- Rebase to v0.26.0
* Thu Jan 05 2023 Jerome Marchand <jmarchan@redhat.com> - 0.25.0-2 * Thu Jan 05 2023 Jerome Marchand <jmarchan@redhat.com> - 0.25.0-2
- Rebuild for libbpf 1.0 - Rebuild for libbpf 1.0