Rebase bcc to v0.29.1 and enable libbpf-tools on s390x
Also fix bpf-bindsnoop, ttysnoop and sync libbpf. Resolves: bz#2253688 Resolves: bz#2249458
This commit is contained in:
parent
eae885cfcc
commit
110e48716f
1
.gitignore
vendored
1
.gitignore
vendored
@ -21,3 +21,4 @@
|
|||||||
/bcc-0.26.0.tar.gz
|
/bcc-0.26.0.tar.gz
|
||||||
/bcc-0.27.0.tar.gz
|
/bcc-0.27.0.tar.gz
|
||||||
/bcc-0.28.0.tar.gz
|
/bcc-0.28.0.tar.gz
|
||||||
|
/bcc-0.29.1.tar.gz
|
||||||
|
132
Fix-ttysnoop.py-with-newer-kernels.patch
Normal file
132
Fix-ttysnoop.py-with-newer-kernels.patch
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
From 89126c7452c29736d38dc072a952b0b0c831fade Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yonghong Song <yonghong.song@linux.dev>
|
||||||
|
Date: Mon, 29 Jan 2024 16:13:30 -0800
|
||||||
|
Subject: [PATCH] Fix ttysnoop.py with newer kernels
|
||||||
|
|
||||||
|
Jerome Marchand reported that ttysnoop.py won't work properly
|
||||||
|
with newer kernels (#4884). I did some investigation and found
|
||||||
|
that some kernel data structure change caused verification failure.
|
||||||
|
The failure is caused by the following:
|
||||||
|
; kvec = from->kvec;
|
||||||
|
// R1=ptr_iov_iter()
|
||||||
|
15: (79) r1 = *(u64 *)(r1 +16) ; R1_w=scalar()
|
||||||
|
; count = kvec->iov_len;
|
||||||
|
16: (bf) r2 = r1 ; R1_w=scalar(id=1) R2_w=scalar(id=1)
|
||||||
|
17: (07) r2 += 8 ; R2_w=scalar()
|
||||||
|
18: (05) goto pc+3
|
||||||
|
;
|
||||||
|
22: (79) r2 = *(u64 *)(r2 +0)
|
||||||
|
R2 invalid mem access 'scalar'
|
||||||
|
|
||||||
|
So basically, loading 'iov_iter + 16' returns a scalar but verifier
|
||||||
|
expects it to be a pointer.
|
||||||
|
|
||||||
|
In v6.4, we have
|
||||||
|
struct iovec
|
||||||
|
{
|
||||||
|
void __user *iov_base; /* BSD uses caddr_t (1003.1g requires void *) */
|
||||||
|
__kernel_size_t iov_len; /* Must be size_t (1003.1g) */
|
||||||
|
};
|
||||||
|
struct iov_iter {
|
||||||
|
u8 iter_type;
|
||||||
|
bool copy_mc;
|
||||||
|
bool nofault;
|
||||||
|
bool data_source;
|
||||||
|
bool user_backed;
|
||||||
|
union {
|
||||||
|
size_t iov_offset;
|
||||||
|
int last_offset;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
struct iovec __ubuf_iovec;
|
||||||
|
struct {
|
||||||
|
union {
|
||||||
|
const struct iovec *__iov;
|
||||||
|
const struct kvec *kvec;
|
||||||
|
const struct bio_vec *bvec;
|
||||||
|
struct xarray *xarray;
|
||||||
|
struct pipe_inode_info *pipe;
|
||||||
|
void __user *ubuf;
|
||||||
|
};
|
||||||
|
size_t count;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
unsigned long nr_segs;
|
||||||
|
struct {
|
||||||
|
unsigned int head;
|
||||||
|
unsigned int start_head;
|
||||||
|
};
|
||||||
|
loff_t xarray_start;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
The kernel traversal chain will be
|
||||||
|
"struct iov_iter" -> "struct iovec __ubuf_iovec" -> "void __user *iov_base".
|
||||||
|
Since the "iov_base" type is a ptr to void, the kernel considers the
|
||||||
|
loaded value as a scalar which caused verification failure.
|
||||||
|
|
||||||
|
But for older kernels such as 5.19, we do not have this issue.
|
||||||
|
struct iovec
|
||||||
|
{
|
||||||
|
void __user *iov_base; /* BSD uses caddr_t (1003.1g requires void *) */
|
||||||
|
__kernel_size_t iov_len; /* Must be size_t (1003.1g) */
|
||||||
|
};
|
||||||
|
struct iov_iter {
|
||||||
|
u8 iter_type;
|
||||||
|
bool nofault;
|
||||||
|
bool data_source;
|
||||||
|
bool user_backed;
|
||||||
|
size_t iov_offset;
|
||||||
|
size_t count;
|
||||||
|
union {
|
||||||
|
const struct iovec *iov;
|
||||||
|
const struct kvec *kvec;
|
||||||
|
const struct bio_vec *bvec;
|
||||||
|
struct xarray *xarray;
|
||||||
|
struct pipe_inode_info *pipe;
|
||||||
|
void __user *ubuf;
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
unsigned long nr_segs;
|
||||||
|
struct {
|
||||||
|
unsigned int head;
|
||||||
|
unsigned int start_head;
|
||||||
|
};
|
||||||
|
loff_t xarray_start;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
The kernel traversal chain will be
|
||||||
|
"struct iov_iter" -> "const struct iovec *iov"
|
||||||
|
Note that "const struct iovec *iov" is used since it is the *first* member
|
||||||
|
inside the union. The traversal stops once we hit a pointer.
|
||||||
|
So the kernel verifier returns a 'struct iovec' object (untrusted, cannot
|
||||||
|
be used as a parameter to a call) and verifier can proceed.
|
||||||
|
|
||||||
|
To fix the problem, let us use bpf_probe_read_kernel() instead
|
||||||
|
so ttysnoop.py can continue to work with newer kernels.
|
||||||
|
|
||||||
|
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
|
||||||
|
---
|
||||||
|
tools/ttysnoop.py | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/ttysnoop.py b/tools/ttysnoop.py
|
||||||
|
index 77f97b7c..aca09db4 100755
|
||||||
|
--- a/tools/ttysnoop.py
|
||||||
|
+++ b/tools/ttysnoop.py
|
||||||
|
@@ -162,8 +162,8 @@ PROBE_TTY_WRITE
|
||||||
|
*/
|
||||||
|
case CASE_ITER_IOVEC_NAME:
|
||||||
|
kvec = from->kvec;
|
||||||
|
- buf = kvec->iov_base;
|
||||||
|
- count = kvec->iov_len;
|
||||||
|
+ bpf_probe_read_kernel(&buf, sizeof(buf), &kvec->iov_base);
|
||||||
|
+ bpf_probe_read_kernel(&count, sizeof(count), &kvec->iov_len);
|
||||||
|
break;
|
||||||
|
CASE_ITER_UBUF_TEXT
|
||||||
|
/* TODO: Support more type */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
727
Sync-with-latest-libbpf-repo-4889.patch
Normal file
727
Sync-with-latest-libbpf-repo-4889.patch
Normal file
@ -0,0 +1,727 @@
|
|||||||
|
From c0691e35cd65d5400f0b792d5eba81f8eae236dc Mon Sep 17 00:00:00 2001
|
||||||
|
From: yonghong-song <ys114321@gmail.com>
|
||||||
|
Date: Tue, 30 Jan 2024 09:14:30 -0800
|
||||||
|
Subject: [PATCH] Sync with latest libbpf repo (#4889)
|
||||||
|
|
||||||
|
Sync with latest libbpf repo.
|
||||||
|
The top libbpf commit is:
|
||||||
|
3b0973892891 sync: remove NETDEV_XSK_FLAGS_MASK which is not in bpf/bpf-next anymore
|
||||||
|
|
||||||
|
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
|
||||||
|
---
|
||||||
|
introspection/bps.c | 1 +
|
||||||
|
src/cc/compat/linux/virtual_bpf.h | 368 ++++++++++++++++++++++++++----
|
||||||
|
src/cc/libbpf | 2 +-
|
||||||
|
3 files changed, 326 insertions(+), 45 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/introspection/bps.c b/introspection/bps.c
|
||||||
|
index 3956fbf2..8cdef54a 100644
|
||||||
|
--- a/introspection/bps.c
|
||||||
|
+++ b/introspection/bps.c
|
||||||
|
@@ -48,6 +48,7 @@ static const char * const prog_type_strings[] = {
|
||||||
|
[BPF_PROG_TYPE_LSM] = "lsm",
|
||||||
|
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
|
||||||
|
[BPF_PROG_TYPE_SYSCALL] = "syscall",
|
||||||
|
+ [BPF_PROG_TYPE_NETFILTER] = "netfilter",
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * const map_type_strings[] = {
|
||||||
|
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
index a182123e..fcabe71a 100644
|
||||||
|
--- a/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
+++ b/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
@@ -20,6 +20,7 @@ R"********(
|
||||||
|
|
||||||
|
/* ld/ldx fields */
|
||||||
|
#define BPF_DW 0x18 /* double word (64-bit) */
|
||||||
|
+#define BPF_MEMSX 0x80 /* load with sign extension */
|
||||||
|
#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */
|
||||||
|
#define BPF_XADD 0xc0 /* exclusive add - legacy name */
|
||||||
|
|
||||||
|
@@ -847,6 +848,36 @@ union bpf_iter_link_info {
|
||||||
|
* Returns zero on success. On error, -1 is returned and *errno*
|
||||||
|
* is set appropriately.
|
||||||
|
*
|
||||||
|
+ * BPF_TOKEN_CREATE
|
||||||
|
+ * Description
|
||||||
|
+ * Create BPF token with embedded information about what
|
||||||
|
+ * BPF-related functionality it allows:
|
||||||
|
+ * - a set of allowed bpf() syscall commands;
|
||||||
|
+ * - a set of allowed BPF map types to be created with
|
||||||
|
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
|
||||||
|
+ * - a set of allowed BPF program types and BPF program attach
|
||||||
|
+ * types to be loaded with BPF_PROG_LOAD command, if
|
||||||
|
+ * BPF_PROG_LOAD itself is allowed.
|
||||||
|
+ *
|
||||||
|
+ * BPF token is created (derived) from an instance of BPF FS,
|
||||||
|
+ * assuming it has necessary delegation mount options specified.
|
||||||
|
+ * This BPF token can be passed as an extra parameter to various
|
||||||
|
+ * bpf() syscall commands to grant BPF subsystem functionality to
|
||||||
|
+ * unprivileged processes.
|
||||||
|
+ *
|
||||||
|
+ * When created, BPF token is "associated" with the owning
|
||||||
|
+ * user namespace of BPF FS instance (super block) that it was
|
||||||
|
+ * derived from, and subsequent BPF operations performed with
|
||||||
|
+ * BPF token would be performing capabilities checks (i.e.,
|
||||||
|
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
|
||||||
|
+ * that user namespace. Without BPF token, such capabilities
|
||||||
|
+ * have to be granted in init user namespace, making bpf()
|
||||||
|
+ * syscall incompatible with user namespace, for the most part.
|
||||||
|
+ *
|
||||||
|
+ * Return
|
||||||
|
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||||
|
+ * error occurred (in which case, *errno* is set appropriately).
|
||||||
|
+ *
|
||||||
|
* NOTES
|
||||||
|
* eBPF objects (maps and programs) can be shared between processes.
|
||||||
|
*
|
||||||
|
@@ -901,6 +932,8 @@ enum bpf_cmd {
|
||||||
|
BPF_ITER_CREATE,
|
||||||
|
BPF_LINK_DETACH,
|
||||||
|
BPF_PROG_BIND_MAP,
|
||||||
|
+ BPF_TOKEN_CREATE,
|
||||||
|
+ __MAX_BPF_CMD,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum bpf_map_type {
|
||||||
|
@@ -932,7 +965,14 @@ enum bpf_map_type {
|
||||||
|
*/
|
||||||
|
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
|
||||||
|
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
|
||||||
|
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
|
||||||
|
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
|
||||||
|
+ /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
|
||||||
|
+ * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
|
||||||
|
+ * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
|
||||||
|
+ * functionality and more. So mark BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
|
||||||
|
+ * deprecated.
|
||||||
|
+ */
|
||||||
|
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
|
||||||
|
BPF_MAP_TYPE_QUEUE,
|
||||||
|
BPF_MAP_TYPE_STACK,
|
||||||
|
BPF_MAP_TYPE_SK_STORAGE,
|
||||||
|
@@ -944,6 +984,7 @@ enum bpf_map_type {
|
||||||
|
BPF_MAP_TYPE_BLOOM_FILTER,
|
||||||
|
BPF_MAP_TYPE_USER_RINGBUF,
|
||||||
|
BPF_MAP_TYPE_CGRP_STORAGE,
|
||||||
|
+ __MAX_BPF_MAP_TYPE
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Note that tracing related programs such as
|
||||||
|
@@ -987,6 +1028,8 @@ enum bpf_prog_type {
|
||||||
|
BPF_PROG_TYPE_LSM,
|
||||||
|
BPF_PROG_TYPE_SK_LOOKUP,
|
||||||
|
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
|
||||||
|
+ BPF_PROG_TYPE_NETFILTER,
|
||||||
|
+ __MAX_BPF_PROG_TYPE
|
||||||
|
};
|
||||||
|
|
||||||
|
enum bpf_attach_type {
|
||||||
|
@@ -1035,6 +1078,17 @@ enum bpf_attach_type {
|
||||||
|
BPF_TRACE_KPROBE_MULTI,
|
||||||
|
BPF_LSM_CGROUP,
|
||||||
|
BPF_STRUCT_OPS,
|
||||||
|
+ BPF_NETFILTER,
|
||||||
|
+ BPF_TCX_INGRESS,
|
||||||
|
+ BPF_TCX_EGRESS,
|
||||||
|
+ BPF_TRACE_UPROBE_MULTI,
|
||||||
|
+ BPF_CGROUP_UNIX_CONNECT,
|
||||||
|
+ BPF_CGROUP_UNIX_SENDMSG,
|
||||||
|
+ BPF_CGROUP_UNIX_RECVMSG,
|
||||||
|
+ BPF_CGROUP_UNIX_GETPEERNAME,
|
||||||
|
+ BPF_CGROUP_UNIX_GETSOCKNAME,
|
||||||
|
+ BPF_NETKIT_PRIMARY,
|
||||||
|
+ BPF_NETKIT_PEER,
|
||||||
|
__MAX_BPF_ATTACH_TYPE
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -1051,8 +1105,23 @@ enum bpf_link_type {
|
||||||
|
BPF_LINK_TYPE_PERF_EVENT = 7,
|
||||||
|
BPF_LINK_TYPE_KPROBE_MULTI = 8,
|
||||||
|
BPF_LINK_TYPE_STRUCT_OPS = 9,
|
||||||
|
+ BPF_LINK_TYPE_NETFILTER = 10,
|
||||||
|
+ BPF_LINK_TYPE_TCX = 11,
|
||||||
|
+ BPF_LINK_TYPE_UPROBE_MULTI = 12,
|
||||||
|
+ BPF_LINK_TYPE_NETKIT = 13,
|
||||||
|
+ __MAX_BPF_LINK_TYPE,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
|
||||||
|
|
||||||
|
- MAX_BPF_LINK_TYPE,
|
||||||
|
+enum bpf_perf_event_type {
|
||||||
|
+ BPF_PERF_EVENT_UNSPEC = 0,
|
||||||
|
+ BPF_PERF_EVENT_UPROBE = 1,
|
||||||
|
+ BPF_PERF_EVENT_URETPROBE = 2,
|
||||||
|
+ BPF_PERF_EVENT_KPROBE = 3,
|
||||||
|
+ BPF_PERF_EVENT_KRETPROBE = 4,
|
||||||
|
+ BPF_PERF_EVENT_TRACEPOINT = 5,
|
||||||
|
+ BPF_PERF_EVENT_EVENT = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
|
||||||
|
@@ -1101,7 +1170,12 @@ enum bpf_link_type {
|
||||||
|
*/
|
||||||
|
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
|
||||||
|
#define BPF_F_ALLOW_MULTI (1U << 1)
|
||||||
|
+/* Generic attachment flags. */
|
||||||
|
#define BPF_F_REPLACE (1U << 2)
|
||||||
|
+#define BPF_F_BEFORE (1U << 3)
|
||||||
|
+#define BPF_F_AFTER (1U << 4)
|
||||||
|
+#define BPF_F_ID (1U << 5)
|
||||||
|
+#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
|
||||||
|
|
||||||
|
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
|
||||||
|
* verifier will perform strict alignment checking as if the kernel
|
||||||
|
@@ -1163,10 +1237,27 @@ enum bpf_link_type {
|
||||||
|
*/
|
||||||
|
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
|
||||||
|
|
||||||
|
+/* The verifier internal test flag. Behavior is undefined */
|
||||||
|
+#define BPF_F_TEST_REG_INVARIANTS (1U << 7)
|
||||||
|
+
|
||||||
|
/* link_create.kprobe_multi.flags used in LINK_CREATE command for
|
||||||
|
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
|
||||||
|
*/
|
||||||
|
-#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
|
||||||
|
+enum {
|
||||||
|
+ BPF_F_KPROBE_MULTI_RETURN = (1U << 0)
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* link_create.uprobe_multi.flags used in LINK_CREATE command for
|
||||||
|
+ * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
|
||||||
|
+ */
|
||||||
|
+enum {
|
||||||
|
+ BPF_F_UPROBE_MULTI_RETURN = (1U << 0)
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* link_create.netfilter.flags used in LINK_CREATE command for
|
||||||
|
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
|
||||||
|
+ */
|
||||||
|
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
|
||||||
|
|
||||||
|
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
||||||
|
* the following extensions:
|
||||||
|
@@ -1271,6 +1362,15 @@ enum {
|
||||||
|
|
||||||
|
/* Create a map that will be registered/unregesitered by the backed bpf_link */
|
||||||
|
BPF_F_LINK = (1U << 13),
|
||||||
|
+
|
||||||
|
+/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
|
||||||
|
+ BPF_F_PATH_FD = (1U << 14),
|
||||||
|
+
|
||||||
|
+/* Flag for value_type_btf_obj_fd, the fd is available */
|
||||||
|
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
|
||||||
|
+
|
||||||
|
+/* BPF token FD is passed in a corresponding command's token_fd field */
|
||||||
|
+ BPF_F_TOKEN_FD = (1U << 16),
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Flags for BPF_PROG_QUERY. */
|
||||||
|
@@ -1344,6 +1444,15 @@ union bpf_attr {
|
||||||
|
* to using 5 hash functions).
|
||||||
|
*/
|
||||||
|
__u64 map_extra;
|
||||||
|
+
|
||||||
|
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
|
||||||
|
+ * type data for
|
||||||
|
+ * btf_vmlinux_value_type_id.
|
||||||
|
+ */
|
||||||
|
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
|
||||||
|
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
|
||||||
|
+ */
|
||||||
|
+ __s32 map_token_fd;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
|
||||||
|
@@ -1413,23 +1522,39 @@ union bpf_attr {
|
||||||
|
* truncated), or smaller (if log buffer wasn't filled completely).
|
||||||
|
*/
|
||||||
|
__u32 log_true_size;
|
||||||
|
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
|
||||||
|
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
|
||||||
|
+ */
|
||||||
|
+ __s32 prog_token_fd;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_OBJ_* commands */
|
||||||
|
__aligned_u64 pathname;
|
||||||
|
__u32 bpf_fd;
|
||||||
|
__u32 file_flags;
|
||||||
|
+ /* Same as dirfd in openat() syscall; see openat(2)
|
||||||
|
+ * manpage for details of path FD and pathname semantics;
|
||||||
|
+ * path_fd should be accompanied by BPF_F_PATH_FD flag set in
|
||||||
|
+ * file_flags field, otherwise it should be set to zero;
|
||||||
|
+ * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
|
||||||
|
+ */
|
||||||
|
+ __s32 path_fd;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
|
||||||
|
- __u32 target_fd; /* container object to attach to */
|
||||||
|
- __u32 attach_bpf_fd; /* eBPF program to attach */
|
||||||
|
+ union {
|
||||||
|
+ __u32 target_fd; /* target object to attach to or ... */
|
||||||
|
+ __u32 target_ifindex; /* target ifindex */
|
||||||
|
+ };
|
||||||
|
+ __u32 attach_bpf_fd;
|
||||||
|
__u32 attach_type;
|
||||||
|
__u32 attach_flags;
|
||||||
|
- __u32 replace_bpf_fd; /* previously attached eBPF
|
||||||
|
- * program to replace if
|
||||||
|
- * BPF_F_REPLACE is used
|
||||||
|
- */
|
||||||
|
+ __u32 replace_bpf_fd;
|
||||||
|
+ union {
|
||||||
|
+ __u32 relative_fd;
|
||||||
|
+ __u32 relative_id;
|
||||||
|
+ };
|
||||||
|
+ __u64 expected_revision;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
|
||||||
|
@@ -1475,16 +1600,26 @@ union bpf_attr {
|
||||||
|
} info;
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_PROG_QUERY command */
|
||||||
|
- __u32 target_fd; /* container object to query */
|
||||||
|
+ union {
|
||||||
|
+ __u32 target_fd; /* target object to query or ... */
|
||||||
|
+ __u32 target_ifindex; /* target ifindex */
|
||||||
|
+ };
|
||||||
|
__u32 attach_type;
|
||||||
|
__u32 query_flags;
|
||||||
|
__u32 attach_flags;
|
||||||
|
__aligned_u64 prog_ids;
|
||||||
|
- __u32 prog_cnt;
|
||||||
|
+ union {
|
||||||
|
+ __u32 prog_cnt;
|
||||||
|
+ __u32 count;
|
||||||
|
+ };
|
||||||
|
+ __u32 :32;
|
||||||
|
/* output: per-program attach_flags.
|
||||||
|
* not allowed to be set during effective query.
|
||||||
|
*/
|
||||||
|
__aligned_u64 prog_attach_flags;
|
||||||
|
+ __aligned_u64 link_ids;
|
||||||
|
+ __aligned_u64 link_attach_flags;
|
||||||
|
+ __u64 revision;
|
||||||
|
} query;
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
|
||||||
|
@@ -1503,6 +1638,11 @@ union bpf_attr {
|
||||||
|
* truncated), or smaller (if log buffer wasn't filled completely).
|
||||||
|
*/
|
||||||
|
__u32 btf_log_true_size;
|
||||||
|
+ __u32 btf_flags;
|
||||||
|
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
|
||||||
|
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
|
||||||
|
+ */
|
||||||
|
+ __s32 btf_token_fd;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
@@ -1527,13 +1667,13 @@ union bpf_attr {
|
||||||
|
__u32 map_fd; /* struct_ops to attach */
|
||||||
|
};
|
||||||
|
union {
|
||||||
|
- __u32 target_fd; /* object to attach to */
|
||||||
|
- __u32 target_ifindex; /* target ifindex */
|
||||||
|
+ __u32 target_fd; /* target object to attach to or ... */
|
||||||
|
+ __u32 target_ifindex; /* target ifindex */
|
||||||
|
};
|
||||||
|
__u32 attach_type; /* attach type */
|
||||||
|
__u32 flags; /* extra flags */
|
||||||
|
union {
|
||||||
|
- __u32 target_btf_id; /* btf_id of target to attach to */
|
||||||
|
+ __u32 target_btf_id; /* btf_id of target to attach to */
|
||||||
|
struct {
|
||||||
|
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
|
||||||
|
__u32 iter_info_len; /* iter_info length */
|
||||||
|
@@ -1561,6 +1701,35 @@ union bpf_attr {
|
||||||
|
*/
|
||||||
|
__u64 cookie;
|
||||||
|
} tracing;
|
||||||
|
+ struct {
|
||||||
|
+ __u32 pf;
|
||||||
|
+ __u32 hooknum;
|
||||||
|
+ __s32 priority;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ } netfilter;
|
||||||
|
+ struct {
|
||||||
|
+ union {
|
||||||
|
+ __u32 relative_fd;
|
||||||
|
+ __u32 relative_id;
|
||||||
|
+ };
|
||||||
|
+ __u64 expected_revision;
|
||||||
|
+ } tcx;
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 path;
|
||||||
|
+ __aligned_u64 offsets;
|
||||||
|
+ __aligned_u64 ref_ctr_offsets;
|
||||||
|
+ __aligned_u64 cookies;
|
||||||
|
+ __u32 cnt;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 pid;
|
||||||
|
+ } uprobe_multi;
|
||||||
|
+ struct {
|
||||||
|
+ union {
|
||||||
|
+ __u32 relative_fd;
|
||||||
|
+ __u32 relative_id;
|
||||||
|
+ };
|
||||||
|
+ __u64 expected_revision;
|
||||||
|
+ } netkit;
|
||||||
|
};
|
||||||
|
} link_create;
|
||||||
|
|
||||||
|
@@ -1604,6 +1773,11 @@ union bpf_attr {
|
||||||
|
__u32 flags; /* extra flags */
|
||||||
|
} prog_bind_map;
|
||||||
|
|
||||||
|
+ struct { /* struct used by BPF_TOKEN_CREATE command */
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 bpffs_fd;
|
||||||
|
+ } token_create;
|
||||||
|
+
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
/* The description below is an attempt at providing documentation to eBPF
|
||||||
|
@@ -1879,7 +2053,9 @@ union bpf_attr {
|
||||||
|
* performed again, if the helper is used in combination with
|
||||||
|
* direct packet access.
|
||||||
|
* Return
|
||||||
|
- * 0 on success, or a negative error in case of failure.
|
||||||
|
+ * 0 on success, or a negative error in case of failure. Positive
|
||||||
|
+ * error indicates a potential drop or congestion in the target
|
||||||
|
+ * device. The particular positive error codes are not defined.
|
||||||
|
*
|
||||||
|
* u64 bpf_get_current_pid_tgid(void)
|
||||||
|
* Description
|
||||||
|
@@ -2612,8 +2788,8 @@ union bpf_attr {
|
||||||
|
* *bpf_socket* should be one of the following:
|
||||||
|
*
|
||||||
|
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||||
|
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||||
|
- * and **BPF_CGROUP_INET6_CONNECT**.
|
||||||
|
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
|
||||||
|
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
|
||||||
|
*
|
||||||
|
* This helper actually implements a subset of **setsockopt()**.
|
||||||
|
* It supports the following *level*\ s:
|
||||||
|
@@ -2851,8 +3027,8 @@ union bpf_attr {
|
||||||
|
* *bpf_socket* should be one of the following:
|
||||||
|
*
|
||||||
|
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
|
||||||
|
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
|
||||||
|
- * and **BPF_CGROUP_INET6_CONNECT**.
|
||||||
|
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
|
||||||
|
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
|
||||||
|
*
|
||||||
|
* This helper actually implements a subset of **getsockopt()**.
|
||||||
|
* It supports the same set of *optname*\ s that is supported by
|
||||||
|
@@ -3160,6 +3336,10 @@ union bpf_attr {
|
||||||
|
* **BPF_FIB_LOOKUP_DIRECT**
|
||||||
|
* Do a direct table lookup vs full lookup using FIB
|
||||||
|
* rules.
|
||||||
|
+ * **BPF_FIB_LOOKUP_TBID**
|
||||||
|
+ * Used with BPF_FIB_LOOKUP_DIRECT.
|
||||||
|
+ * Use the routing table ID present in *params*->tbid
|
||||||
|
+ * for the fib lookup.
|
||||||
|
* **BPF_FIB_LOOKUP_OUTPUT**
|
||||||
|
* Perform lookup from an egress perspective (default is
|
||||||
|
* ingress).
|
||||||
|
@@ -3168,6 +3348,11 @@ union bpf_attr {
|
||||||
|
* and *params*->smac will not be set as output. A common
|
||||||
|
* use case is to call **bpf_redirect_neigh**\ () after
|
||||||
|
* doing **bpf_fib_lookup**\ ().
|
||||||
|
+ * **BPF_FIB_LOOKUP_SRC**
|
||||||
|
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
|
||||||
|
+ * for the nexthop. If the src addr cannot be derived,
|
||||||
|
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
|
||||||
|
+ * case, *params*->dmac and *params*->smac are not set either.
|
||||||
|
*
|
||||||
|
* *ctx* is either **struct xdp_md** for XDP programs or
|
||||||
|
* **struct sk_buff** tc cls_act programs.
|
||||||
|
@@ -4137,9 +4322,6 @@ union bpf_attr {
|
||||||
|
* **-EOPNOTSUPP** if the operation is not supported, for example
|
||||||
|
* a call from outside of TC ingress.
|
||||||
|
*
|
||||||
|
- * **-ESOCKTNOSUPPORT** if the socket type is not supported
|
||||||
|
- * (reuseport).
|
||||||
|
- *
|
||||||
|
* long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
|
||||||
|
* Description
|
||||||
|
* Helper is overloaded depending on BPF program type. This
|
||||||
|
@@ -4404,6 +4586,8 @@ union bpf_attr {
|
||||||
|
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
|
||||||
|
* Description
|
||||||
|
* Return a user or a kernel stack in bpf program provided buffer.
|
||||||
|
+ * Note: the user stack will only be populated if the *task* is
|
||||||
|
+ * the current task; all other tasks will return -EOPNOTSUPP.
|
||||||
|
* To achieve this, the helper needs *task*, which is a valid
|
||||||
|
* pointer to **struct task_struct**. To store the stacktrace, the
|
||||||
|
* bpf program provides *buf* with a nonnegative *size*.
|
||||||
|
@@ -4415,6 +4599,7 @@ union bpf_attr {
|
||||||
|
*
|
||||||
|
* **BPF_F_USER_STACK**
|
||||||
|
* Collect a user space stack instead of a kernel stack.
|
||||||
|
+ * The *task* must be the current task.
|
||||||
|
* **BPF_F_USER_BUILD_ID**
|
||||||
|
* Collect buildid+offset instead of ips for user stack,
|
||||||
|
* only valid if **BPF_F_USER_STACK** is also specified.
|
||||||
|
@@ -4718,9 +4903,9 @@ union bpf_attr {
|
||||||
|
* going through the CPU's backlog queue.
|
||||||
|
*
|
||||||
|
* The *flags* argument is reserved and must be 0. The helper is
|
||||||
|
- * currently only supported for tc BPF program types at the ingress
|
||||||
|
- * hook and for veth device types. The peer device must reside in a
|
||||||
|
- * different network namespace.
|
||||||
|
+ * currently only supported for tc BPF program types at the
|
||||||
|
+ * ingress hook and for veth and netkit target device types. The
|
||||||
|
+ * peer device must reside in a different network namespace.
|
||||||
|
* Return
|
||||||
|
* The helper returns **TC_ACT_REDIRECT** on success or
|
||||||
|
* **TC_ACT_SHOT** on error.
|
||||||
|
@@ -5003,6 +5188,8 @@ union bpf_attr {
|
||||||
|
* **BPF_F_TIMER_ABS**
|
||||||
|
* Start the timer in absolute expire value instead of the
|
||||||
|
* default relative one.
|
||||||
|
+ * **BPF_F_TIMER_CPU_PIN**
|
||||||
|
+ * Timer will be pinned to the CPU of the caller.
|
||||||
|
*
|
||||||
|
* Return
|
||||||
|
* 0 on success.
|
||||||
|
@@ -5022,9 +5209,14 @@ union bpf_attr {
|
||||||
|
* u64 bpf_get_func_ip(void *ctx)
|
||||||
|
* Description
|
||||||
|
* Get address of the traced function (for tracing and kprobe programs).
|
||||||
|
+ *
|
||||||
|
+ * When called for kprobe program attached as uprobe it returns
|
||||||
|
+ * probe address for both entry and return uprobe.
|
||||||
|
+ *
|
||||||
|
* Return
|
||||||
|
- * Address of the traced function.
|
||||||
|
+ * Address of the traced function for kprobe.
|
||||||
|
* 0 for kprobes placed within the function (not at the entry).
|
||||||
|
+ * Address of the probe for uprobe and return uprobe.
|
||||||
|
*
|
||||||
|
* u64 bpf_get_attach_cookie(void *ctx)
|
||||||
|
* Description
|
||||||
|
@@ -6165,6 +6357,19 @@ struct bpf_sock_tuple {
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* (Simplified) user return codes for tcx prog type.
|
||||||
|
+ * A valid tcx program must return one of these defined values. All other
|
||||||
|
+ * return codes are reserved for future use. Must remain compatible with
|
||||||
|
+ * their TC_ACT_* counter-parts. For compatibility in behavior, unknown
|
||||||
|
+ * return codes are mapped to TCX_NEXT.
|
||||||
|
+ */
|
||||||
|
+enum tcx_action_base {
|
||||||
|
+ TCX_NEXT = -1,
|
||||||
|
+ TCX_PASS = 0,
|
||||||
|
+ TCX_DROP = 2,
|
||||||
|
+ TCX_REDIRECT = 7,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
struct bpf_xdp_sock {
|
||||||
|
__u32 queue_id;
|
||||||
|
};
|
||||||
|
@@ -6346,7 +6551,7 @@ struct bpf_map_info {
|
||||||
|
__u32 btf_id;
|
||||||
|
__u32 btf_key_type_id;
|
||||||
|
__u32 btf_value_type_id;
|
||||||
|
- __u32 :32; /* alignment pad */
|
||||||
|
+ __u32 btf_vmlinux_id;
|
||||||
|
__u64 map_extra;
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
@@ -6411,6 +6616,69 @@ struct bpf_link_info {
|
||||||
|
struct {
|
||||||
|
__u32 map_id;
|
||||||
|
} struct_ops;
|
||||||
|
+ struct {
|
||||||
|
+ __u32 pf;
|
||||||
|
+ __u32 hooknum;
|
||||||
|
+ __s32 priority;
|
||||||
|
+ __u32 flags;
|
||||||
|
+ } netfilter;
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 addrs;
|
||||||
|
+ __u32 count; /* in/out: kprobe_multi function count */
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u64 missed;
|
||||||
|
+ __aligned_u64 cookies;
|
||||||
|
+ } kprobe_multi;
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 path;
|
||||||
|
+ __aligned_u64 offsets;
|
||||||
|
+ __aligned_u64 ref_ctr_offsets;
|
||||||
|
+ __aligned_u64 cookies;
|
||||||
|
+ __u32 path_size; /* in/out: real path size on success, including zero byte */
|
||||||
|
+ __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */
|
||||||
|
+ __u32 flags;
|
||||||
|
+ __u32 pid;
|
||||||
|
+ } uprobe_multi;
|
||||||
|
+ struct {
|
||||||
|
+ __u32 type; /* enum bpf_perf_event_type */
|
||||||
|
+ __u32 :32;
|
||||||
|
+ union {
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 file_name; /* in/out */
|
||||||
|
+ __u32 name_len;
|
||||||
|
+ __u32 offset; /* offset from file_name */
|
||||||
|
+ __u64 cookie;
|
||||||
|
+ } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 func_name; /* in/out */
|
||||||
|
+ __u32 name_len;
|
||||||
|
+ __u32 offset; /* offset from func_name */
|
||||||
|
+ __u64 addr;
|
||||||
|
+ __u64 missed;
|
||||||
|
+ __u64 cookie;
|
||||||
|
+ } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
|
||||||
|
+ struct {
|
||||||
|
+ __aligned_u64 tp_name; /* in/out */
|
||||||
|
+ __u32 name_len;
|
||||||
|
+ __u32 :32;
|
||||||
|
+ __u64 cookie;
|
||||||
|
+ } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
|
||||||
|
+ struct {
|
||||||
|
+ __u64 config;
|
||||||
|
+ __u32 type;
|
||||||
|
+ __u32 :32;
|
||||||
|
+ __u64 cookie;
|
||||||
|
+ } event; /* BPF_PERF_EVENT_EVENT */
|
||||||
|
+ };
|
||||||
|
+ } perf_event;
|
||||||
|
+ struct {
|
||||||
|
+ __u32 ifindex;
|
||||||
|
+ __u32 attach_type;
|
||||||
|
+ } tcx;
|
||||||
|
+ struct {
|
||||||
|
+ __u32 ifindex;
|
||||||
|
+ __u32 attach_type;
|
||||||
|
+ } netkit;
|
||||||
|
};
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
@@ -6707,6 +6975,7 @@ enum {
|
||||||
|
BPF_TCP_LISTEN,
|
||||||
|
BPF_TCP_CLOSING, /* Now a valid state */
|
||||||
|
BPF_TCP_NEW_SYN_RECV,
|
||||||
|
+ BPF_TCP_BOUND_INACTIVE,
|
||||||
|
|
||||||
|
BPF_TCP_MAX_STATES /* Leave at the end! */
|
||||||
|
};
|
||||||
|
@@ -6808,6 +7077,8 @@ enum {
|
||||||
|
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
|
||||||
|
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
|
||||||
|
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
|
||||||
|
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
|
||||||
|
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
@@ -6820,6 +7091,7 @@ enum {
|
||||||
|
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
|
||||||
|
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
|
||||||
|
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
|
||||||
|
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_fib_lookup {
|
||||||
|
@@ -6854,6 +7126,9 @@ struct bpf_fib_lookup {
|
||||||
|
__u32 rt_metric;
|
||||||
|
};
|
||||||
|
|
||||||
|
+ /* input: source address to consider for lookup
|
||||||
|
+ * output: source address result from lookup
|
||||||
|
+ */
|
||||||
|
union {
|
||||||
|
__be32 ipv4_src;
|
||||||
|
__u32 ipv6_src[4]; /* in6_addr; network order */
|
||||||
|
@@ -6868,9 +7143,19 @@ struct bpf_fib_lookup {
|
||||||
|
__u32 ipv6_dst[4]; /* in6_addr; network order */
|
||||||
|
};
|
||||||
|
|
||||||
|
- /* output */
|
||||||
|
- __be16 h_vlan_proto;
|
||||||
|
- __be16 h_vlan_TCI;
|
||||||
|
+ union {
|
||||||
|
+ struct {
|
||||||
|
+ /* output */
|
||||||
|
+ __be16 h_vlan_proto;
|
||||||
|
+ __be16 h_vlan_TCI;
|
||||||
|
+ };
|
||||||
|
+ /* input: when accompanied with the
|
||||||
|
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
|
||||||
|
+ * specific routing table to use for the fib lookup.
|
||||||
|
+ */
|
||||||
|
+ __u32 tbid;
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
__u8 smac[6]; /* ETH_ALEN */
|
||||||
|
__u8 dmac[6]; /* ETH_ALEN */
|
||||||
|
};
|
||||||
|
@@ -6956,38 +7241,31 @@ struct bpf_spin_lock {
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_timer {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[2];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_dynptr {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[2];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_list_head {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[2];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_list_node {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[3];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_rb_root {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[2];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_rb_node {
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
- __u64 :64;
|
||||||
|
+ __u64 __opaque[4];
|
||||||
|
} __attribute__((aligned(8)));
|
||||||
|
|
||||||
|
struct bpf_refcount {
|
||||||
|
- __u32 :32;
|
||||||
|
+ __u32 __opaque[1];
|
||||||
|
} __attribute__((aligned(4)));
|
||||||
|
|
||||||
|
struct bpf_sysctl {
|
||||||
|
@@ -7143,9 +7421,11 @@ struct bpf_core_relo {
|
||||||
|
* Flags to control bpf_timer_start() behaviour.
|
||||||
|
* - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
|
||||||
|
* relative to current time.
|
||||||
|
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
|
||||||
|
*/
|
||||||
|
enum {
|
||||||
|
BPF_F_TIMER_ABS = (1ULL << 0),
|
||||||
|
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
|
||||||
|
};
|
||||||
|
|
||||||
|
/* BPF numbers iterator state */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
@ -1,31 +0,0 @@
|
|||||||
From 0973fd70c1c50e57a3db0b09e239b1d1fd3f1c55 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Fri, 21 Jul 2023 16:10:18 +0200
|
|
||||||
Subject: [PATCH] Use bpf_obj_get_info_by_fd() instead of
|
|
||||||
bpf_btf_get_info_by_fd()
|
|
||||||
|
|
||||||
The libbpf version in rawhide doesn't have the typed
|
|
||||||
bpf_*_get_info_by_fd().
|
|
||||||
---
|
|
||||||
src/cc/libbpf.c | 4 ++--
|
|
||||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
|
|
||||||
index 360fd81d..a3e34da2 100644
|
|
||||||
--- a/src/cc/libbpf.c
|
|
||||||
+++ b/src/cc/libbpf.c
|
|
||||||
@@ -727,9 +727,9 @@ static int find_btf_id(const char *module_name, const char *func_name,
|
|
||||||
info.name = ptr_to_u64(name);
|
|
||||||
info.name_len = sizeof(name);
|
|
||||||
|
|
||||||
- err = bpf_btf_get_info_by_fd(fd, &info, &len);
|
|
||||||
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
|
|
||||||
if (err) {
|
|
||||||
- fprintf(stderr, "bpf_btf_get_info_by_fd failed: %d\n", err);
|
|
||||||
+ fprintf(stderr, "bpf_obj_get_info_by_fd failed: %d\n", err);
|
|
||||||
goto err_out;
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
18
bcc.spec
18
bcc.spec
@ -10,7 +10,7 @@
|
|||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifarch x86_64 ppc64 ppc64le aarch64
|
%ifarch x86_64 ppc64 ppc64le aarch64 s390x
|
||||||
%bcond_without libbpf_tools
|
%bcond_without libbpf_tools
|
||||||
%else
|
%else
|
||||||
%bcond_with libbpf_tools
|
%bcond_with libbpf_tools
|
||||||
@ -24,16 +24,15 @@
|
|||||||
|
|
||||||
|
|
||||||
Name: bcc
|
Name: bcc
|
||||||
Version: 0.28.0
|
Version: 0.29.1
|
||||||
Release: 3%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: BPF Compiler Collection (BCC)
|
Summary: BPF Compiler Collection (BCC)
|
||||||
License: Apache-2.0
|
License: Apache-2.0
|
||||||
URL: https://github.com/iovisor/bcc
|
URL: https://github.com/iovisor/bcc
|
||||||
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
|
Source0: %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
|
||||||
Patch0: Use-bpf_obj_get_info_by_fd-instead-of-bpf_btf_get_in.patch
|
Patch0: libbpf-tools-Fix-bindsnoop-for-kernel-v6.6.patch
|
||||||
Patch1: libbpf-tools-add-block_io_-start-done-tracepoints-su.patch
|
Patch1: Fix-ttysnoop.py-with-newer-kernels.patch
|
||||||
Patch2: tools-Add-support-for-the-new-block_io_-tracepoints.patch
|
Patch2: Sync-with-latest-libbpf-repo-4889.patch
|
||||||
Patch3: tool-slabratetop-add-definition-of-freelist_aba_t.patch
|
|
||||||
|
|
||||||
# Arches will be included as upstream support is added and dependencies are
|
# Arches will be included as upstream support is added and dependencies are
|
||||||
# satisfied in the respective arches
|
# satisfied in the respective arches
|
||||||
@ -242,6 +241,11 @@ cp -a libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}/
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Feb 05 2024 Jerome Marchand <jmarchan@redhat.com> - 0.29.1-1
|
||||||
|
- Rebase to the latest release version (#2253688)
|
||||||
|
- Enable libbpf-tools on s390x (#2249458)
|
||||||
|
- Misc 0.29.1 fixes
|
||||||
|
|
||||||
* Tue Jan 23 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.28.0-3
|
* Tue Jan 23 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.28.0-3
|
||||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
|
- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
|
||||||
|
|
||||||
|
114
libbpf-tools-Fix-bindsnoop-for-kernel-v6.6.patch
Normal file
114
libbpf-tools-Fix-bindsnoop-for-kernel-v6.6.patch
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
From abf7b251c1461dcbe0c1e75d1d0da71662c9fae1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Hengqi Chen <hengqi.chen@gmail.com>
|
||||||
|
Date: Sun, 17 Dec 2023 11:27:10 +0000
|
||||||
|
Subject: [PATCH] libbpf-tools: Fix bindsnoop for kernel v6.6+
|
||||||
|
|
||||||
|
The freebind field in struct inet_sock is gone in recent kernel
|
||||||
|
versions due to some kernel refactoring work ([0]). The change
|
||||||
|
breaks the bindsnoop tool. Fix it in a CO-RE way.
|
||||||
|
|
||||||
|
This should close #4838.
|
||||||
|
|
||||||
|
[0]: https://lore.kernel.org/all/20230816081547.1272409-1-edumazet@google.com/
|
||||||
|
|
||||||
|
Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
|
||||||
|
---
|
||||||
|
libbpf-tools/bindsnoop.bpf.c | 8 +++--
|
||||||
|
libbpf-tools/core_fixes.bpf.h | 56 +++++++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 61 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libbpf-tools/bindsnoop.bpf.c b/libbpf-tools/bindsnoop.bpf.c
|
||||||
|
index 41dce942..ead19c67 100644
|
||||||
|
--- a/libbpf-tools/bindsnoop.bpf.c
|
||||||
|
+++ b/libbpf-tools/bindsnoop.bpf.c
|
||||||
|
@@ -5,7 +5,9 @@
|
||||||
|
#include <bpf/bpf_core_read.h>
|
||||||
|
#include <bpf/bpf_tracing.h>
|
||||||
|
#include <bpf/bpf_endian.h>
|
||||||
|
+
|
||||||
|
#include "bindsnoop.h"
|
||||||
|
+#include "core_fixes.bpf.h"
|
||||||
|
|
||||||
|
#define MAX_ENTRIES 10240
|
||||||
|
#define MAX_PORTS 1024
|
||||||
|
@@ -85,9 +87,9 @@ static int probe_exit(struct pt_regs *ctx, short ver)
|
||||||
|
if (filter_by_port && !port)
|
||||||
|
goto cleanup;
|
||||||
|
|
||||||
|
- opts.fields.freebind = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, freebind);
|
||||||
|
- opts.fields.transparent = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, transparent);
|
||||||
|
- opts.fields.bind_address_no_port = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, bind_address_no_port);
|
||||||
|
+ opts.fields.freebind = get_inet_sock_freebind(inet_sock);
|
||||||
|
+ opts.fields.transparent = get_inet_sock_transparent(inet_sock);
|
||||||
|
+ opts.fields.bind_address_no_port = get_inet_sock_bind_address_no_port(inet_sock);
|
||||||
|
opts.fields.reuseaddress = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuse);
|
||||||
|
opts.fields.reuseport = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuseport);
|
||||||
|
event.opts = opts.data;
|
||||||
|
diff --git a/libbpf-tools/core_fixes.bpf.h b/libbpf-tools/core_fixes.bpf.h
|
||||||
|
index 84cb7f18..a4c84c02 100644
|
||||||
|
--- a/libbpf-tools/core_fixes.bpf.h
|
||||||
|
+++ b/libbpf-tools/core_fixes.bpf.h
|
||||||
|
@@ -249,4 +249,60 @@ static __always_inline __u64 get_sock_ident(struct sock *sk)
|
||||||
|
return (__u64)sk;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/**
|
||||||
|
+ * During kernel 6.6 development cycle, several bitfields in struct inet_sock gone,
|
||||||
|
+ * they are placed in inet_sock::inet_flags instead ([0]).
|
||||||
|
+ *
|
||||||
|
+ * References:
|
||||||
|
+ * [0]: https://lore.kernel.org/all/20230816081547.1272409-1-edumazet@google.com/
|
||||||
|
+ */
|
||||||
|
+struct inet_sock___o {
|
||||||
|
+ __u8 freebind: 1;
|
||||||
|
+ __u8 transparent: 1;
|
||||||
|
+ __u8 bind_address_no_port: 1;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+enum {
|
||||||
|
+ INET_FLAGS_FREEBIND___x = 11,
|
||||||
|
+ INET_FLAGS_TRANSPARENT___x = 15,
|
||||||
|
+ INET_FLAGS_BIND_ADDRESS_NO_PORT___x = 18,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct inet_sock___x {
|
||||||
|
+ unsigned long inet_flags;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static __always_inline __u8 get_inet_sock_freebind(void *inet_sock)
|
||||||
|
+{
|
||||||
|
+ unsigned long inet_flags;
|
||||||
|
+
|
||||||
|
+ if (bpf_core_field_exists(struct inet_sock___o, freebind))
|
||||||
|
+ return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, freebind);
|
||||||
|
+
|
||||||
|
+ inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
|
||||||
|
+ return (1 << INET_FLAGS_FREEBIND___x) & inet_flags ? 1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static __always_inline __u8 get_inet_sock_transparent(void *inet_sock)
|
||||||
|
+{
|
||||||
|
+ unsigned long inet_flags;
|
||||||
|
+
|
||||||
|
+ if (bpf_core_field_exists(struct inet_sock___o, transparent))
|
||||||
|
+ return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, transparent);
|
||||||
|
+
|
||||||
|
+ inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
|
||||||
|
+ return (1 << INET_FLAGS_TRANSPARENT___x) & inet_flags ? 1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static __always_inline __u8 get_inet_sock_bind_address_no_port(void *inet_sock)
|
||||||
|
+{
|
||||||
|
+ unsigned long inet_flags;
|
||||||
|
+
|
||||||
|
+ if (bpf_core_field_exists(struct inet_sock___o, bind_address_no_port))
|
||||||
|
+ return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, bind_address_no_port);
|
||||||
|
+
|
||||||
|
+ inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
|
||||||
|
+ return (1 << INET_FLAGS_BIND_ADDRESS_NO_PORT___x) & inet_flags ? 1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#endif /* __CORE_FIXES_BPF_H */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
@ -1,476 +0,0 @@
|
|||||||
From e1dfbe2d09583205acca1d1b5b09caefb460f2fd Mon Sep 17 00:00:00 2001
|
|
||||||
From: mickey_zhu <mickey_zhu@realsil.com.cn>
|
|
||||||
Date: Tue, 27 Jun 2023 16:32:44 +0800
|
|
||||||
Subject: [PATCH 1/2] libbpf-tools: add block_io_{start,done} tracepoints
|
|
||||||
support to bio tools
|
|
||||||
|
|
||||||
Some bio tools fail to kprobe blk_account_io_{start,done} after v5.17,
|
|
||||||
because they become inlined, see [0]. To fix this issue, tracepoints
|
|
||||||
block_io_{start,done} are introduced in kernel, see [1].
|
|
||||||
|
|
||||||
Update related bio tools to support new tracepoints, and also simplify
|
|
||||||
attach.
|
|
||||||
|
|
||||||
[0] Kernel commit 450b7879e345 (block: move blk_account_io_{start,done} to blk-mq.c)
|
|
||||||
[1] Kernel commit 5a80bd075f3b (block: introduce block_io_start/block_io_done tracepoints)
|
|
||||||
|
|
||||||
Change-Id: I62b957abd7ce2901eb114bd57c78938e4f083e4d
|
|
||||||
Signed-off-by: Mickey Zhu <mickey_zhu@realsil.com.cn>
|
|
||||||
---
|
|
||||||
libbpf-tools/biosnoop.bpf.c | 9 ++++
|
|
||||||
libbpf-tools/biosnoop.c | 78 +++++++++++++--------------------
|
|
||||||
libbpf-tools/biostacks.bpf.c | 46 +++++++++++++------
|
|
||||||
libbpf-tools/biostacks.c | 85 +++++++++++++++++++++---------------
|
|
||||||
libbpf-tools/biotop.bpf.c | 44 +++++++++++++++++--
|
|
||||||
libbpf-tools/biotop.c | 59 ++++++++++++++++---------
|
|
||||||
6 files changed, 199 insertions(+), 122 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biosnoop.bpf.c b/libbpf-tools/biosnoop.bpf.c
|
|
||||||
index b791555f..fcc5c5ce 100644
|
|
||||||
--- a/libbpf-tools/biosnoop.bpf.c
|
|
||||||
+++ b/libbpf-tools/biosnoop.bpf.c
|
|
||||||
@@ -76,6 +76,15 @@ int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
return trace_pid(rq);
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+ return trace_pid(rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
{
|
|
||||||
diff --git a/libbpf-tools/biosnoop.c b/libbpf-tools/biosnoop.c
|
|
||||||
index 21773729..f9468900 100644
|
|
||||||
--- a/libbpf-tools/biosnoop.c
|
|
||||||
+++ b/libbpf-tools/biosnoop.c
|
|
||||||
@@ -212,6 +212,16 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
|
|
||||||
fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ if (fentry_can_attach("blk_account_io_start", NULL))
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "blk_account_io_start");
|
|
||||||
+ else
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "__blk_account_io_start");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
const struct partition *partition;
|
|
||||||
@@ -260,12 +270,23 @@ int main(int argc, char **argv)
|
|
||||||
obj->rodata->filter_cg = env.cg;
|
|
||||||
obj->rodata->min_ns = env.min_lat_ms * 1000000;
|
|
||||||
|
|
||||||
- if (fentry_can_attach("blk_account_io_start", NULL))
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "blk_account_io_start");
|
|
||||||
- else
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "__blk_account_io_start");
|
|
||||||
+ if (tracepoint_exists("block", "block_io_start"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ else {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ blk_account_io_set_attach_target(obj);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ ksyms = ksyms__load();
|
|
||||||
+ if (!ksyms) {
|
|
||||||
+ fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
+ goto cleanup;
|
|
||||||
+ }
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
|
|
||||||
+
|
|
||||||
+ if (!env.queued)
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_rq_insert, false);
|
|
||||||
|
|
||||||
err = biosnoop_bpf__load(obj);
|
|
||||||
if (err) {
|
|
||||||
@@ -288,48 +309,9 @@ int main(int argc, char **argv)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- ksyms = ksyms__load();
|
|
||||||
- if (!ksyms) {
|
|
||||||
- err = -ENOMEM;
|
|
||||||
- fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
|
|
||||||
- obj->links.blk_account_io_merge_bio =
|
|
||||||
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
|
|
||||||
- if (!obj->links.blk_account_io_merge_bio) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- if (env.queued) {
|
|
||||||
- obj->links.block_rq_insert =
|
|
||||||
- bpf_program__attach(obj->progs.block_rq_insert);
|
|
||||||
- if (!obj->links.block_rq_insert) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
|
|
||||||
- if (!obj->links.block_rq_issue) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
- obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
|
|
||||||
- if (!obj->links.block_rq_complete) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
|
|
||||||
+ err = biosnoop_bpf__attach(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
|
|
||||||
index c3950910..0ca69880 100644
|
|
||||||
--- a/libbpf-tools/biostacks.bpf.c
|
|
||||||
+++ b/libbpf-tools/biostacks.bpf.c
|
|
||||||
@@ -67,20 +67,8 @@ int trace_start(void *ctx, struct request *rq, bool merge_bio)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
-SEC("fentry/blk_account_io_start")
|
|
||||||
-int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
-{
|
|
||||||
- return trace_start(ctx, rq, false);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
-int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
-{
|
|
||||||
- return trace_start(ctx, rq, true);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-SEC("fentry/blk_account_io_done")
|
|
||||||
-int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_done(void *ctx, struct request *rq)
|
|
||||||
{
|
|
||||||
u64 slot, ts = bpf_ktime_get_ns();
|
|
||||||
struct internal_rqinfo *i_rqinfop;
|
|
||||||
@@ -110,4 +98,34 @@ int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("kprobe/blk_account_io_merge_bio")
|
|
||||||
+int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, true);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("fentry/blk_account_io_start")
|
|
||||||
+int BPF_PROG(blk_account_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("fentry/blk_account_io_done")
|
|
||||||
+int BPF_PROG(blk_account_io_done, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_done(ctx, rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_start(ctx, rq, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_done")
|
|
||||||
+int BPF_PROG(block_io_done, struct request *rq)
|
|
||||||
+{
|
|
||||||
+ return trace_done(ctx, rq);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
char LICENSE[] SEC("license") = "GPL";
|
|
||||||
diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
|
|
||||||
index e1878d1f..e7875f76 100644
|
|
||||||
--- a/libbpf-tools/biostacks.c
|
|
||||||
+++ b/libbpf-tools/biostacks.c
|
|
||||||
@@ -128,6 +128,39 @@ void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool has_block_io_tracepoints(void)
|
|
||||||
+{
|
|
||||||
+ return tracepoint_exists("block", "block_io_start") &&
|
|
||||||
+ tracepoint_exists("block", "block_io_done");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_block_io_tracepoints(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_blk_account_io_fentry(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void blk_account_io_set_attach_target(struct biostacks_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ if (fentry_can_attach("blk_account_io_start", NULL)) {
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "blk_account_io_start");
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
|
|
||||||
+ 0, "blk_account_io_done");
|
|
||||||
+ } else {
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_start,
|
|
||||||
+ 0, "__blk_account_io_start");
|
|
||||||
+ bpf_program__set_attach_target(obj->progs.blk_account_io_done,
|
|
||||||
+ 0, "__blk_account_io_done");
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
struct partitions *partitions = NULL;
|
|
||||||
@@ -172,50 +205,30 @@ int main(int argc, char **argv)
|
|
||||||
|
|
||||||
obj->rodata->targ_ms = env.milliseconds;
|
|
||||||
|
|
||||||
- if (fentry_can_attach("blk_account_io_start", NULL)) {
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "blk_account_io_start");
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
|
|
||||||
- "blk_account_io_done");
|
|
||||||
- } else {
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
|
|
||||||
- "__blk_account_io_start");
|
|
||||||
- bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
|
|
||||||
- "__blk_account_io_done");
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- err = biostacks_bpf__load(obj);
|
|
||||||
- if (err) {
|
|
||||||
- fprintf(stderr, "failed to load BPF object: %d\n", err);
|
|
||||||
- goto cleanup;
|
|
||||||
+ if (has_block_io_tracepoints())
|
|
||||||
+ disable_blk_account_io_fentry(obj);
|
|
||||||
+ else {
|
|
||||||
+ disable_block_io_tracepoints(obj);
|
|
||||||
+ blk_account_io_set_attach_target(obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
ksyms = ksyms__load();
|
|
||||||
if (!ksyms) {
|
|
||||||
fprintf(stderr, "failed to load kallsyms\n");
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
- if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
|
|
||||||
- obj->links.blk_account_io_merge_bio =
|
|
||||||
- bpf_program__attach(obj->progs.blk_account_io_merge_bio);
|
|
||||||
- if (!obj->links.blk_account_io_merge_bio) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
|
|
||||||
+
|
|
||||||
+ err = biostacks_bpf__load(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to load BPF object: %d\n", err);
|
|
||||||
+ goto cleanup;
|
|
||||||
}
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach(obj->progs.blk_account_io_done);
|
|
||||||
- if (!obj->links.blk_account_io_done) {
|
|
||||||
- err = -errno;
|
|
||||||
- fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
|
|
||||||
- strerror(-err));
|
|
||||||
+
|
|
||||||
+ err = biostacks_bpf__attach(obj);
|
|
||||||
+ if (err) {
|
|
||||||
+ fprintf(stderr, "failed to attach BPF programs: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/libbpf-tools/biotop.bpf.c b/libbpf-tools/biotop.bpf.c
|
|
||||||
index 226e32d3..07631378 100644
|
|
||||||
--- a/libbpf-tools/biotop.bpf.c
|
|
||||||
+++ b/libbpf-tools/biotop.bpf.c
|
|
||||||
@@ -30,8 +30,8 @@ struct {
|
|
||||||
__type(value, struct val_t);
|
|
||||||
} counts SEC(".maps");
|
|
||||||
|
|
||||||
-SEC("kprobe")
|
|
||||||
-int BPF_KPROBE(blk_account_io_start, struct request *req)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_start(struct request *req)
|
|
||||||
{
|
|
||||||
struct who_t who = {};
|
|
||||||
|
|
||||||
@@ -56,8 +56,8 @@ int BPF_KPROBE(blk_mq_start_request, struct request *req)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
-SEC("kprobe")
|
|
||||||
-int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
|
|
||||||
+static __always_inline
|
|
||||||
+int trace_done(struct request *req)
|
|
||||||
{
|
|
||||||
struct val_t *valp, zero = {};
|
|
||||||
struct info_t info = {};
|
|
||||||
@@ -103,4 +103,40 @@ int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+SEC("kprobe/blk_account_io_start")
|
|
||||||
+int BPF_KPROBE(blk_account_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/blk_account_io_done")
|
|
||||||
+int BPF_KPROBE(blk_account_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/__blk_account_io_start")
|
|
||||||
+int BPF_KPROBE(__blk_account_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("kprobe/__blk_account_io_done")
|
|
||||||
+int BPF_KPROBE(__blk_account_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_start")
|
|
||||||
+int BPF_PROG(block_io_start, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_start(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+SEC("tp_btf/block_io_done")
|
|
||||||
+int BPF_PROG(block_io_done, struct request *req)
|
|
||||||
+{
|
|
||||||
+ return trace_done(req);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
char LICENSE[] SEC("license") = "GPL";
|
|
||||||
diff --git a/libbpf-tools/biotop.c b/libbpf-tools/biotop.c
|
|
||||||
index 75484281..5b3a7cf3 100644
|
|
||||||
--- a/libbpf-tools/biotop.c
|
|
||||||
+++ b/libbpf-tools/biotop.c
|
|
||||||
@@ -354,6 +354,38 @@ static int print_stat(struct biotop_bpf *obj)
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool has_block_io_tracepoints(void)
|
|
||||||
+{
|
|
||||||
+ return tracepoint_exists("block", "block_io_start") &&
|
|
||||||
+ tracepoint_exists("block", "block_io_done");
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_block_io_tracepoints(struct biotop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.block_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
|
|
||||||
+{
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void blk_account_io_set_autoload(struct biotop_bpf *obj,
|
|
||||||
+ struct ksyms *ksyms)
|
|
||||||
+{
|
|
||||||
+ if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
|
|
||||||
+ } else {
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
|
|
||||||
+ bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
static const struct argp argp = {
|
|
||||||
@@ -386,32 +418,19 @@ int main(int argc, char **argv)
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ if (has_block_io_tracepoints())
|
|
||||||
+ disable_blk_account_io_kprobes(obj);
|
|
||||||
+ else {
|
|
||||||
+ disable_block_io_tracepoints(obj);
|
|
||||||
+ blk_account_io_set_autoload(obj, ksyms);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
err = biotop_bpf__load(obj);
|
|
||||||
if (err) {
|
|
||||||
warn("failed to load BPF object: %d\n", err);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (ksyms__get_symbol(ksyms, "__blk_account_io_start"))
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "__blk_account_io_start");
|
|
||||||
- else
|
|
||||||
- obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "blk_account_io_start");
|
|
||||||
-
|
|
||||||
- if (!obj->links.blk_account_io_start) {
|
|
||||||
- warn("failed to load attach blk_account_io_start\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if (ksyms__get_symbol(ksyms, "__blk_account_io_done"))
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "__blk_account_io_done");
|
|
||||||
- else
|
|
||||||
- obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "blk_account_io_done");
|
|
||||||
-
|
|
||||||
- if (!obj->links.blk_account_io_done) {
|
|
||||||
- warn("failed to load attach blk_account_io_done\n");
|
|
||||||
- goto cleanup;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
err = biotop_bpf__attach(obj);
|
|
||||||
if (err) {
|
|
||||||
warn("failed to attach BPF programs: %d\n", err);
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
2
sources
2
sources
@ -1 +1 @@
|
|||||||
SHA512 (bcc-0.28.0.tar.gz) = 792ce93dba64b1f87390b2602dcaeba04ac8b2863652b06eb9a907b93bc6137a944b856cc6fa9c7a38671c89814740967561ca4f3b29c267babca7dc5e78aa02
|
SHA512 (bcc-0.29.1.tar.gz) = 9e60130ea602e19e6c6f88a8c17023cea5daf4c5bcc7af8816e9f5c662341136eb449a3fdf870ffad215495ac3bf895115c0d968d92ce79ebe2899b3e2464d24
|
||||||
|
@ -1,55 +0,0 @@
|
|||||||
From 59a1fccfc78482af189150b7937b21244f34e48a Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Thu, 3 Aug 2023 16:11:50 +0200
|
|
||||||
Subject: [PATCH] tool/slabratetop: add definition of freelist_aba_t
|
|
||||||
|
|
||||||
With recent kernel containing the commit 6801be4f2653 ("slub: Replace
|
|
||||||
cmpxchg_double()"), slabratetop fails to compile with the following
|
|
||||||
error:
|
|
||||||
|
|
||||||
In file included from /virtual/main.c:86:
|
|
||||||
include/linux/slub_def.h:56:3: error: unknown type name 'freelist_aba_t'
|
|
||||||
freelist_aba_t freelist_tid;
|
|
||||||
^
|
|
||||||
2 warnings and 1 error generated.
|
|
||||||
Traceback (most recent call last):
|
|
||||||
File "/usr/share/bcc/tools/slabratetop", line 187, in <module>
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
^^^^^^^^^^^^^^^^^^
|
|
||||||
File "/usr/lib/python3.12/site-packages/bcc/__init__.py", line 479, in __init__
|
|
||||||
raise Exception("Failed to compile BPF module %s" % (src_file or "<text>"))
|
|
||||||
Exception: Failed to compile BPF module <text>
|
|
||||||
|
|
||||||
Adding the definition of freelist_aba_t fixes the issue.
|
|
||||||
---
|
|
||||||
tools/slabratetop.py | 14 ++++++++++++++
|
|
||||||
1 file changed, 14 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/tools/slabratetop.py b/tools/slabratetop.py
|
|
||||||
index 8fbcac5e..8a7d486e 100755
|
|
||||||
--- a/tools/slabratetop.py
|
|
||||||
+++ b/tools/slabratetop.py
|
|
||||||
@@ -141,6 +141,20 @@ static inline void *slab_address(const struct slab *slab)
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
+#ifdef CONFIG_64BIT
|
|
||||||
+typedef __uint128_t freelist_full_t;
|
|
||||||
+#else
|
|
||||||
+typedef u64 freelist_full_t;
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+typedef union {
|
|
||||||
+ struct {
|
|
||||||
+ void *freelist;
|
|
||||||
+ unsigned long counter;
|
|
||||||
+ };
|
|
||||||
+ freelist_full_t full;
|
|
||||||
+} freelist_aba_t;
|
|
||||||
+
|
|
||||||
#ifdef CONFIG_SLUB
|
|
||||||
#include <linux/slub_def.h>
|
|
||||||
#else
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
@ -1,855 +0,0 @@
|
|||||||
From 53ef33b5ad42e6a4baa37821119199f2d846beff Mon Sep 17 00:00:00 2001
|
|
||||||
From: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
Date: Thu, 27 Jul 2023 18:19:18 +0200
|
|
||||||
Subject: [PATCH 2/2] tools: Add support for the new block_io_* tracepoints
|
|
||||||
|
|
||||||
The bio tools currently depend on blk_account_io_done/start functions
|
|
||||||
that can be inlined. To fix that, a couple of tracepoints have been
|
|
||||||
added upstream (block:block_io_start/done). This patch adds support
|
|
||||||
for those tracepoints when they are available.
|
|
||||||
|
|
||||||
Unfortunately, the bio tools rely on data that is not available to
|
|
||||||
the tracepoints (mostly the struct request). So the tracepoints can't
|
|
||||||
be used as a drop-in replacement for blk_account_io_*. The main difference
|
|
||||||
is that we can't use the struct request as the hash key anymore, so it
|
|
||||||
now uses the pair (dev_t, sector) for that purpose.
|
|
||||||
|
|
||||||
For the biolatency tool, the -F option is disabled when only the
|
|
||||||
tracepoints are available because the flags are not all accessible
|
|
||||||
from the tracepoints. Otherwise, all features of the tools should
|
|
||||||
remain.
|
|
||||||
|
|
||||||
Closes #4261
|
|
||||||
|
|
||||||
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
|
|
||||||
---
|
|
||||||
tools/biolatency.py | 166 ++++++++++++++++++++++++++++--------
|
|
||||||
tools/biosnoop.py | 200 +++++++++++++++++++++++++++++++++-----------
|
|
||||||
tools/biotop.py | 108 +++++++++++++++++++-----
|
|
||||||
3 files changed, 371 insertions(+), 103 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/tools/biolatency.py b/tools/biolatency.py
|
|
||||||
index 8fe43a7c..03b48a4c 100755
|
|
||||||
--- a/tools/biolatency.py
|
|
||||||
+++ b/tools/biolatency.py
|
|
||||||
@@ -11,6 +11,7 @@
|
|
||||||
#
|
|
||||||
# 20-Sep-2015 Brendan Gregg Created this.
|
|
||||||
# 31-Mar-2022 Rocky Xing Added disk filter support.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -72,7 +73,7 @@ bpf_text = """
|
|
||||||
#include <linux/blk-mq.h>
|
|
||||||
|
|
||||||
typedef struct disk_key {
|
|
||||||
- char disk[DISK_NAME_LEN];
|
|
||||||
+ dev_t dev;
|
|
||||||
u64 slot;
|
|
||||||
} disk_key_t;
|
|
||||||
|
|
||||||
@@ -86,26 +87,70 @@ typedef struct ext_val {
|
|
||||||
u64 count;
|
|
||||||
} ext_val_t;
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *);
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct start_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 _pad;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ CMD_FLAGS
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+BPF_HASH(start, struct start_key);
|
|
||||||
STORAGE
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
-int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_start(struct start_key key)
|
|
||||||
{
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
u64 ts = bpf_ktime_get_ns();
|
|
||||||
- start.update(&req, &ts);
|
|
||||||
+ start.update(&key, &ts);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ SET_FLAGS
|
|
||||||
+
|
|
||||||
+ return __trace_req_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// output
|
|
||||||
-int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_done(struct start_key key)
|
|
||||||
{
|
|
||||||
u64 *tsp, delta;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- tsp = start.lookup(&req);
|
|
||||||
+ tsp = start.lookup(&key);
|
|
||||||
if (tsp == 0) {
|
|
||||||
return 0; // missed issue
|
|
||||||
}
|
|
||||||
@@ -116,9 +161,31 @@ int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
// store as histogram
|
|
||||||
STORE
|
|
||||||
|
|
||||||
- start.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_done(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ SET_FLAGS
|
|
||||||
+
|
|
||||||
+ return __trace_req_done(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_done_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct start_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_done(key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
|
|
||||||
# code substitutions
|
|
||||||
@@ -134,21 +201,18 @@ store_str = ""
|
|
||||||
if args.disks:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
|
|
||||||
disks_str = """
|
|
||||||
- disk_key_t key = {.slot = bpf_log2l(delta)};
|
|
||||||
- void *__tmp = (void *)req->__RQ_DISK__->disk_name;
|
|
||||||
- bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
|
|
||||||
- dist.atomic_increment(key);
|
|
||||||
+ disk_key_t dkey = {};
|
|
||||||
+ dkey.dev = key.dev;
|
|
||||||
+ dkey.slot = bpf_log2l(delta);
|
|
||||||
+ dist.atomic_increment(dkey);
|
|
||||||
"""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
|
|
||||||
- else:
|
|
||||||
- store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
|
|
||||||
+ store_str += disks_str
|
|
||||||
elif args.flags:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
|
|
||||||
store_str += """
|
|
||||||
- flag_key_t key = {.slot = bpf_log2l(delta)};
|
|
||||||
- key.flags = req->cmd_flags;
|
|
||||||
- dist.atomic_increment(key);
|
|
||||||
+ flag_key_t fkey = {.slot = bpf_log2l(delta)};
|
|
||||||
+ fkey.flags = key.flags;
|
|
||||||
+ dist.atomic_increment(fkey);
|
|
||||||
"""
|
|
||||||
else:
|
|
||||||
storage_str += "BPF_HISTOGRAM(dist);"
|
|
||||||
@@ -161,21 +225,13 @@ store_str = ""
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
stat_info = os.stat(disk_path)
|
|
||||||
- major = os.major(stat_info.st_rdev)
|
|
||||||
- minor = os.minor(stat_info.st_rdev)
|
|
||||||
-
|
|
||||||
- disk_field_str = ""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- disk_field_str = 'req->rq_disk'
|
|
||||||
- else:
|
|
||||||
- disk_field_str = 'req->q->disk'
|
|
||||||
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
|
|
||||||
|
|
||||||
disk_filter_str = """
|
|
||||||
- struct gendisk *disk = %s;
|
|
||||||
- if (!(disk->major == %d && disk->first_minor == %d)) {
|
|
||||||
+ if(key.dev != %s) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
- """ % (disk_field_str, major, minor)
|
|
||||||
+ """ % (dev)
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
|
|
||||||
else:
|
|
||||||
@@ -194,6 +250,16 @@ store_str = ""
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace("STORAGE", storage_str)
|
|
||||||
bpf_text = bpf_text.replace("STORE", store_str)
|
|
||||||
+if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
|
|
||||||
+else:
|
|
||||||
+ bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
|
|
||||||
+if args.flags:
|
|
||||||
+ bpf_text = bpf_text.replace('CMD_FLAGS', 'u64 flags;')
|
|
||||||
+ bpf_text = bpf_text.replace('SET_FLAGS', 'key.flags = req->cmd_flags;')
|
|
||||||
+else:
|
|
||||||
+ bpf_text = bpf_text.replace('CMD_FLAGS', '')
|
|
||||||
+ bpf_text = bpf_text.replace('SET_FLAGS', '')
|
|
||||||
|
|
||||||
if debug or args.ebpf:
|
|
||||||
print(bpf_text)
|
|
||||||
@@ -205,25 +271,53 @@ b = BPF(text=bpf_text)
|
|
||||||
if args.queued:
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
|
|
||||||
- else:
|
|
||||||
+ elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
|
|
||||||
+ else:
|
|
||||||
+ if args.flags:
|
|
||||||
+ # Some flags are accessible in the rwbs field (RAHEAD, SYNC and META)
|
|
||||||
+ # but other aren't. Disable the -F option for tracepoint for now.
|
|
||||||
+ print("ERROR: blk_account_io_start probe not available. Can't use -F.")
|
|
||||||
+ exit()
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_req_start_tp")
|
|
||||||
else:
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
+
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
|
|
||||||
+else:
|
|
||||||
+ if args.flags:
|
|
||||||
+ print("ERROR: blk_account_io_done probe not available. Can't use -F.")
|
|
||||||
+ exit()
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_done_tp")
|
|
||||||
+
|
|
||||||
|
|
||||||
if not args.json:
|
|
||||||
print("Tracing block device I/O... Hit Ctrl-C to end.")
|
|
||||||
|
|
||||||
-def disk_print(s):
|
|
||||||
- disk = s.decode('utf-8', 'replace')
|
|
||||||
- if not disk:
|
|
||||||
- disk = "<unknown>"
|
|
||||||
- return disk
|
|
||||||
+# cache disk major,minor -> diskname
|
|
||||||
+diskstats = "/proc/diskstats"
|
|
||||||
+disklookup = {}
|
|
||||||
+with open(diskstats) as stats:
|
|
||||||
+ for line in stats:
|
|
||||||
+ a = line.split()
|
|
||||||
+ disklookup[a[0] + "," + a[1]] = a[2]
|
|
||||||
+
|
|
||||||
+def disk_print(d):
|
|
||||||
+ major = d >> 20
|
|
||||||
+ minor = d & ((1 << 20) - 1)
|
|
||||||
+
|
|
||||||
+ disk = str(major) + "," + str(minor)
|
|
||||||
+ if disk in disklookup:
|
|
||||||
+ diskname = disklookup[disk]
|
|
||||||
+ else:
|
|
||||||
+ diskname = "?"
|
|
||||||
+
|
|
||||||
+ return diskname
|
|
||||||
|
|
||||||
# see blk_fill_rwbs():
|
|
||||||
req_opf = {
|
|
||||||
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
|
|
||||||
index 33703233..f0fef98b 100755
|
|
||||||
--- a/tools/biosnoop.py
|
|
||||||
+++ b/tools/biosnoop.py
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
# 11-Feb-2016 Allan McAleavy updated for BPF_PERF_OUTPUT
|
|
||||||
# 21-Jun-2022 Rocky Xing Added disk filter support.
|
|
||||||
# 13-Oct-2022 Rocky Xing Added support for displaying block I/O pattern.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -64,6 +65,24 @@ struct val_t {
|
|
||||||
char name[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hash_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 rwflag;
|
|
||||||
+ sector_t sector;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+
|
|
||||||
#ifdef INCLUDE_PATTERN
|
|
||||||
struct sector_key_t {
|
|
||||||
u32 dev_major;
|
|
||||||
@@ -79,6 +98,7 @@ enum bio_pattern {
|
|
||||||
|
|
||||||
struct data_t {
|
|
||||||
u32 pid;
|
|
||||||
+ u32 dev;
|
|
||||||
u64 rwflag;
|
|
||||||
u64 delta;
|
|
||||||
u64 qdelta;
|
|
||||||
@@ -88,7 +108,6 @@ struct data_t {
|
|
||||||
enum bio_pattern pattern;
|
|
||||||
#endif
|
|
||||||
u64 ts;
|
|
||||||
- char disk_name[DISK_NAME_LEN];
|
|
||||||
char name[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -96,12 +115,45 @@ struct data_t {
|
|
||||||
BPF_HASH(last_sectors, struct sector_key_t, u64);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *, struct start_req_t);
|
|
||||||
-BPF_HASH(infobyreq, struct request *, struct val_t);
|
|
||||||
+BPF_HASH(start, struct hash_key, struct start_req_t);
|
|
||||||
+BPF_HASH(infobyreq, struct hash_key, struct val_t);
|
|
||||||
BPF_PERF_OUTPUT(events);
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
+ * it may be backported to earlier kernels) with how block request write flags
|
|
||||||
+ * are tested. We handle both pre- and post-change versions here. Please avoid
|
|
||||||
+ * kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
+ * test, and maintenance burden.
|
|
||||||
+ */
|
|
||||||
+static int get_rwflag(u32 cmd_flags) {
|
|
||||||
+#ifdef REQ_WRITE
|
|
||||||
+ return !!(cmd_flags & REQ_WRITE);
|
|
||||||
+#elif defined(REQ_OP_SHIFT)
|
|
||||||
+ return !!((cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
+#else
|
|
||||||
+ return !!((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
+#endif
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#define RWBS_LEN 8
|
|
||||||
+
|
|
||||||
+static int get_rwflag_tp(char *rwbs) {
|
|
||||||
+ for (int i = 0; i < RWBS_LEN; i++) {
|
|
||||||
+ if (rwbs[i] == 'W')
|
|
||||||
+ return 1;
|
|
||||||
+ if (rwbs[i] == '\\0')
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// cache PID and comm by-req
|
|
||||||
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_pid_start(struct hash_key key)
|
|
||||||
{
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
@@ -113,47 +165,76 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
if (##QUEUE##) {
|
|
||||||
val.ts = bpf_ktime_get_ns();
|
|
||||||
}
|
|
||||||
- infobyreq.update(&req, &val);
|
|
||||||
+ infobyreq.update(&key, &val);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+
|
|
||||||
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_pid_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .rwflag = get_rwflag_tp(args->rwbs),
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
DISK_FILTER
|
|
||||||
|
|
||||||
struct start_req_t start_req = {
|
|
||||||
.ts = bpf_ktime_get_ns(),
|
|
||||||
.data_len = req->__data_len
|
|
||||||
};
|
|
||||||
- start.update(&req, &start_req);
|
|
||||||
+ start.update(&key, &start_req);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// output
|
|
||||||
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_completion(void *ctx, struct hash_key key)
|
|
||||||
{
|
|
||||||
struct start_req_t *startp;
|
|
||||||
struct val_t *valp;
|
|
||||||
struct data_t data = {};
|
|
||||||
- struct gendisk *rq_disk;
|
|
||||||
+ //struct gendisk *rq_disk;
|
|
||||||
u64 ts;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- startp = start.lookup(&req);
|
|
||||||
+ startp = start.lookup(&key);
|
|
||||||
if (startp == 0) {
|
|
||||||
// missed tracing issue
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
ts = bpf_ktime_get_ns();
|
|
||||||
- rq_disk = req->__RQ_DISK__;
|
|
||||||
+ //rq_disk = req->__RQ_DISK__;
|
|
||||||
data.delta = ts - startp->ts;
|
|
||||||
data.ts = ts / 1000;
|
|
||||||
data.qdelta = 0;
|
|
||||||
data.len = startp->data_len;
|
|
||||||
|
|
||||||
- valp = infobyreq.lookup(&req);
|
|
||||||
+ valp = infobyreq.lookup(&key);
|
|
||||||
if (valp == 0) {
|
|
||||||
data.name[0] = '?';
|
|
||||||
data.name[1] = 0;
|
|
||||||
@@ -162,10 +243,9 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
data.qdelta = startp->ts - valp->ts;
|
|
||||||
}
|
|
||||||
data.pid = valp->pid;
|
|
||||||
- data.sector = req->__sector;
|
|
||||||
+ data.sector = key.sector;
|
|
||||||
+ data.dev = key.dev;
|
|
||||||
bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
|
|
||||||
- bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
|
|
||||||
- rq_disk->disk_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef INCLUDE_PATTERN
|
|
||||||
@@ -174,8 +254,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
u64 *sector, last_sector;
|
|
||||||
|
|
||||||
struct sector_key_t sector_key = {
|
|
||||||
- .dev_major = rq_disk->major,
|
|
||||||
- .dev_minor = rq_disk->first_minor
|
|
||||||
+ .dev_major = key.dev >> 20,
|
|
||||||
+ .dev_minor = key.dev & ((1 << 20) - 1)
|
|
||||||
};
|
|
||||||
|
|
||||||
sector = last_sectors.lookup(§or_key);
|
|
||||||
@@ -187,27 +267,36 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
last_sectors.update(§or_key, &last_sector);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
-/*
|
|
||||||
- * The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
- * it may be backported to earlier kernels) with how block request write flags
|
|
||||||
- * are tested. We handle both pre- and post-change versions here. Please avoid
|
|
||||||
- * kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
- * test, and maintenance burden.
|
|
||||||
- */
|
|
||||||
-#ifdef REQ_WRITE
|
|
||||||
- data.rwflag = !!(req->cmd_flags & REQ_WRITE);
|
|
||||||
-#elif defined(REQ_OP_SHIFT)
|
|
||||||
- data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
-#else
|
|
||||||
- data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
-#endif
|
|
||||||
+ data.rwflag = key.rwflag;
|
|
||||||
|
|
||||||
events.perf_submit(ctx, &data, sizeof(data));
|
|
||||||
- start.delete(&req);
|
|
||||||
- infobyreq.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
+ infobyreq.delete(&key);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .rwflag = get_rwflag(req->cmd_flags),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(ctx, key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_completion_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .rwflag = get_rwflag_tp(args->rwbs),
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(args, key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
if args.queue:
|
|
||||||
bpf_text = bpf_text.replace('##QUEUE##', '1')
|
|
||||||
@@ -225,21 +314,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
stat_info = os.stat(disk_path)
|
|
||||||
- major = os.major(stat_info.st_rdev)
|
|
||||||
- minor = os.minor(stat_info.st_rdev)
|
|
||||||
-
|
|
||||||
- disk_field_str = ""
|
|
||||||
- if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
|
|
||||||
- disk_field_str = 'req->rq_disk'
|
|
||||||
- else:
|
|
||||||
- disk_field_str = 'req->q->disk'
|
|
||||||
+ dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
|
|
||||||
|
|
||||||
disk_filter_str = """
|
|
||||||
- struct gendisk *disk = %s;
|
|
||||||
- if (!(disk->major == %d && disk->first_minor == %d)) {
|
|
||||||
+ if(key.dev != %s) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
- """ % (disk_field_str, major, minor)
|
|
||||||
+ """ % (dev)
|
|
||||||
|
|
||||||
bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str)
|
|
||||||
else:
|
|
||||||
@@ -254,15 +335,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
|
|
||||||
|
|
||||||
# header
|
|
||||||
print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
|
|
||||||
@@ -273,6 +358,27 @@ print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
|
|
||||||
print("%7s " % ("QUE(ms)"), end="")
|
|
||||||
print("%7s" % "LAT(ms)")
|
|
||||||
|
|
||||||
+
|
|
||||||
+# cache disk major,minor -> diskname
|
|
||||||
+diskstats = "/proc/diskstats"
|
|
||||||
+disklookup = {}
|
|
||||||
+with open(diskstats) as stats:
|
|
||||||
+ for line in stats:
|
|
||||||
+ a = line.split()
|
|
||||||
+ disklookup[a[0] + "," + a[1]] = a[2]
|
|
||||||
+
|
|
||||||
+def disk_print(d):
|
|
||||||
+ major = d >> 20
|
|
||||||
+ minor = d & ((1 << 20) - 1)
|
|
||||||
+
|
|
||||||
+ disk = str(major) + "," + str(minor)
|
|
||||||
+ if disk in disklookup:
|
|
||||||
+ diskname = disklookup[disk]
|
|
||||||
+ else:
|
|
||||||
+ diskname = "<unknown>"
|
|
||||||
+
|
|
||||||
+ return diskname
|
|
||||||
+
|
|
||||||
rwflg = ""
|
|
||||||
pattern = ""
|
|
||||||
start_ts = 0
|
|
||||||
@@ -297,9 +403,7 @@ P_RANDOM = 2
|
|
||||||
|
|
||||||
delta = float(event.ts) - start_ts
|
|
||||||
|
|
||||||
- disk_name = event.disk_name.decode('utf-8', 'replace')
|
|
||||||
- if not disk_name:
|
|
||||||
- disk_name = '<unknown>'
|
|
||||||
+ disk_name = disk_print(event.dev)
|
|
||||||
|
|
||||||
print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % (
|
|
||||||
delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid,
|
|
||||||
diff --git a/tools/biotop.py b/tools/biotop.py
|
|
||||||
index fcdd373f..2620983a 100755
|
|
||||||
--- a/tools/biotop.py
|
|
||||||
+++ b/tools/biotop.py
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
#
|
|
||||||
# 06-Feb-2016 Brendan Gregg Created this.
|
|
||||||
# 17-Mar-2022 Rocky Xing Added PID filter support.
|
|
||||||
+# 01-Aug-2023 Jerome Marchand Added support for block tracepoints
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
from bcc import BPF
|
|
||||||
@@ -88,14 +89,35 @@ struct val_t {
|
|
||||||
u32 io;
|
|
||||||
};
|
|
||||||
|
|
||||||
-BPF_HASH(start, struct request *, struct start_req_t);
|
|
||||||
-BPF_HASH(whobyreq, struct request *, struct who_t);
|
|
||||||
+struct tp_args {
|
|
||||||
+ u64 __unused__;
|
|
||||||
+ dev_t dev;
|
|
||||||
+ sector_t sector;
|
|
||||||
+ unsigned int nr_sector;
|
|
||||||
+ unsigned int bytes;
|
|
||||||
+ char rwbs[8];
|
|
||||||
+ char comm[16];
|
|
||||||
+ char cmd[];
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct hash_key {
|
|
||||||
+ dev_t dev;
|
|
||||||
+ u32 _pad;
|
|
||||||
+ sector_t sector;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+BPF_HASH(start, struct hash_key, struct start_req_t);
|
|
||||||
+BPF_HASH(whobyreq, struct hash_key, struct who_t);
|
|
||||||
BPF_HASH(counts, struct info_t, struct val_t);
|
|
||||||
|
|
||||||
+static dev_t ddevt(struct gendisk *disk) {
|
|
||||||
+ return (disk->major << 20) | disk->first_minor;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// cache PID and comm by-req
|
|
||||||
-int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_pid_start(struct hash_key key)
|
|
||||||
{
|
|
||||||
- struct who_t who = {};
|
|
||||||
+ struct who_t who;
|
|
||||||
u32 pid;
|
|
||||||
|
|
||||||
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
|
|
||||||
@@ -104,30 +126,54 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
who.pid = pid;
|
|
||||||
- whobyreq.update(&req, &who);
|
|
||||||
+ whobyreq.update(&key, &who);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
+int trace_pid_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_pid_start_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_pid_start(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// time block I/O
|
|
||||||
int trace_req_start(struct pt_regs *ctx, struct request *req)
|
|
||||||
{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
struct start_req_t start_req = {
|
|
||||||
.ts = bpf_ktime_get_ns(),
|
|
||||||
.data_len = req->__data_len
|
|
||||||
};
|
|
||||||
- start.update(&req, &start_req);
|
|
||||||
+ start.update(&key, &start_req);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// output
|
|
||||||
-int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+static int __trace_req_completion(struct hash_key key)
|
|
||||||
{
|
|
||||||
struct start_req_t *startp;
|
|
||||||
|
|
||||||
// fetch timestamp and calculate delta
|
|
||||||
- startp = start.lookup(&req);
|
|
||||||
+ startp = start.lookup(&key);
|
|
||||||
if (startp == 0) {
|
|
||||||
return 0; // missed tracing issue
|
|
||||||
}
|
|
||||||
@@ -135,12 +181,12 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
struct who_t *whop;
|
|
||||||
u32 pid;
|
|
||||||
|
|
||||||
- whop = whobyreq.lookup(&req);
|
|
||||||
+ whop = whobyreq.lookup(&key);
|
|
||||||
pid = whop != 0 ? whop->pid : 0;
|
|
||||||
if (FILTER_PID) {
|
|
||||||
- start.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
if (whop != 0) {
|
|
||||||
- whobyreq.delete(&req);
|
|
||||||
+ whobyreq.delete(&key);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -150,8 +196,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
|
|
||||||
// setup info_t key
|
|
||||||
struct info_t info = {};
|
|
||||||
- info.major = req->__RQ_DISK__->major;
|
|
||||||
- info.minor = req->__RQ_DISK__->first_minor;
|
|
||||||
+ info.major = key.dev >> 20;
|
|
||||||
+ info.minor = key.dev & ((1 << 20) - 1);
|
|
||||||
/*
|
|
||||||
* The following deals with a kernel version change (in mainline 4.7, although
|
|
||||||
* it may be backported to earlier kernels) with how block request write flags
|
|
||||||
@@ -159,13 +205,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
* kernel version tests like this as much as possible: they inflate the code,
|
|
||||||
* test, and maintenance burden.
|
|
||||||
*/
|
|
||||||
-#ifdef REQ_WRITE
|
|
||||||
+/*#ifdef REQ_WRITE
|
|
||||||
info.rwflag = !!(req->cmd_flags & REQ_WRITE);
|
|
||||||
#elif defined(REQ_OP_SHIFT)
|
|
||||||
info.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
|
|
||||||
#else
|
|
||||||
info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
|
|
||||||
-#endif
|
|
||||||
+#endif*/
|
|
||||||
|
|
||||||
if (whop == 0) {
|
|
||||||
// missed pid who, save stats as pid 0
|
|
||||||
@@ -183,11 +229,31 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
valp->io++;
|
|
||||||
}
|
|
||||||
|
|
||||||
- start.delete(&req);
|
|
||||||
- whobyreq.delete(&req);
|
|
||||||
+ start.delete(&key);
|
|
||||||
+ whobyreq.delete(&key);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = ddevt(req->__RQ_DISK__),
|
|
||||||
+ .sector = req->__sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(key);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int trace_req_completion_tp(struct tp_args *args)
|
|
||||||
+{
|
|
||||||
+ struct hash_key key = {
|
|
||||||
+ .dev = args->dev,
|
|
||||||
+ .sector = args->sector
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ return __trace_req_completion(key);
|
|
||||||
+}
|
|
||||||
"""
|
|
||||||
|
|
||||||
if args.ebpf:
|
|
||||||
@@ -207,15 +273,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
|
|
||||||
b = BPF(text=bpf_text)
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_start'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
|
|
||||||
if BPF.get_kprobe_functions(b'blk_start_request'):
|
|
||||||
b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
|
|
||||||
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
|
|
||||||
if BPF.get_kprobe_functions(b'__blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
-else:
|
|
||||||
+elif BPF.get_kprobe_functions(b'blk_account_io_done'):
|
|
||||||
b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion")
|
|
||||||
+else:
|
|
||||||
+ b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
|
|
||||||
|
|
||||||
print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)
|
|
||||||
|
|
||||||
--
|
|
||||||
2.41.0
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user