From 7a2f37715b27b048edef27f22cf9e338221c2366 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 7 Dec 2021 13:21:21 -0500 Subject: [PATCH] import bcc-0.20.0-7.el9 --- SOURCES/bcc-0.20.0-Fix-mdflush-on-RHEL9.patch | 42 ++ ...of-task_struct_-state-field-on-RHEL-.patch | 96 +++++ SOURCES/bcc-0.20.0-Update-cpudist.py.patch | 41 ++ ....0-sync-with-latest-libbpf-repo-3529.patch | 408 ++++++++++++++++++ ...-look-for-pthread_create-in-libc-too.patch | 34 ++ ...compatible-with-kernel-version-5.10-.patch | 109 +++++ SPECS/bcc.spec | 31 +- 7 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 SOURCES/bcc-0.20.0-Fix-mdflush-on-RHEL9.patch create mode 100644 SOURCES/bcc-0.20.0-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch create mode 100644 SOURCES/bcc-0.20.0-Update-cpudist.py.patch create mode 100644 SOURCES/bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch create mode 100644 SOURCES/bcc-0.20.0-threadsnoop-look-for-pthread_create-in-libc-too.patch create mode 100644 SOURCES/bcc-0.20.0-tools-readahead-compatible-with-kernel-version-5.10-.patch diff --git a/SOURCES/bcc-0.20.0-Fix-mdflush-on-RHEL9.patch b/SOURCES/bcc-0.20.0-Fix-mdflush-on-RHEL9.patch new file mode 100644 index 0000000..b6b9e07 --- /dev/null +++ b/SOURCES/bcc-0.20.0-Fix-mdflush-on-RHEL9.patch @@ -0,0 +1,42 @@ +From 8032bb4053ff8803371b038fc696b9fa682027f2 Mon Sep 17 00:00:00 2001 +From: Jerome Marchand +Date: Thu, 7 Oct 2021 17:31:53 +0200 +Subject: [PATCH] Fix mdflush on RHEL9 + +Since kernel commit 309dca309fc ("block: store a block_device pointer +in struct bio") struct bio points again to a block_device and not to a +gendisk directly. However mdflush is looking at the presence or not of +the bio_dev macro to check whether to get the gendisk directly from +the bio or not, which doesn't work anymore since the bio_dev macro +still exists. Since we don't have to deal with other kernel versions +but our own, just use the definition that we use in our kernels. 
+--- + tools/mdflush.py | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/tools/mdflush.py b/tools/mdflush.py +index 2abe15cf..df0f13c1 100755 +--- a/tools/mdflush.py ++++ b/tools/mdflush.py +@@ -35,18 +35,7 @@ int kprobe__md_flush_request(struct pt_regs *ctx, void *mddev, struct bio *bio) + u32 pid = bpf_get_current_pid_tgid(); + data.pid = pid; + bpf_get_current_comm(&data.comm, sizeof(data.comm)); +-/* +- * The following deals with a kernel version change (in mainline 4.14, although +- * it may be backported to earlier kernels) with how the disk name is accessed. +- * We handle both pre- and post-change versions here. Please avoid kernel +- * version tests like this as much as possible: they inflate the code, test, +- * and maintenance burden. +- */ +-#ifdef bio_dev +- struct gendisk *bi_disk = bio->bi_disk; +-#else + struct gendisk *bi_disk = bio->bi_bdev->bd_disk; +-#endif + bpf_probe_read_kernel(&data.disk, sizeof(data.disk), bi_disk->disk_name); + events.perf_submit(ctx, &data, sizeof(data)); + return 0; +-- +2.31.1 + diff --git a/SOURCES/bcc-0.20.0-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch b/SOURCES/bcc-0.20.0-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch new file mode 100644 index 0000000..6a35839 --- /dev/null +++ b/SOURCES/bcc-0.20.0-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch @@ -0,0 +1,96 @@ +From 019615235458a9486d883a675a3ea16014ee597f Mon Sep 17 00:00:00 2001 +From: Jerome Marchand +Date: Thu, 14 Oct 2021 12:01:01 +0200 +Subject: [PATCH] Handle renaming of task_struct_>state field on RHEL 9 + +There has been some cleanup of task_struct's state field and to catch +any place that has been missed in the conversion, it has been renamed +__state. 
+--- + tools/offcputime.py | 4 ++-- + tools/offwaketime.py | 4 ++-- + tools/runqlat.py | 4 ++-- + tools/runqslower.py | 4 ++-- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/tools/offcputime.py b/tools/offcputime.py +index 128c6496..b93e78d2 100755 +--- a/tools/offcputime.py ++++ b/tools/offcputime.py +@@ -205,10 +205,10 @@ thread_context = "" + thread_context = "all threads" + thread_filter = '1' + if args.state == 0: +- state_filter = 'prev->state == 0' ++ state_filter = 'prev->__state == 0' + elif args.state: + # these states are sometimes bitmask checked +- state_filter = 'prev->state & %d' % args.state ++ state_filter = 'prev->__state & %d' % args.state + else: + state_filter = '1' + bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) +diff --git a/tools/offwaketime.py b/tools/offwaketime.py +index 753eee97..722c0381 100755 +--- a/tools/offwaketime.py ++++ b/tools/offwaketime.py +@@ -254,10 +254,10 @@ int oncpu(struct pt_regs *ctx, struct task_struct *p) { + else: + thread_filter = '1' + if args.state == 0: +- state_filter = 'p->state == 0' ++ state_filter = 'p->__state == 0' + elif args.state: + # these states are sometimes bitmask checked +- state_filter = 'p->state & %d' % args.state ++ state_filter = 'p->__state & %d' % args.state + else: + state_filter = '1' + bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) +diff --git a/tools/runqlat.py b/tools/runqlat.py +index b13ff2d1..8e443c3c 100755 +--- a/tools/runqlat.py ++++ b/tools/runqlat.py +@@ -116,7 +116,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev) + u32 pid, tgid; + + // ivcsw: treat like an enqueue event and store timestamp +- if (prev->state == TASK_RUNNING) { ++ if (prev->__state == TASK_RUNNING) { + tgid = prev->tgid; + pid = prev->pid; + if (!(FILTER || pid == 0)) { +@@ -170,7 +170,7 @@ RAW_TRACEPOINT_PROBE(sched_switch) + u32 pid, tgid; + + // ivcsw: treat like an enqueue event and store timestamp +- if (prev->state == TASK_RUNNING) { ++ if 
(prev->__state == TASK_RUNNING) { + tgid = prev->tgid; + pid = prev->pid; + if (!(FILTER || pid == 0)) { +diff --git a/tools/runqslower.py b/tools/runqslower.py +index 6df98d9f..ba71e5d3 100755 +--- a/tools/runqslower.py ++++ b/tools/runqslower.py +@@ -112,7 +112,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev) + u32 pid, tgid; + + // ivcsw: treat like an enqueue event and store timestamp +- if (prev->state == TASK_RUNNING) { ++ if (prev->__state == TASK_RUNNING) { + tgid = prev->tgid; + pid = prev->pid; + u64 ts = bpf_ktime_get_ns(); +@@ -178,7 +178,7 @@ RAW_TRACEPOINT_PROBE(sched_switch) + long state; + + // ivcsw: treat like an enqueue event and store timestamp +- bpf_probe_read_kernel(&state, sizeof(long), (const void *)&prev->state); ++ bpf_probe_read_kernel(&state, sizeof(long), (const void *)&prev->__state); + if (state == TASK_RUNNING) { + bpf_probe_read_kernel(&tgid, sizeof(prev->tgid), &prev->tgid); + bpf_probe_read_kernel(&pid, sizeof(prev->pid), &prev->pid); +-- +2.31.1 + diff --git a/SOURCES/bcc-0.20.0-Update-cpudist.py.patch b/SOURCES/bcc-0.20.0-Update-cpudist.py.patch new file mode 100644 index 0000000..492c72f --- /dev/null +++ b/SOURCES/bcc-0.20.0-Update-cpudist.py.patch @@ -0,0 +1,41 @@ +From ad56e8a5a722df2ac2a5b3ea0822fd78f9a6fe51 Mon Sep 17 00:00:00 2001 +From: Nick-nizhen <74173686+Nick-nizhen@users.noreply.github.com> +Date: Thu, 27 May 2021 13:21:59 +0800 +Subject: [PATCH] Update cpudist.py + +When calculating the ONCPU time, prev has left the CPU already. It is not necessary to judge whether the process state is TASK_RUNNING or not. 
+--- + tools/cpudist.py | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/tools/cpudist.py b/tools/cpudist.py +index eb04f590..b5a6a978 100755 +--- a/tools/cpudist.py ++++ b/tools/cpudist.py +@@ -100,19 +100,13 @@ int sched_switch(struct pt_regs *ctx, struct task_struct *prev) + u64 pid_tgid = bpf_get_current_pid_tgid(); + u32 tgid = pid_tgid >> 32, pid = pid_tgid; + ++ u32 prev_pid = prev->pid; ++ u32 prev_tgid = prev->tgid; + #ifdef ONCPU +- if (prev->state == TASK_RUNNING) { ++ update_hist(prev_tgid, prev_pid, ts); + #else +- if (1) { ++ store_start(prev_tgid, prev_pid, ts); + #endif +- u32 prev_pid = prev->pid; +- u32 prev_tgid = prev->tgid; +-#ifdef ONCPU +- update_hist(prev_tgid, prev_pid, ts); +-#else +- store_start(prev_tgid, prev_pid, ts); +-#endif +- } + + BAIL: + #ifdef ONCPU +-- +2.31.1 + diff --git a/SOURCES/bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch b/SOURCES/bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch new file mode 100644 index 0000000..10765ef --- /dev/null +++ b/SOURCES/bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch @@ -0,0 +1,408 @@ +From 0c12dfe26a362db181e6172cb56a39cd002a6892 Mon Sep 17 00:00:00 2001 +From: yonghong-song +Date: Sun, 18 Jul 2021 15:05:34 -0700 +Subject: [PATCH] sync with latest libbpf repo (#3529) + +sync with latest libbpf repo which is upto commit + 21f90f61b084 sync: latest libbpf changes from kernel + +Signed-off-by: Yonghong Song +--- + docs/kernel-versions.md | 8 ++ + introspection/bps.c | 1 + + src/cc/compat/linux/virtual_bpf.h | 167 ++++++++++++++++++++++++++++-- + src/cc/export/helpers.h | 17 +++ + src/cc/libbpf.c | 8 ++ + 5 files changed, 190 insertions(+), 11 deletions(-) + +diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md +index 9192aa43..33318624 100644 +--- a/docs/kernel-versions.md ++++ b/docs/kernel-versions.md +@@ -208,6 +208,7 @@ Helper | Kernel version | License | Commit | + -------|----------------|---------|--------| + `BPF_FUNC_bind()` | 
4.17 | | [`d74bad4e74ee`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d74bad4e74ee373787a9ae24197c17b7cdc428d5) | + `BPF_FUNC_bprm_opts_set()` | 5.11 | | [`3f6719c7b62f`](https://github.com/torvalds/linux/commit/3f6719c7b62f0327c9091e26d0da10e65668229e) ++`BPF_FUNC_btf_find_by_name_kind()` | 5.14 | | [`3d78417b60fb`](https://github.com/torvalds/linux/commit/3d78417b60fba249cc555468cb72d96f5cde2964) + `BPF_FUNC_check_mtu()` | 5.12 | | [`34b2021cc616`](https://github.com/torvalds/linux/commit/34b2021cc61642d61c3cf943d9e71925b827941b) + `BPF_FUNC_clone_redirect()` | 4.2 | | [`3896d655f4d4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3896d655f4d491c67d669a15f275a39f713410f8) + `BPF_FUNC_copy_from_user()` | 5.10 | | [`07be4c4a3e7a`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=07be4c4a3e7a0db148e44b16c5190e753d1c8569) +@@ -226,6 +227,7 @@ Helper | Kernel version | License | Commit | + `BPF_FUNC_get_current_task()` | 4.8 | GPL | [`606274c5abd8`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=606274c5abd8e245add01bc7145a8cbb92b69ba8) + `BPF_FUNC_get_current_task_btf()` | 5.11 | GPL | [`3ca1032ab7ab`](https://github.com/torvalds/linux/commit/3ca1032ab7ab010eccb107aa515598788f7d93bb) + `BPF_FUNC_get_current_uid_gid()` | 4.2 | | [`ffeedafbf023`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89) ++`BPF_FUNC_get_func_ip()` | 5.15 | | [`5d8b583d04ae`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=5d8b583d04aedb3bd5f6d227a334c210c7d735f9) + `BPF_FUNC_get_hash_recalc()` | 4.8 | | [`13c5c240f789`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=13c5c240f789bbd2bcacb14a23771491485ae61f) + `BPF_FUNC_get_listener_sock()` | 5.1 | | 
[`dbafd7ddd623`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/dbafd7ddd62369b2f3926ab847cbf8fc40e800b7) + `BPF_FUNC_get_local_storage()` | 4.19 | | [`cd3394317653`](https://github.com/torvalds/linux/commit/cd3394317653837e2eb5c5d0904a8996102af9fc) +@@ -352,6 +354,8 @@ Helper | Kernel version | License | Commit | + `BPF_FUNC_store_hdr_opt()` | 5.10 | | [`0813a841566f`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=0813a841566f0962a5551be7749b43c45f0022a0) + `BPF_FUNC_strtol()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a) + `BPF_FUNC_strtoul()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a) ++`BPF_FUNC_sys_bpf()` | 5.14 | | [`79a7f8bdb159`](https://github.com/torvalds/linux/commit/79a7f8bdb159d9914b58740f3d31d602a6e4aca8) ++`BPF_FUNC_sys_close()` | 5.14 | | [`3abea089246f`](https://github.com/torvalds/linux/commit/3abea089246f76c1517b054ddb5946f3f1dbd2c0) + `BPF_FUNC_sysctl_get_current_value()` | 5.2 | | [`1d11b3016cec`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/1d11b3016cec4ed9770b98e82a61708c8f4926e7) + `BPF_FUNC_sysctl_get_name()` | 5.2 | | [`808649fb787d`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/808649fb787d918a48a360a668ee4ee9023f0c11) + `BPF_FUNC_sysctl_get_new_value()` | 5.2 | | [`4e63acdff864`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4e63acdff864654cee0ac5aaeda3913798ee78f6) +@@ -364,6 +368,10 @@ Helper | Kernel version | License | Commit | + `BPF_FUNC_tcp_send_ack()` | 5.5 | | [`206057fe020a`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=206057fe020ac5c037d5e2dd6562a9bd216ec765) + `BPF_FUNC_tcp_sock()` | 5.1 | | 
[`655a51e536c0`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=655a51e536c09d15ffa3603b1b6fce2b45b85a1f) + `BPF_FUNC_this_cpu_ptr()` | 5.10 | | [`63d9b80dcf2c`](https://github.com/torvalds/linux/commit/63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1) | ++`BPF_FUNC_timer_init()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) ++`BPF_FUNC_timer_set_callback()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) ++`BPF_FUNC_timer_start()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) ++`BPF_FUNC_timer_cancel()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) + `BPF_FUNC_trace_printk()` | 4.1 | GPL | [`9c959c863f82`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=9c959c863f8217a2ff3d7c296e8223654d240569) + `BPF_FUNC_xdp_adjust_head()` | 4.10 | | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03) + `BPF_FUNC_xdp_adjust_meta()` | 4.15 | | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da) +diff --git a/introspection/bps.c b/introspection/bps.c +index e92da3f6..25a88cbd 100644 +--- a/introspection/bps.c ++++ b/introspection/bps.c +@@ -47,6 +47,7 @@ static const char * const prog_type_strings[] = { + [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", ++ [BPF_PROG_TYPE_SYSCALL] = "syscall", + }; + + static const char * const map_type_strings[] = { +diff --git a/src/cc/compat/linux/virtual_bpf.h 
b/src/cc/compat/linux/virtual_bpf.h +index 3490bc14..bf4bc3a6 100644 +--- a/src/cc/compat/linux/virtual_bpf.h ++++ b/src/cc/compat/linux/virtual_bpf.h +@@ -325,9 +325,6 @@ union bpf_iter_link_info { + * **BPF_PROG_TYPE_SK_LOOKUP** + * *data_in* and *data_out* must be NULL. + * +- * **BPF_PROG_TYPE_XDP** +- * *ctx_in* and *ctx_out* must be NULL. +- * + * **BPF_PROG_TYPE_RAW_TRACEPOINT**, + * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** + * +@@ -528,6 +525,15 @@ union bpf_iter_link_info { + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * ++ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map ++ * types, the *flags* argument needs to be set to 0, but for other ++ * map types, it may be specified as: ++ * ++ * **BPF_F_LOCK** ++ * Look up and delete the value of a spin-locked map ++ * without returning the lock. This must be specified if ++ * the elements contain a spinlock. ++ * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. +@@ -537,6 +543,10 @@ union bpf_iter_link_info { + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** ++ * * **BPF_MAP_TYPE_HASH** ++ * * **BPF_MAP_TYPE_PERCPU_HASH** ++ * * **BPF_MAP_TYPE_LRU_HASH** ++ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** + * + * Return + * Returns zero on success. 
On error, -1 is returned and *errno* +@@ -838,6 +848,7 @@ enum bpf_cmd { + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, ++ BPF_PROG_RUN = BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, +@@ -938,6 +949,7 @@ enum bpf_prog_type { + BPF_PROG_TYPE_EXT, + BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, ++ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + }; + + enum bpf_attach_type { +@@ -980,6 +992,8 @@ enum bpf_attach_type { + BPF_SK_LOOKUP, + BPF_XDP, + BPF_SK_SKB_VERDICT, ++ BPF_SK_REUSEPORT_SELECT, ++ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + __MAX_BPF_ATTACH_TYPE + }; + +@@ -1098,8 +1112,8 @@ enum bpf_link_type { + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have + * the following extensions: + * +- * insn[0].src_reg: BPF_PSEUDO_MAP_FD +- * insn[0].imm: map fd ++ * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] ++ * insn[0].imm: map fd or fd_idx + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 +@@ -1107,15 +1121,19 @@ enum bpf_link_type { + * verifier type: CONST_PTR_TO_MAP + */ + #define BPF_PSEUDO_MAP_FD 1 +-/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE +- * insn[0].imm: map fd ++#define BPF_PSEUDO_MAP_IDX 5 ++ ++/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE ++ * insn[0].imm: map fd or fd_idx + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ +-#define BPF_PSEUDO_MAP_VALUE 2 ++#define BPF_PSEUDO_MAP_VALUE 2 ++#define BPF_PSEUDO_MAP_IDX_VALUE 6 ++ + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btd id of VAR + * insn[1].imm: 0 +@@ -1315,6 +1333,8 @@ union bpf_attr { + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; ++ __u32 :32; /* pad */ ++ __aligned_u64 fd_array; /* array of FDs */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ +@@ -2535,8 +2555,12 @@ union bpf_attr { + * The lower two bits of 
*flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to **XDP_TX**, as chosen +- * by the caller. Any higher bits in the *flags* argument must be +- * unset. ++ * by the caller. The higher bits of *flags* can be set to ++ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. ++ * ++ * With BPF_F_BROADCAST the packet will be broadcasted to all the ++ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress ++ * interface will be excluded when do broadcasting. + * + * See also **bpf_redirect**\ (), which only supports redirecting + * to an ifindex, but doesn't require a map to do so. +@@ -3223,7 +3247,7 @@ union bpf_attr { + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * Description + * Select a **SO_REUSEPORT** socket from a +- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. ++ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. + * Return +@@ -4736,6 +4760,94 @@ union bpf_attr { + * be zero-terminated except when **str_size** is 0. + * + * Or **-EBUSY** if the per-CPU memory copy buffer is busy. ++ * ++ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) ++ * Description ++ * Execute bpf syscall with given arguments. ++ * Return ++ * A syscall result. ++ * ++ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) ++ * Description ++ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. ++ * Return ++ * Returns btf_id and btf_obj_fd in lower and upper 32 bits. ++ * ++ * long bpf_sys_close(u32 fd) ++ * Description ++ * Execute close syscall for given FD. ++ * Return ++ * A syscall result. ++ * ++ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) ++ * Description ++ * Initialize the timer. ++ * First 4 bits of *flags* specify clockid. 
++ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. ++ * All other bits of *flags* are reserved. ++ * The verifier will reject the program if *timer* is not from ++ * the same *map*. ++ * Return ++ * 0 on success. ++ * **-EBUSY** if *timer* is already initialized. ++ * **-EINVAL** if invalid *flags* are passed. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ * ++ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) ++ * Description ++ * Configure the timer to call *callback_fn* static function. ++ * Return ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EPERM** if *timer* is in a map that doesn't have any user references. ++ * The user space should either hold a file descriptor to a map with timers ++ * or pin such map in bpffs. When map is unpinned or file descriptor is ++ * closed all timers in the map will be cancelled and freed. ++ * ++ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) ++ * Description ++ * Set timer expiration N nanoseconds from the current time. The ++ * configured callback will be invoked in soft irq context on some cpu ++ * and will not repeat unless another bpf_timer_start() is made. ++ * In such case the next invocation can migrate to a different cpu. ++ * Since struct bpf_timer is a field inside map element the map ++ * owns the timer. The bpf_timer_set_callback() will increment refcnt ++ * of BPF program to make sure that callback_fn code stays valid. ++ * When user space reference to a map reaches zero all timers ++ * in a map are cancelled and corresponding program's refcnts are ++ * decremented. 
This is done to make sure that Ctrl-C of a user ++ * process doesn't leave any timers running. If map is pinned in ++ * bpffs the callback_fn can re-arm itself indefinitely. ++ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands ++ * cancel and free the timer in the given map element. ++ * The map can contain timers that invoke callback_fn-s from different ++ * programs. The same callback_fn can serve different timers from ++ * different maps if key/value layout matches across maps. ++ * Every bpf_timer_set_callback() can have different callback_fn. ++ * ++ * Return ++ * 0 on success. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier ++ * or invalid *flags* are passed. ++ * ++ * long bpf_timer_cancel(struct bpf_timer *timer) ++ * Description ++ * Cancel the timer and wait for callback_fn to finish if it was running. ++ * Return ++ * 0 if the timer was not active. ++ * 1 if the timer was active. ++ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. ++ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its ++ * own timer which would have led to a deadlock otherwise. ++ * ++ * u64 bpf_get_func_ip(void *ctx) ++ * Description ++ * Get address of the traced function (for tracing and kprobe programs). ++ * Return ++ * Address of the traced function. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -4904,6 +5016,14 @@ union bpf_attr { + FN(check_mtu), \ + FN(for_each_map_elem), \ + FN(snprintf), \ ++ FN(sys_bpf), \ ++ FN(btf_find_by_name_kind), \ ++ FN(sys_close), \ ++ FN(timer_init), \ ++ FN(timer_set_callback), \ ++ FN(timer_start), \ ++ FN(timer_cancel), \ ++ FN(get_func_ip), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +@@ -5081,6 +5201,12 @@ enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), + }; + ++/* Flags for bpf_redirect_map helper */ ++enum { ++ BPF_F_BROADCAST = (1ULL << 3), ++ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), ++}; ++ + #define __bpf_md_ptr(type, name) \ + union { \ + type name; \ +@@ -5365,6 +5491,20 @@ struct sk_reuseport_md { + __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ + __u32 bind_inany; /* Is sock bound to an INANY address? */ + __u32 hash; /* A hash of the packet 4 tuples */ ++ /* When reuse->migrating_sk is NULL, it is selecting a sk for the ++ * new incoming connection request (e.g. selecting a listen sk for ++ * the received SYN in the TCP case). reuse->sk is one of the sk ++ * in the reuseport group. The bpf prog can use reuse->sk to learn ++ * the local listening ip/port without looking into the skb. ++ * ++ * When reuse->migrating_sk is not NULL, reuse->sk is closed and ++ * reuse->migrating_sk is the socket that needs to be migrated ++ * to another listening socket. migrating_sk could be a fullsock ++ * sk that is fully established or a reqsk that is in-the-middle ++ * of 3-way handshake. ++ */ ++ __bpf_md_ptr(struct bpf_sock *, sk); ++ __bpf_md_ptr(struct bpf_sock *, migrating_sk); + }; + + #define BPF_TAG_SIZE 8 +@@ -6010,6 +6150,11 @@ struct bpf_spin_lock { + __u32 val; + }; + ++struct bpf_timer { ++ __u64 :64; ++ __u64 :64; ++} __attribute__((aligned(8))); ++ + struct bpf_sysctl { + __u32 write; /* Sysctl is being read (= 0) or written (= 1). + * Allows 1,2,4-byte read, but no write. 
+diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h +index e9137f7f..a4e9b705 100644 +--- a/src/cc/export/helpers.h ++++ b/src/cc/export/helpers.h +@@ -847,6 +847,23 @@ static long (*bpf_snprintf)(char *str, __u32 str_size, const char *fmt, + __u64 *data, __u32 data_len) = + (void *)BPF_FUNC_snprintf; + ++static long (*bpf_sys_bpf)(__u32 cmd, void *attr, __u32 attr_size) = ++ (void *)BPF_FUNC_sys_bpf; ++static long (*bpf_btf_find_by_name_kind)(char *name, int name_sz, __u32 kind, int flags) = ++ (void *)BPF_FUNC_btf_find_by_name_kind; ++static long (*bpf_sys_close)(__u32 fd) = (void *)BPF_FUNC_sys_close; ++ ++struct bpf_timer; ++static long (*bpf_timer_init)(struct bpf_timer *timer, void *map, __u64 flags) = ++ (void *)BPF_FUNC_timer_init; ++static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn) = ++ (void *)BPF_FUNC_timer_set_callback; ++static long (*bpf_timer_start)(struct bpf_timer *timer, __u64 nsecs, __u64 flags) = ++ (void *)BPF_FUNC_timer_start; ++static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *)BPF_FUNC_timer_cancel; ++ ++static __u64 (*bpf_get_func_ip)(void *ctx) = (void *)BPF_FUNC_get_func_ip; ++ + /* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c +index b83d68fd..f3608cfe 100644 +--- a/src/cc/libbpf.c ++++ b/src/cc/libbpf.c +@@ -270,6 +270,14 @@ static struct bpf_helper helpers[] = { + {"check_mtu", "5.12"}, + {"for_each_map_elem", "5.13"}, + {"snprintf", "5.13"}, ++ {"sys_bpf", "5.14"}, ++ {"btf_find_by_name_kind", "5.14"}, ++ {"sys_close", "5.14"}, ++ {"timer_init", "5.15"}, ++ {"timer_set_callback", "5.15"}, ++ {"timer_start", "5.15"}, ++ {"timer_cancel", "5.15"}, ++ {"get_func_ip", "5.15"}, + }; + + static uint64_t ptr_to_u64(void *ptr) +-- +2.31.1 + diff --git a/SOURCES/bcc-0.20.0-threadsnoop-look-for-pthread_create-in-libc-too.patch 
b/SOURCES/bcc-0.20.0-threadsnoop-look-for-pthread_create-in-libc-too.patch new file mode 100644 index 0000000..2170eb8 --- /dev/null +++ b/SOURCES/bcc-0.20.0-threadsnoop-look-for-pthread_create-in-libc-too.patch @@ -0,0 +1,34 @@ +From 460a71ab24ad511318342077ac9ef57df543375f Mon Sep 17 00:00:00 2001 +From: Jerome Marchand +Date: Thu, 16 Sep 2021 14:44:23 +0200 +Subject: [PATCH] threadsnoop: look for pthread_create in libc too + +Since glibc 2.34, pthread features are integrated in libc directly. +Look for pthread_create there too when it is not found in libpthread. + +Fixes #3623 +--- + tools/threadsnoop.py | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/tools/threadsnoop.py b/tools/threadsnoop.py +index 04c5e680..471b0c3c 100755 +--- a/tools/threadsnoop.py ++++ b/tools/threadsnoop.py +@@ -38,7 +38,12 @@ void do_entry(struct pt_regs *ctx) { + events.perf_submit(ctx, &data, sizeof(data)); + }; + """) +-b.attach_uprobe(name="pthread", sym="pthread_create", fn_name="do_entry") ++ ++# Since version 2.34, pthread features are integrated in libc ++try: ++ b.attach_uprobe(name="pthread", sym="pthread_create", fn_name="do_entry") ++except Exception: ++ b.attach_uprobe(name="c", sym="pthread_create", fn_name="do_entry") + + print("%-10s %-6s %-16s %s" % ("TIME(ms)", "PID", "COMM", "FUNC")) + +-- +2.31.1 + diff --git a/SOURCES/bcc-0.20.0-tools-readahead-compatible-with-kernel-version-5.10-.patch b/SOURCES/bcc-0.20.0-tools-readahead-compatible-with-kernel-version-5.10-.patch new file mode 100644 index 0000000..b658d56 --- /dev/null +++ b/SOURCES/bcc-0.20.0-tools-readahead-compatible-with-kernel-version-5.10-.patch @@ -0,0 +1,109 @@ +From 6c9d91c2196e69682a611dbfc10a0731f86deada Mon Sep 17 00:00:00 2001 +From: zcy +Date: Fri, 25 Jun 2021 10:16:53 +0800 +Subject: [PATCH] tools/readahead compatible with kernel version >= 5.10 + (#3507) + +After kernel version 5.10, __do_page_cache_readahead() was renamed to do_page_cache_ra(), +let us try both in 
readahead.py. +--- + tools/readahead.py | 12 ++++++++---- + tools/readahead_example.txt | 22 +++++++++++----------- + 2 files changed, 19 insertions(+), 15 deletions(-) + +diff --git a/tools/readahead.py b/tools/readahead.py +index 14182d5a..b338261f 100755 +--- a/tools/readahead.py ++++ b/tools/readahead.py +@@ -20,7 +20,7 @@ import argparse + + # arguments + examples = """examples: +- ./readahead -d 20 # monitor for 10 seconds and generate stats ++ ./readahead -d 20 # monitor for 20 seconds and generate stats + """ + + parser = argparse.ArgumentParser( +@@ -95,15 +95,19 @@ int entry_mark_page_accessed(struct pt_regs *ctx) { + """ + + b = BPF(text=program) +-b.attach_kprobe(event="__do_page_cache_readahead", fn_name="entry__do_page_cache_readahead") +-b.attach_kretprobe(event="__do_page_cache_readahead", fn_name="exit__do_page_cache_readahead") ++if BPF.get_kprobe_functions(b"__do_page_cache_readahead"): ++ ra_event = "__do_page_cache_readahead" ++else: ++ ra_event = "do_page_cache_ra" ++b.attach_kprobe(event=ra_event, fn_name="entry__do_page_cache_readahead") ++b.attach_kretprobe(event=ra_event, fn_name="exit__do_page_cache_readahead") + b.attach_kretprobe(event="__page_cache_alloc", fn_name="exit__page_cache_alloc") + b.attach_kprobe(event="mark_page_accessed", fn_name="entry_mark_page_accessed") + + # header + print("Tracing... Hit Ctrl-C to end.") + +-# print ++# print + def print_stats(): + print() + print("Read-ahead unused pages: %d" % (b["pages"][ct.c_ulong(0)].value)) +diff --git a/tools/readahead_example.txt b/tools/readahead_example.txt +index 079dbaae..6d675c13 100644 +--- a/tools/readahead_example.txt ++++ b/tools/readahead_example.txt +@@ -2,20 +2,20 @@ Demonstration of readahead, the Linux eBPF/bcc version + + Read-ahead mechanism is used by operation sytems to optimize sequential operations + by reading ahead some pages to avoid more expensive filesystem operations. 
This tool +-shows the performance of the read-ahead caching on the system under a given load to ++shows the performance of the read-ahead caching on the system under a given load to + investigate any caching issues. It shows a count for unused pages in the cache and + also prints a histogram showing how long they have remianed there. + + Usage Scenario + ============== + +-Consider that you are developing a React Native application which performs aggressive ++Consider that you are developing a React Native application which performs aggressive + reads while re-encoding a video in local-storage. Usually such an app would be multi- +-layered and have transitional library dependencies. The actual read may be performed +-by some unknown native library which may or may not be using hints to the OS, such as +-madvise(p, LEN, MADV_SEQUENTIAL). If high IOPS is observed in such an app, running +-readahead may pin the issue much faster in this case as the developer digs deeper +-into what may be causing this. ++layered and have transitional library dependencies. The actual read may be performed ++by some unknown native library which may or may not be using hints to the OS, such as ++madvise(p, LEN, MADV_SEQUENTIAL). If high IOPS is observed in such an app, running ++readahead may pin the issue much faster in this case as the developer digs deeper ++into what may be causing this. + + An example where such an issue can surface is: https://github.com/boltdb/bolt/issues/691 + +@@ -40,7 +40,7 @@ Read-ahead unused pages: 6765 + 2048 -> 4095 : 439 |**** | + 4096 -> 8191 : 188 |* | + +-In the example above, we recorded system-wide stats for 30 seconds. We can observe that ++In the example above, we recorded system-wide stats for 30 seconds. We can observe that + while most of the pages stayed in the readahead cache for quite less time, after 30 + seconds 6765 pages still remained in the cache, yet unaccessed. 
+ +@@ -49,12 +49,12 @@ Note on Kprobes Usage + + This tool uses Kprobes on the following kernel functions: + +-__do_page_cache_readahead() ++__do_page_cache_readahead()/do_page_cache_ra() (As of kernel version 5.10 (inclusive), __do_page_cache_readahead was renamed to do_page_cache_ra) + __page_cache_alloc() + mark_page_accessed() + +-Since the tool uses Kprobes, depending on your linux kernel's compilation, these +-functions may be inlined and hence not available for Kprobes. To see whether you have ++Since the tool uses Kprobes, depending on your linux kernel's compilation, these ++functions may be inlined and hence not available for Kprobes. To see whether you have + the functions available, check vmlinux source and binary to confirm whether inlining is + happening or not. You can also check /proc/kallsyms on the host and verify if the target + functions are present there before using this tool. +-- +2.31.1 + diff --git a/SPECS/bcc.spec b/SPECS/bcc.spec index ec54750..e0f90e9 100644 --- a/SPECS/bcc.spec +++ b/SPECS/bcc.spec @@ -27,7 +27,7 @@ Name: bcc Version: 0.20.0 -Release: 6%{?dist} +Release: 7%{?dist} Summary: BPF Compiler Collection (BCC) License: ASL 2.0 URL: https://github.com/iovisor/bcc @@ -36,6 +36,12 @@ Patch0: %{name}-%{version}-libbpf-tool-don-t-ignore-LDFLAGS.patch Patch1: %{name}-%{version}-libbpf-tools-readahead-don-t-mark-struct-hist-as-sta.patch Patch2: %{name}-%{version}-Define-KERNEL_VERSION.patch Patch3: %{name}-%{version}-Revert-libbpf-tools-remove-unecessary-custom-NULL-de.patch +Patch4: %{name}-%{version}-sync-with-latest-libbpf-repo-3529.patch +Patch5: %{name}-%{version}-threadsnoop-look-for-pthread_create-in-libc-too.patch +Patch6: %{name}-%{version}-Update-cpudist.py.patch +Patch7: %{name}-%{version}-tools-readahead-compatible-with-kernel-version-5.10-.patch +Patch8: %{name}-%{version}-Fix-mdflush-on-RHEL9.patch +Patch9: %{name}-%{version}-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch # Arches will be included as upstream 
support is added and dependencies are # satisfied in the respective arches @@ -217,6 +223,23 @@ install libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir} %dir %{_datadir}/%{name} %{_datadir}/%{name}/tools/ %{_datadir}/%{name}/introspection/ +%if 0%{?rhel} > 0 +# inject relies on BPF_KPROBE_OVERRIDE which is not set on RHEL +%exclude %{_datadir}/%{name}/tools/inject +%exclude %{_datadir}/%{name}/tools/doc/inject_example.txt +%exclude %{_mandir}/man8/bcc-inject.8.gz +# Neither btrfs nor zfs are available on RHEL +%exclude %{_datadir}/%{name}/tools/btrfs* +%exclude %{_datadir}/%{name}/tools/doc/btrfs* +%exclude %{_mandir}/man8/bcc-btrfs* +%exclude %{_datadir}/%{name}/tools/zfs* +%exclude %{_datadir}/%{name}/tools/doc/zfs* +%exclude %{_mandir}/man8/bcc-zfs* +# criticalstat relies on CONFIG_PREEMPTIRQ_EVENTS which is disabled on RHEL +%exclude %{_datadir}/%{name}/tools/criticalstat +%exclude %{_datadir}/%{name}/tools/doc/criticalstat_example.txt +%exclude %{_mandir}/man8/bcc-criticalstat.8.gz +%endif %{_mandir}/man8/* %if %{with lua} @@ -230,6 +253,12 @@ install libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir} %endif %changelog +* Thu Oct 14 2021 Jerome Marchand - 0.20.0-7 +- Sync with latest libbpf (fixes BPF_F_BROADCAST breakages of rhbz#1992430) +- Fix cpudist, mdflush, readahead and threadsnoop (rhbz#1992430) +- Handle the renaming of task_struct->state field +- Drop tools that rely on features disabled on RHEL + * Mon Aug 09 2021 Mohan Boddu - 0.20.0-6 - Rebuilt for IMA sigs, glibc 2.34, aarch64 flags Related: rhbz#1991688