Misc fixes (bz1992430)
- Sync with latest libbpf (fixes multiple BPF_F_BROADCAST breakages) - Fix cpudist, mdflush, readahead, threadsnoop. - Handle the renaming of task_struct->state field on RHEL 9 (fixes offcputime, offwaketime, runqlat and runqslower) - Drop several tools that rely on features disabled on RHEL Resolves: rhbz#1992430 Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
This commit is contained in:
parent
40ee81eae2
commit
019ff871c7
42
bcc-0.20.0-Fix-mdflush-on-RHEL9.patch
Normal file
42
bcc-0.20.0-Fix-mdflush-on-RHEL9.patch
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
From 8032bb4053ff8803371b038fc696b9fa682027f2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jerome Marchand <jmarchan@redhat.com>
|
||||||
|
Date: Thu, 7 Oct 2021 17:31:53 +0200
|
||||||
|
Subject: [PATCH] Fix mdflush on RHEL9
|
||||||
|
|
||||||
|
Since kernel commit 309dca309fc ("block: store a block_device pointer
|
||||||
|
in struct bio") struct bio points again to a block_device and not to a
|
||||||
|
gendisk directly. However mdflush is looking at the presence or not of
|
||||||
|
the bio_dev macro to check whether to get the gendisk directly from
|
||||||
|
the bio or not, which doesn't work anymore since the bio_dev macro
|
||||||
|
still exists. Since we don't have to deal with any kernel version
|
||||||
|
but our own, just use the definition that we use in our kernels.
|
||||||
|
---
|
||||||
|
tools/mdflush.py | 11 -----------
|
||||||
|
1 file changed, 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/mdflush.py b/tools/mdflush.py
|
||||||
|
index 2abe15cf..df0f13c1 100755
|
||||||
|
--- a/tools/mdflush.py
|
||||||
|
+++ b/tools/mdflush.py
|
||||||
|
@@ -35,18 +35,7 @@ int kprobe__md_flush_request(struct pt_regs *ctx, void *mddev, struct bio *bio)
|
||||||
|
u32 pid = bpf_get_current_pid_tgid();
|
||||||
|
data.pid = pid;
|
||||||
|
bpf_get_current_comm(&data.comm, sizeof(data.comm));
|
||||||
|
-/*
|
||||||
|
- * The following deals with a kernel version change (in mainline 4.14, although
|
||||||
|
- * it may be backported to earlier kernels) with how the disk name is accessed.
|
||||||
|
- * We handle both pre- and post-change versions here. Please avoid kernel
|
||||||
|
- * version tests like this as much as possible: they inflate the code, test,
|
||||||
|
- * and maintenance burden.
|
||||||
|
- */
|
||||||
|
-#ifdef bio_dev
|
||||||
|
- struct gendisk *bi_disk = bio->bi_disk;
|
||||||
|
-#else
|
||||||
|
struct gendisk *bi_disk = bio->bi_bdev->bd_disk;
|
||||||
|
-#endif
|
||||||
|
bpf_probe_read_kernel(&data.disk, sizeof(data.disk), bi_disk->disk_name);
|
||||||
|
events.perf_submit(ctx, &data, sizeof(data));
|
||||||
|
return 0;
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,96 @@
|
|||||||
|
From 019615235458a9486d883a675a3ea16014ee597f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jerome Marchand <jmarchan@redhat.com>
|
||||||
|
Date: Thu, 14 Oct 2021 12:01:01 +0200
|
||||||
|
Subject: [PATCH] Handle renaming of task_struct->state field on RHEL 9
|
||||||
|
|
||||||
|
There has been some cleanup of task_struct's state field and to catch
|
||||||
|
any place that has been missed in the conversion, it has been renamed
|
||||||
|
__state.
|
||||||
|
---
|
||||||
|
tools/offcputime.py | 4 ++--
|
||||||
|
tools/offwaketime.py | 4 ++--
|
||||||
|
tools/runqlat.py | 4 ++--
|
||||||
|
tools/runqslower.py | 4 ++--
|
||||||
|
4 files changed, 8 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/offcputime.py b/tools/offcputime.py
|
||||||
|
index 128c6496..b93e78d2 100755
|
||||||
|
--- a/tools/offcputime.py
|
||||||
|
+++ b/tools/offcputime.py
|
||||||
|
@@ -205,10 +205,10 @@ thread_context = ""
|
||||||
|
thread_context = "all threads"
|
||||||
|
thread_filter = '1'
|
||||||
|
if args.state == 0:
|
||||||
|
- state_filter = 'prev->state == 0'
|
||||||
|
+ state_filter = 'prev->__state == 0'
|
||||||
|
elif args.state:
|
||||||
|
# these states are sometimes bitmask checked
|
||||||
|
- state_filter = 'prev->state & %d' % args.state
|
||||||
|
+ state_filter = 'prev->__state & %d' % args.state
|
||||||
|
else:
|
||||||
|
state_filter = '1'
|
||||||
|
bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter)
|
||||||
|
diff --git a/tools/offwaketime.py b/tools/offwaketime.py
|
||||||
|
index 753eee97..722c0381 100755
|
||||||
|
--- a/tools/offwaketime.py
|
||||||
|
+++ b/tools/offwaketime.py
|
||||||
|
@@ -254,10 +254,10 @@ int oncpu(struct pt_regs *ctx, struct task_struct *p) {
|
||||||
|
else:
|
||||||
|
thread_filter = '1'
|
||||||
|
if args.state == 0:
|
||||||
|
- state_filter = 'p->state == 0'
|
||||||
|
+ state_filter = 'p->__state == 0'
|
||||||
|
elif args.state:
|
||||||
|
# these states are sometimes bitmask checked
|
||||||
|
- state_filter = 'p->state & %d' % args.state
|
||||||
|
+ state_filter = 'p->__state & %d' % args.state
|
||||||
|
else:
|
||||||
|
state_filter = '1'
|
||||||
|
bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter)
|
||||||
|
diff --git a/tools/runqlat.py b/tools/runqlat.py
|
||||||
|
index b13ff2d1..8e443c3c 100755
|
||||||
|
--- a/tools/runqlat.py
|
||||||
|
+++ b/tools/runqlat.py
|
||||||
|
@@ -116,7 +116,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev)
|
||||||
|
u32 pid, tgid;
|
||||||
|
|
||||||
|
// ivcsw: treat like an enqueue event and store timestamp
|
||||||
|
- if (prev->state == TASK_RUNNING) {
|
||||||
|
+ if (prev->__state == TASK_RUNNING) {
|
||||||
|
tgid = prev->tgid;
|
||||||
|
pid = prev->pid;
|
||||||
|
if (!(FILTER || pid == 0)) {
|
||||||
|
@@ -170,7 +170,7 @@ RAW_TRACEPOINT_PROBE(sched_switch)
|
||||||
|
u32 pid, tgid;
|
||||||
|
|
||||||
|
// ivcsw: treat like an enqueue event and store timestamp
|
||||||
|
- if (prev->state == TASK_RUNNING) {
|
||||||
|
+ if (prev->__state == TASK_RUNNING) {
|
||||||
|
tgid = prev->tgid;
|
||||||
|
pid = prev->pid;
|
||||||
|
if (!(FILTER || pid == 0)) {
|
||||||
|
diff --git a/tools/runqslower.py b/tools/runqslower.py
|
||||||
|
index 6df98d9f..ba71e5d3 100755
|
||||||
|
--- a/tools/runqslower.py
|
||||||
|
+++ b/tools/runqslower.py
|
||||||
|
@@ -112,7 +112,7 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev)
|
||||||
|
u32 pid, tgid;
|
||||||
|
|
||||||
|
// ivcsw: treat like an enqueue event and store timestamp
|
||||||
|
- if (prev->state == TASK_RUNNING) {
|
||||||
|
+ if (prev->__state == TASK_RUNNING) {
|
||||||
|
tgid = prev->tgid;
|
||||||
|
pid = prev->pid;
|
||||||
|
u64 ts = bpf_ktime_get_ns();
|
||||||
|
@@ -178,7 +178,7 @@ RAW_TRACEPOINT_PROBE(sched_switch)
|
||||||
|
long state;
|
||||||
|
|
||||||
|
// ivcsw: treat like an enqueue event and store timestamp
|
||||||
|
- bpf_probe_read_kernel(&state, sizeof(long), (const void *)&prev->state);
|
||||||
|
+ bpf_probe_read_kernel(&state, sizeof(long), (const void *)&prev->__state);
|
||||||
|
if (state == TASK_RUNNING) {
|
||||||
|
bpf_probe_read_kernel(&tgid, sizeof(prev->tgid), &prev->tgid);
|
||||||
|
bpf_probe_read_kernel(&pid, sizeof(prev->pid), &prev->pid);
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
41
bcc-0.20.0-Update-cpudist.py.patch
Normal file
41
bcc-0.20.0-Update-cpudist.py.patch
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
From ad56e8a5a722df2ac2a5b3ea0822fd78f9a6fe51 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Nick-nizhen <74173686+Nick-nizhen@users.noreply.github.com>
|
||||||
|
Date: Thu, 27 May 2021 13:21:59 +0800
|
||||||
|
Subject: [PATCH] Update cpudist.py
|
||||||
|
|
||||||
|
When calculating the ONCPU time, prev has left the CPU already. It is not necessary to judge whether the process state is TASK_RUNNING or not.
|
||||||
|
---
|
||||||
|
tools/cpudist.py | 14 ++++----------
|
||||||
|
1 file changed, 4 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/cpudist.py b/tools/cpudist.py
|
||||||
|
index eb04f590..b5a6a978 100755
|
||||||
|
--- a/tools/cpudist.py
|
||||||
|
+++ b/tools/cpudist.py
|
||||||
|
@@ -100,19 +100,13 @@ int sched_switch(struct pt_regs *ctx, struct task_struct *prev)
|
||||||
|
u64 pid_tgid = bpf_get_current_pid_tgid();
|
||||||
|
u32 tgid = pid_tgid >> 32, pid = pid_tgid;
|
||||||
|
|
||||||
|
+ u32 prev_pid = prev->pid;
|
||||||
|
+ u32 prev_tgid = prev->tgid;
|
||||||
|
#ifdef ONCPU
|
||||||
|
- if (prev->state == TASK_RUNNING) {
|
||||||
|
+ update_hist(prev_tgid, prev_pid, ts);
|
||||||
|
#else
|
||||||
|
- if (1) {
|
||||||
|
+ store_start(prev_tgid, prev_pid, ts);
|
||||||
|
#endif
|
||||||
|
- u32 prev_pid = prev->pid;
|
||||||
|
- u32 prev_tgid = prev->tgid;
|
||||||
|
-#ifdef ONCPU
|
||||||
|
- update_hist(prev_tgid, prev_pid, ts);
|
||||||
|
-#else
|
||||||
|
- store_start(prev_tgid, prev_pid, ts);
|
||||||
|
-#endif
|
||||||
|
- }
|
||||||
|
|
||||||
|
BAIL:
|
||||||
|
#ifdef ONCPU
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
408
bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch
Normal file
408
bcc-0.20.0-sync-with-latest-libbpf-repo-3529.patch
Normal file
@ -0,0 +1,408 @@
|
|||||||
|
From 0c12dfe26a362db181e6172cb56a39cd002a6892 Mon Sep 17 00:00:00 2001
|
||||||
|
From: yonghong-song <yhs@fb.com>
|
||||||
|
Date: Sun, 18 Jul 2021 15:05:34 -0700
|
||||||
|
Subject: [PATCH] sync with latest libbpf repo (#3529)
|
||||||
|
|
||||||
|
sync with latest libbpf repo which is up to commit
|
||||||
|
21f90f61b084 sync: latest libbpf changes from kernel
|
||||||
|
|
||||||
|
Signed-off-by: Yonghong Song <yhs@fb.com>
|
||||||
|
---
|
||||||
|
docs/kernel-versions.md | 8 ++
|
||||||
|
introspection/bps.c | 1 +
|
||||||
|
src/cc/compat/linux/virtual_bpf.h | 167 ++++++++++++++++++++++++++++--
|
||||||
|
src/cc/export/helpers.h | 17 +++
|
||||||
|
src/cc/libbpf.c | 8 ++
|
||||||
|
5 files changed, 190 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
|
||||||
|
index 9192aa43..33318624 100644
|
||||||
|
--- a/docs/kernel-versions.md
|
||||||
|
+++ b/docs/kernel-versions.md
|
||||||
|
@@ -208,6 +208,7 @@ Helper | Kernel version | License | Commit |
|
||||||
|
-------|----------------|---------|--------|
|
||||||
|
`BPF_FUNC_bind()` | 4.17 | | [`d74bad4e74ee`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d74bad4e74ee373787a9ae24197c17b7cdc428d5) |
|
||||||
|
`BPF_FUNC_bprm_opts_set()` | 5.11 | | [`3f6719c7b62f`](https://github.com/torvalds/linux/commit/3f6719c7b62f0327c9091e26d0da10e65668229e)
|
||||||
|
+`BPF_FUNC_btf_find_by_name_kind()` | 5.14 | | [`3d78417b60fb`](https://github.com/torvalds/linux/commit/3d78417b60fba249cc555468cb72d96f5cde2964)
|
||||||
|
`BPF_FUNC_check_mtu()` | 5.12 | | [`34b2021cc616`](https://github.com/torvalds/linux/commit/34b2021cc61642d61c3cf943d9e71925b827941b)
|
||||||
|
`BPF_FUNC_clone_redirect()` | 4.2 | | [`3896d655f4d4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3896d655f4d491c67d669a15f275a39f713410f8)
|
||||||
|
`BPF_FUNC_copy_from_user()` | 5.10 | | [`07be4c4a3e7a`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=07be4c4a3e7a0db148e44b16c5190e753d1c8569)
|
||||||
|
@@ -226,6 +227,7 @@ Helper | Kernel version | License | Commit |
|
||||||
|
`BPF_FUNC_get_current_task()` | 4.8 | GPL | [`606274c5abd8`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=606274c5abd8e245add01bc7145a8cbb92b69ba8)
|
||||||
|
`BPF_FUNC_get_current_task_btf()` | 5.11 | GPL | [`3ca1032ab7ab`](https://github.com/torvalds/linux/commit/3ca1032ab7ab010eccb107aa515598788f7d93bb)
|
||||||
|
`BPF_FUNC_get_current_uid_gid()` | 4.2 | | [`ffeedafbf023`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89)
|
||||||
|
+`BPF_FUNC_get_func_ip()` | 5.15 | | [`5d8b583d04ae`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=5d8b583d04aedb3bd5f6d227a334c210c7d735f9)
|
||||||
|
`BPF_FUNC_get_hash_recalc()` | 4.8 | | [`13c5c240f789`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=13c5c240f789bbd2bcacb14a23771491485ae61f)
|
||||||
|
`BPF_FUNC_get_listener_sock()` | 5.1 | | [`dbafd7ddd623`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/dbafd7ddd62369b2f3926ab847cbf8fc40e800b7)
|
||||||
|
`BPF_FUNC_get_local_storage()` | 4.19 | | [`cd3394317653`](https://github.com/torvalds/linux/commit/cd3394317653837e2eb5c5d0904a8996102af9fc)
|
||||||
|
@@ -352,6 +354,8 @@ Helper | Kernel version | License | Commit |
|
||||||
|
`BPF_FUNC_store_hdr_opt()` | 5.10 | | [`0813a841566f`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=0813a841566f0962a5551be7749b43c45f0022a0)
|
||||||
|
`BPF_FUNC_strtol()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a)
|
||||||
|
`BPF_FUNC_strtoul()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a)
|
||||||
|
+`BPF_FUNC_sys_bpf()` | 5.14 | | [`79a7f8bdb159`](https://github.com/torvalds/linux/commit/79a7f8bdb159d9914b58740f3d31d602a6e4aca8)
|
||||||
|
+`BPF_FUNC_sys_close()` | 5.14 | | [`3abea089246f`](https://github.com/torvalds/linux/commit/3abea089246f76c1517b054ddb5946f3f1dbd2c0)
|
||||||
|
`BPF_FUNC_sysctl_get_current_value()` | 5.2 | | [`1d11b3016cec`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/1d11b3016cec4ed9770b98e82a61708c8f4926e7)
|
||||||
|
`BPF_FUNC_sysctl_get_name()` | 5.2 | | [`808649fb787d`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/808649fb787d918a48a360a668ee4ee9023f0c11)
|
||||||
|
`BPF_FUNC_sysctl_get_new_value()` | 5.2 | | [`4e63acdff864`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4e63acdff864654cee0ac5aaeda3913798ee78f6)
|
||||||
|
@@ -364,6 +368,10 @@ Helper | Kernel version | License | Commit |
|
||||||
|
`BPF_FUNC_tcp_send_ack()` | 5.5 | | [`206057fe020a`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=206057fe020ac5c037d5e2dd6562a9bd216ec765)
|
||||||
|
`BPF_FUNC_tcp_sock()` | 5.1 | | [`655a51e536c0`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=655a51e536c09d15ffa3603b1b6fce2b45b85a1f)
|
||||||
|
`BPF_FUNC_this_cpu_ptr()` | 5.10 | | [`63d9b80dcf2c`](https://github.com/torvalds/linux/commit/63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1) |
|
||||||
|
+`BPF_FUNC_timer_init()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4)
|
||||||
|
+`BPF_FUNC_timer_set_callback()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4)
|
||||||
|
+`BPF_FUNC_timer_start()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4)
|
||||||
|
+`BPF_FUNC_timer_cancel()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4)
|
||||||
|
`BPF_FUNC_trace_printk()` | 4.1 | GPL | [`9c959c863f82`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=9c959c863f8217a2ff3d7c296e8223654d240569)
|
||||||
|
`BPF_FUNC_xdp_adjust_head()` | 4.10 | | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03)
|
||||||
|
`BPF_FUNC_xdp_adjust_meta()` | 4.15 | | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da)
|
||||||
|
diff --git a/introspection/bps.c b/introspection/bps.c
|
||||||
|
index e92da3f6..25a88cbd 100644
|
||||||
|
--- a/introspection/bps.c
|
||||||
|
+++ b/introspection/bps.c
|
||||||
|
@@ -47,6 +47,7 @@ static const char * const prog_type_strings[] = {
|
||||||
|
[BPF_PROG_TYPE_EXT] = "ext",
|
||||||
|
[BPF_PROG_TYPE_LSM] = "lsm",
|
||||||
|
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
|
||||||
|
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * const map_type_strings[] = {
|
||||||
|
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
index 3490bc14..bf4bc3a6 100644
|
||||||
|
--- a/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
+++ b/src/cc/compat/linux/virtual_bpf.h
|
||||||
|
@@ -325,9 +325,6 @@ union bpf_iter_link_info {
|
||||||
|
* **BPF_PROG_TYPE_SK_LOOKUP**
|
||||||
|
* *data_in* and *data_out* must be NULL.
|
||||||
|
*
|
||||||
|
- * **BPF_PROG_TYPE_XDP**
|
||||||
|
- * *ctx_in* and *ctx_out* must be NULL.
|
||||||
|
- *
|
||||||
|
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
|
||||||
|
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
|
||||||
|
*
|
||||||
|
@@ -528,6 +525,15 @@ union bpf_iter_link_info {
|
||||||
|
* Look up an element with the given *key* in the map referred to
|
||||||
|
* by the file descriptor *fd*, and if found, delete the element.
|
||||||
|
*
|
||||||
|
+ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
|
||||||
|
+ * types, the *flags* argument needs to be set to 0, but for other
|
||||||
|
+ * map types, it may be specified as:
|
||||||
|
+ *
|
||||||
|
+ * **BPF_F_LOCK**
|
||||||
|
+ * Look up and delete the value of a spin-locked map
|
||||||
|
+ * without returning the lock. This must be specified if
|
||||||
|
+ * the elements contain a spinlock.
|
||||||
|
+ *
|
||||||
|
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||||
|
* implement this command as a "pop" operation, deleting the top
|
||||||
|
* element rather than one corresponding to *key*.
|
||||||
|
@@ -537,6 +543,10 @@ union bpf_iter_link_info {
|
||||||
|
* This command is only valid for the following map types:
|
||||||
|
* * **BPF_MAP_TYPE_QUEUE**
|
||||||
|
* * **BPF_MAP_TYPE_STACK**
|
||||||
|
+ * * **BPF_MAP_TYPE_HASH**
|
||||||
|
+ * * **BPF_MAP_TYPE_PERCPU_HASH**
|
||||||
|
+ * * **BPF_MAP_TYPE_LRU_HASH**
|
||||||
|
+ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
|
||||||
|
*
|
||||||
|
* Return
|
||||||
|
* Returns zero on success. On error, -1 is returned and *errno*
|
||||||
|
@@ -838,6 +848,7 @@ enum bpf_cmd {
|
||||||
|
BPF_PROG_ATTACH,
|
||||||
|
BPF_PROG_DETACH,
|
||||||
|
BPF_PROG_TEST_RUN,
|
||||||
|
+ BPF_PROG_RUN = BPF_PROG_TEST_RUN,
|
||||||
|
BPF_PROG_GET_NEXT_ID,
|
||||||
|
BPF_MAP_GET_NEXT_ID,
|
||||||
|
BPF_PROG_GET_FD_BY_ID,
|
||||||
|
@@ -938,6 +949,7 @@ enum bpf_prog_type {
|
||||||
|
BPF_PROG_TYPE_EXT,
|
||||||
|
BPF_PROG_TYPE_LSM,
|
||||||
|
BPF_PROG_TYPE_SK_LOOKUP,
|
||||||
|
+ BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
|
||||||
|
};
|
||||||
|
|
||||||
|
enum bpf_attach_type {
|
||||||
|
@@ -980,6 +992,8 @@ enum bpf_attach_type {
|
||||||
|
BPF_SK_LOOKUP,
|
||||||
|
BPF_XDP,
|
||||||
|
BPF_SK_SKB_VERDICT,
|
||||||
|
+ BPF_SK_REUSEPORT_SELECT,
|
||||||
|
+ BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
|
||||||
|
__MAX_BPF_ATTACH_TYPE
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -1098,8 +1112,8 @@ enum bpf_link_type {
|
||||||
|
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
|
||||||
|
* the following extensions:
|
||||||
|
*
|
||||||
|
- * insn[0].src_reg: BPF_PSEUDO_MAP_FD
|
||||||
|
- * insn[0].imm: map fd
|
||||||
|
+ * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX]
|
||||||
|
+ * insn[0].imm: map fd or fd_idx
|
||||||
|
* insn[1].imm: 0
|
||||||
|
* insn[0].off: 0
|
||||||
|
* insn[1].off: 0
|
||||||
|
@@ -1107,15 +1121,19 @@ enum bpf_link_type {
|
||||||
|
* verifier type: CONST_PTR_TO_MAP
|
||||||
|
*/
|
||||||
|
#define BPF_PSEUDO_MAP_FD 1
|
||||||
|
-/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE
|
||||||
|
- * insn[0].imm: map fd
|
||||||
|
+#define BPF_PSEUDO_MAP_IDX 5
|
||||||
|
+
|
||||||
|
+/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE
|
||||||
|
+ * insn[0].imm: map fd or fd_idx
|
||||||
|
* insn[1].imm: offset into value
|
||||||
|
* insn[0].off: 0
|
||||||
|
* insn[1].off: 0
|
||||||
|
* ldimm64 rewrite: address of map[0]+offset
|
||||||
|
* verifier type: PTR_TO_MAP_VALUE
|
||||||
|
*/
|
||||||
|
-#define BPF_PSEUDO_MAP_VALUE 2
|
||||||
|
+#define BPF_PSEUDO_MAP_VALUE 2
|
||||||
|
+#define BPF_PSEUDO_MAP_IDX_VALUE 6
|
||||||
|
+
|
||||||
|
/* insn[0].src_reg: BPF_PSEUDO_BTF_ID
|
||||||
|
* insn[0].imm: kernel btd id of VAR
|
||||||
|
* insn[1].imm: 0
|
||||||
|
@@ -1315,6 +1333,8 @@ union bpf_attr {
|
||||||
|
/* or valid module BTF object fd or 0 to attach to vmlinux */
|
||||||
|
__u32 attach_btf_obj_fd;
|
||||||
|
};
|
||||||
|
+ __u32 :32; /* pad */
|
||||||
|
+ __aligned_u64 fd_array; /* array of FDs */
|
||||||
|
};
|
||||||
|
|
||||||
|
struct { /* anonymous struct used by BPF_OBJ_* commands */
|
||||||
|
@@ -2535,8 +2555,12 @@ union bpf_attr {
|
||||||
|
* The lower two bits of *flags* are used as the return code if
|
||||||
|
* the map lookup fails. This is so that the return value can be
|
||||||
|
* one of the XDP program return codes up to **XDP_TX**, as chosen
|
||||||
|
- * by the caller. Any higher bits in the *flags* argument must be
|
||||||
|
- * unset.
|
||||||
|
+ * by the caller. The higher bits of *flags* can be set to
|
||||||
|
+ * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
|
||||||
|
+ *
|
||||||
|
+ * With BPF_F_BROADCAST the packet will be broadcasted to all the
|
||||||
|
+ * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress
|
||||||
|
+ * interface will be excluded when do broadcasting.
|
||||||
|
*
|
||||||
|
* See also **bpf_redirect**\ (), which only supports redirecting
|
||||||
|
* to an ifindex, but doesn't require a map to do so.
|
||||||
|
@@ -3223,7 +3247,7 @@ union bpf_attr {
|
||||||
|
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
|
||||||
|
* Description
|
||||||
|
* Select a **SO_REUSEPORT** socket from a
|
||||||
|
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
|
||||||
|
+ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
|
||||||
|
* It checks the selected socket is matching the incoming
|
||||||
|
* request in the socket buffer.
|
||||||
|
* Return
|
||||||
|
@@ -4736,6 +4760,94 @@ union bpf_attr {
|
||||||
|
* be zero-terminated except when **str_size** is 0.
|
||||||
|
*
|
||||||
|
* Or **-EBUSY** if the per-CPU memory copy buffer is busy.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
|
||||||
|
+ * Description
|
||||||
|
+ * Execute bpf syscall with given arguments.
|
||||||
|
+ * Return
|
||||||
|
+ * A syscall result.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags)
|
||||||
|
+ * Description
|
||||||
|
+ * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs.
|
||||||
|
+ * Return
|
||||||
|
+ * Returns btf_id and btf_obj_fd in lower and upper 32 bits.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_sys_close(u32 fd)
|
||||||
|
+ * Description
|
||||||
|
+ * Execute close syscall for given FD.
|
||||||
|
+ * Return
|
||||||
|
+ * A syscall result.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
|
||||||
|
+ * Description
|
||||||
|
+ * Initialize the timer.
|
||||||
|
+ * First 4 bits of *flags* specify clockid.
|
||||||
|
+ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
|
||||||
|
+ * All other bits of *flags* are reserved.
|
||||||
|
+ * The verifier will reject the program if *timer* is not from
|
||||||
|
+ * the same *map*.
|
||||||
|
+ * Return
|
||||||
|
+ * 0 on success.
|
||||||
|
+ * **-EBUSY** if *timer* is already initialized.
|
||||||
|
+ * **-EINVAL** if invalid *flags* are passed.
|
||||||
|
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
|
||||||
|
+ * The user space should either hold a file descriptor to a map with timers
|
||||||
|
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
|
||||||
|
+ * closed all timers in the map will be cancelled and freed.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
|
||||||
|
+ * Description
|
||||||
|
+ * Configure the timer to call *callback_fn* static function.
|
||||||
|
+ * Return
|
||||||
|
+ * 0 on success.
|
||||||
|
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
|
||||||
|
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
|
||||||
|
+ * The user space should either hold a file descriptor to a map with timers
|
||||||
|
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
|
||||||
|
+ * closed all timers in the map will be cancelled and freed.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
|
||||||
|
+ * Description
|
||||||
|
+ * Set timer expiration N nanoseconds from the current time. The
|
||||||
|
+ * configured callback will be invoked in soft irq context on some cpu
|
||||||
|
+ * and will not repeat unless another bpf_timer_start() is made.
|
||||||
|
+ * In such case the next invocation can migrate to a different cpu.
|
||||||
|
+ * Since struct bpf_timer is a field inside map element the map
|
||||||
|
+ * owns the timer. The bpf_timer_set_callback() will increment refcnt
|
||||||
|
+ * of BPF program to make sure that callback_fn code stays valid.
|
||||||
|
+ * When user space reference to a map reaches zero all timers
|
||||||
|
+ * in a map are cancelled and corresponding program's refcnts are
|
||||||
|
+ * decremented. This is done to make sure that Ctrl-C of a user
|
||||||
|
+ * process doesn't leave any timers running. If map is pinned in
|
||||||
|
+ * bpffs the callback_fn can re-arm itself indefinitely.
|
||||||
|
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands
|
||||||
|
+ * cancel and free the timer in the given map element.
|
||||||
|
+ * The map can contain timers that invoke callback_fn-s from different
|
||||||
|
+ * programs. The same callback_fn can serve different timers from
|
||||||
|
+ * different maps if key/value layout matches across maps.
|
||||||
|
+ * Every bpf_timer_set_callback() can have different callback_fn.
|
||||||
|
+ *
|
||||||
|
+ * Return
|
||||||
|
+ * 0 on success.
|
||||||
|
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
|
||||||
|
+ * or invalid *flags* are passed.
|
||||||
|
+ *
|
||||||
|
+ * long bpf_timer_cancel(struct bpf_timer *timer)
|
||||||
|
+ * Description
|
||||||
|
+ * Cancel the timer and wait for callback_fn to finish if it was running.
|
||||||
|
+ * Return
|
||||||
|
+ * 0 if the timer was not active.
|
||||||
|
+ * 1 if the timer was active.
|
||||||
|
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
|
||||||
|
+ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
|
||||||
|
+ * own timer which would have led to a deadlock otherwise.
|
||||||
|
+ *
|
||||||
|
+ * u64 bpf_get_func_ip(void *ctx)
|
||||||
|
+ * Description
|
||||||
|
+ * Get address of the traced function (for tracing and kprobe programs).
|
||||||
|
+ * Return
|
||||||
|
+ * Address of the traced function.
|
||||||
|
*/
|
||||||
|
#define __BPF_FUNC_MAPPER(FN) \
|
||||||
|
FN(unspec), \
|
||||||
|
@@ -4904,6 +5016,14 @@ union bpf_attr {
|
||||||
|
FN(check_mtu), \
|
||||||
|
FN(for_each_map_elem), \
|
||||||
|
FN(snprintf), \
|
||||||
|
+ FN(sys_bpf), \
|
||||||
|
+ FN(btf_find_by_name_kind), \
|
||||||
|
+ FN(sys_close), \
|
||||||
|
+ FN(timer_init), \
|
||||||
|
+ FN(timer_set_callback), \
|
||||||
|
+ FN(timer_start), \
|
||||||
|
+ FN(timer_cancel), \
|
||||||
|
+ FN(get_func_ip), \
|
||||||
|
/* */
|
||||||
|
|
||||||
|
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||||
|
@@ -5081,6 +5201,12 @@ enum {
|
||||||
|
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Flags for bpf_redirect_map helper */
|
||||||
|
+enum {
|
||||||
|
+ BPF_F_BROADCAST = (1ULL << 3),
|
||||||
|
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
#define __bpf_md_ptr(type, name) \
|
||||||
|
union { \
|
||||||
|
type name; \
|
||||||
|
@@ -5365,6 +5491,20 @@ struct sk_reuseport_md {
|
||||||
|
__u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
|
||||||
|
__u32 bind_inany; /* Is sock bound to an INANY address? */
|
||||||
|
__u32 hash; /* A hash of the packet 4 tuples */
|
||||||
|
+ /* When reuse->migrating_sk is NULL, it is selecting a sk for the
|
||||||
|
+ * new incoming connection request (e.g. selecting a listen sk for
|
||||||
|
+ * the received SYN in the TCP case). reuse->sk is one of the sk
|
||||||
|
+ * in the reuseport group. The bpf prog can use reuse->sk to learn
|
||||||
|
+ * the local listening ip/port without looking into the skb.
|
||||||
|
+ *
|
||||||
|
+ * When reuse->migrating_sk is not NULL, reuse->sk is closed and
|
||||||
|
+ * reuse->migrating_sk is the socket that needs to be migrated
|
||||||
|
+ * to another listening socket. migrating_sk could be a fullsock
|
||||||
|
+ * sk that is fully established or a reqsk that is in-the-middle
|
||||||
|
+ * of 3-way handshake.
|
||||||
|
+ */
|
||||||
|
+ __bpf_md_ptr(struct bpf_sock *, sk);
|
||||||
|
+ __bpf_md_ptr(struct bpf_sock *, migrating_sk);
|
||||||
|
};
|
||||||
|
|
||||||
|
#define BPF_TAG_SIZE 8
|
||||||
|
@@ -6010,6 +6150,11 @@ struct bpf_spin_lock {
|
||||||
|
__u32 val;
|
||||||
|
};
|
||||||
|
|
||||||
|
+struct bpf_timer {
|
||||||
|
+ __u64 :64;
|
||||||
|
+ __u64 :64;
|
||||||
|
+} __attribute__((aligned(8)));
|
||||||
|
+
|
||||||
|
struct bpf_sysctl {
|
||||||
|
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
|
||||||
|
* Allows 1,2,4-byte read, but no write.
|
||||||
|
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
|
||||||
|
index e9137f7f..a4e9b705 100644
|
||||||
|
--- a/src/cc/export/helpers.h
|
||||||
|
+++ b/src/cc/export/helpers.h
|
||||||
|
@@ -847,6 +847,23 @@ static long (*bpf_snprintf)(char *str, __u32 str_size, const char *fmt,
|
||||||
|
__u64 *data, __u32 data_len) =
|
||||||
|
(void *)BPF_FUNC_snprintf;
|
||||||
|
|
||||||
|
+static long (*bpf_sys_bpf)(__u32 cmd, void *attr, __u32 attr_size) =
|
||||||
|
+ (void *)BPF_FUNC_sys_bpf;
|
||||||
|
+static long (*bpf_btf_find_by_name_kind)(char *name, int name_sz, __u32 kind, int flags) =
|
||||||
|
+ (void *)BPF_FUNC_btf_find_by_name_kind;
|
||||||
|
+static long (*bpf_sys_close)(__u32 fd) = (void *)BPF_FUNC_sys_close;
|
||||||
|
+
|
||||||
|
+struct bpf_timer;
|
||||||
|
+static long (*bpf_timer_init)(struct bpf_timer *timer, void *map, __u64 flags) =
|
||||||
|
+ (void *)BPF_FUNC_timer_init;
|
||||||
|
+static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn) =
|
||||||
|
+ (void *)BPF_FUNC_timer_set_callback;
|
||||||
|
+static long (*bpf_timer_start)(struct bpf_timer *timer, __u64 nsecs, __u64 flags) =
|
||||||
|
+ (void *)BPF_FUNC_timer_start;
|
||||||
|
+static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *)BPF_FUNC_timer_cancel;
|
||||||
|
+
|
||||||
|
+static __u64 (*bpf_get_func_ip)(void *ctx) = (void *)BPF_FUNC_get_func_ip;
|
||||||
|
+
|
||||||
|
/* llvm builtin functions that eBPF C program may use to
|
||||||
|
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||||
|
*/
|
||||||
|
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
|
||||||
|
index b83d68fd..f3608cfe 100644
|
||||||
|
--- a/src/cc/libbpf.c
|
||||||
|
+++ b/src/cc/libbpf.c
|
||||||
|
@@ -270,6 +270,14 @@ static struct bpf_helper helpers[] = {
|
||||||
|
{"check_mtu", "5.12"},
|
||||||
|
{"for_each_map_elem", "5.13"},
|
||||||
|
{"snprintf", "5.13"},
|
||||||
|
+ {"sys_bpf", "5.14"},
|
||||||
|
+ {"btf_find_by_name_kind", "5.14"},
|
||||||
|
+ {"sys_close", "5.14"},
|
||||||
|
+ {"timer_init", "5.15"},
|
||||||
|
+ {"timer_set_callback", "5.15"},
|
||||||
|
+ {"timer_start", "5.15"},
|
||||||
|
+ {"timer_cancel", "5.15"},
|
||||||
|
+ {"get_func_ip", "5.15"},
|
||||||
|
};
|
||||||
|
|
||||||
|
static uint64_t ptr_to_u64(void *ptr)
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,34 @@
|
|||||||
|
From 460a71ab24ad511318342077ac9ef57df543375f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jerome Marchand <jmarchan@redhat.com>
|
||||||
|
Date: Thu, 16 Sep 2021 14:44:23 +0200
|
||||||
|
Subject: [PATCH] threadsnoop: look for pthread_create in libc too
|
||||||
|
|
||||||
|
Since glibc 2.34, pthread features are integrated in libc directly.
|
||||||
|
Look for pthread_create there too when it is not found in libpthread.
|
||||||
|
|
||||||
|
Fixes #3623
|
||||||
|
---
|
||||||
|
tools/threadsnoop.py | 7 ++++++-
|
||||||
|
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/tools/threadsnoop.py b/tools/threadsnoop.py
|
||||||
|
index 04c5e680..471b0c3c 100755
|
||||||
|
--- a/tools/threadsnoop.py
|
||||||
|
+++ b/tools/threadsnoop.py
|
||||||
|
@@ -38,7 +38,12 @@ void do_entry(struct pt_regs *ctx) {
|
||||||
|
events.perf_submit(ctx, &data, sizeof(data));
|
||||||
|
};
|
||||||
|
""")
|
||||||
|
-b.attach_uprobe(name="pthread", sym="pthread_create", fn_name="do_entry")
|
||||||
|
+
|
||||||
|
+# Since version 2.34, pthread features are integrated in libc
|
||||||
|
+try:
|
||||||
|
+ b.attach_uprobe(name="pthread", sym="pthread_create", fn_name="do_entry")
|
||||||
|
+except Exception:
|
||||||
|
+ b.attach_uprobe(name="c", sym="pthread_create", fn_name="do_entry")
|
||||||
|
|
||||||
|
print("%-10s %-6s %-16s %s" % ("TIME(ms)", "PID", "COMM", "FUNC"))
|
||||||
|
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -0,0 +1,109 @@
|
|||||||
|
From 6c9d91c2196e69682a611dbfc10a0731f86deada Mon Sep 17 00:00:00 2001
|
||||||
|
From: zcy <zcy.chenyue.zhou@gmail.com>
|
||||||
|
Date: Fri, 25 Jun 2021 10:16:53 +0800
|
||||||
|
Subject: [PATCH] tools/readahead compatible with kernel version >= 5.10
|
||||||
|
(#3507)
|
||||||
|
|
||||||
|
After kernel version 5.10, __do_page_cache_readahead() was renamed to do_page_cache_ra(),
|
||||||
|
let us try both in readahead.py.
|
||||||
|
---
|
||||||
|
tools/readahead.py | 12 ++++++++----
|
||||||
|
tools/readahead_example.txt | 22 +++++++++++-----------
|
||||||
|
2 files changed, 19 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/readahead.py b/tools/readahead.py
|
||||||
|
index 14182d5a..b338261f 100755
|
||||||
|
--- a/tools/readahead.py
|
||||||
|
+++ b/tools/readahead.py
|
||||||
|
@@ -20,7 +20,7 @@ import argparse
|
||||||
|
|
||||||
|
# arguments
|
||||||
|
examples = """examples:
|
||||||
|
- ./readahead -d 20 # monitor for 10 seconds and generate stats
|
||||||
|
+ ./readahead -d 20 # monitor for 20 seconds and generate stats
|
||||||
|
"""
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
@@ -95,15 +95,19 @@ int entry_mark_page_accessed(struct pt_regs *ctx) {
|
||||||
|
"""
|
||||||
|
|
||||||
|
b = BPF(text=program)
|
||||||
|
-b.attach_kprobe(event="__do_page_cache_readahead", fn_name="entry__do_page_cache_readahead")
|
||||||
|
-b.attach_kretprobe(event="__do_page_cache_readahead", fn_name="exit__do_page_cache_readahead")
|
||||||
|
+if BPF.get_kprobe_functions(b"__do_page_cache_readahead"):
|
||||||
|
+ ra_event = "__do_page_cache_readahead"
|
||||||
|
+else:
|
||||||
|
+ ra_event = "do_page_cache_ra"
|
||||||
|
+b.attach_kprobe(event=ra_event, fn_name="entry__do_page_cache_readahead")
|
||||||
|
+b.attach_kretprobe(event=ra_event, fn_name="exit__do_page_cache_readahead")
|
||||||
|
b.attach_kretprobe(event="__page_cache_alloc", fn_name="exit__page_cache_alloc")
|
||||||
|
b.attach_kprobe(event="mark_page_accessed", fn_name="entry_mark_page_accessed")
|
||||||
|
|
||||||
|
# header
|
||||||
|
print("Tracing... Hit Ctrl-C to end.")
|
||||||
|
|
||||||
|
-# print
|
||||||
|
+# print
|
||||||
|
def print_stats():
|
||||||
|
print()
|
||||||
|
print("Read-ahead unused pages: %d" % (b["pages"][ct.c_ulong(0)].value))
|
||||||
|
diff --git a/tools/readahead_example.txt b/tools/readahead_example.txt
|
||||||
|
index 079dbaae..6d675c13 100644
|
||||||
|
--- a/tools/readahead_example.txt
|
||||||
|
+++ b/tools/readahead_example.txt
|
||||||
|
@@ -2,20 +2,20 @@ Demonstration of readahead, the Linux eBPF/bcc version
|
||||||
|
|
||||||
|
Read-ahead mechanism is used by operation sytems to optimize sequential operations
|
||||||
|
by reading ahead some pages to avoid more expensive filesystem operations. This tool
|
||||||
|
-shows the performance of the read-ahead caching on the system under a given load to
|
||||||
|
+shows the performance of the read-ahead caching on the system under a given load to
|
||||||
|
investigate any caching issues. It shows a count for unused pages in the cache and
|
||||||
|
also prints a histogram showing how long they have remianed there.
|
||||||
|
|
||||||
|
Usage Scenario
|
||||||
|
==============
|
||||||
|
|
||||||
|
-Consider that you are developing a React Native application which performs aggressive
|
||||||
|
+Consider that you are developing a React Native application which performs aggressive
|
||||||
|
reads while re-encoding a video in local-storage. Usually such an app would be multi-
|
||||||
|
-layered and have transitional library dependencies. The actual read may be performed
|
||||||
|
-by some unknown native library which may or may not be using hints to the OS, such as
|
||||||
|
-madvise(p, LEN, MADV_SEQUENTIAL). If high IOPS is observed in such an app, running
|
||||||
|
-readahead may pin the issue much faster in this case as the developer digs deeper
|
||||||
|
-into what may be causing this.
|
||||||
|
+layered and have transitional library dependencies. The actual read may be performed
|
||||||
|
+by some unknown native library which may or may not be using hints to the OS, such as
|
||||||
|
+madvise(p, LEN, MADV_SEQUENTIAL). If high IOPS is observed in such an app, running
|
||||||
|
+readahead may pin the issue much faster in this case as the developer digs deeper
|
||||||
|
+into what may be causing this.
|
||||||
|
|
||||||
|
An example where such an issue can surface is: https://github.com/boltdb/bolt/issues/691
|
||||||
|
|
||||||
|
@@ -40,7 +40,7 @@ Read-ahead unused pages: 6765
|
||||||
|
2048 -> 4095 : 439 |**** |
|
||||||
|
4096 -> 8191 : 188 |* |
|
||||||
|
|
||||||
|
-In the example above, we recorded system-wide stats for 30 seconds. We can observe that
|
||||||
|
+In the example above, we recorded system-wide stats for 30 seconds. We can observe that
|
||||||
|
while most of the pages stayed in the readahead cache for quite less time, after 30
|
||||||
|
seconds 6765 pages still remained in the cache, yet unaccessed.
|
||||||
|
|
||||||
|
@@ -49,12 +49,12 @@ Note on Kprobes Usage
|
||||||
|
|
||||||
|
This tool uses Kprobes on the following kernel functions:
|
||||||
|
|
||||||
|
-__do_page_cache_readahead()
|
||||||
|
+__do_page_cache_readahead()/do_page_cache_ra() (After kernel version 5.10 (include), __do_page_cache_readahead was renamed to do_page_cache_ra)
|
||||||
|
__page_cache_alloc()
|
||||||
|
mark_page_accessed()
|
||||||
|
|
||||||
|
-Since the tool uses Kprobes, depending on your linux kernel's compilation, these
|
||||||
|
-functions may be inlined and hence not available for Kprobes. To see whether you have
|
||||||
|
+Since the tool uses Kprobes, depending on your linux kernel's compilation, these
|
||||||
|
+functions may be inlined and hence not available for Kprobes. To see whether you have
|
||||||
|
the functions available, check vmlinux source and binary to confirm whether inlining is
|
||||||
|
happening or not. You can also check /proc/kallsyms on the host and verify if the target
|
||||||
|
functions are present there before using this tool.
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
31
bcc.spec
31
bcc.spec
@ -27,7 +27,7 @@
|
|||||||
|
|
||||||
Name: bcc
|
Name: bcc
|
||||||
Version: 0.20.0
|
Version: 0.20.0
|
||||||
Release: 6%{?dist}
|
Release: 7%{?dist}
|
||||||
Summary: BPF Compiler Collection (BCC)
|
Summary: BPF Compiler Collection (BCC)
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
URL: https://github.com/iovisor/bcc
|
URL: https://github.com/iovisor/bcc
|
||||||
@ -36,6 +36,12 @@ Patch0: %{name}-%{version}-libbpf-tool-don-t-ignore-LDFLAGS.patch
|
|||||||
Patch1: %{name}-%{version}-libbpf-tools-readahead-don-t-mark-struct-hist-as-sta.patch
|
Patch1: %{name}-%{version}-libbpf-tools-readahead-don-t-mark-struct-hist-as-sta.patch
|
||||||
Patch2: %{name}-%{version}-Define-KERNEL_VERSION.patch
|
Patch2: %{name}-%{version}-Define-KERNEL_VERSION.patch
|
||||||
Patch3: %{name}-%{version}-Revert-libbpf-tools-remove-unecessary-custom-NULL-de.patch
|
Patch3: %{name}-%{version}-Revert-libbpf-tools-remove-unecessary-custom-NULL-de.patch
|
||||||
|
Patch4: %{name}-%{version}-sync-with-latest-libbpf-repo-3529.patch
|
||||||
|
Patch5: %{name}-%{version}-threadsnoop-look-for-pthread_create-in-libc-too.patch
|
||||||
|
Patch6: %{name}-%{version}-Update-cpudist.py.patch
|
||||||
|
Patch7: %{name}-%{version}-tools-readahead-compatible-with-kernel-version-5.10-.patch
|
||||||
|
Patch8: %{name}-%{version}-Fix-mdflush-on-RHEL9.patch
|
||||||
|
Patch9: %{name}-%{version}-Handle-renaming-of-task_struct_-state-field-on-RHEL-.patch
|
||||||
|
|
||||||
# Arches will be included as upstream support is added and dependencies are
|
# Arches will be included as upstream support is added and dependencies are
|
||||||
# satisfied in the respective arches
|
# satisfied in the respective arches
|
||||||
@ -217,6 +223,23 @@ install libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}
|
|||||||
%dir %{_datadir}/%{name}
|
%dir %{_datadir}/%{name}
|
||||||
%{_datadir}/%{name}/tools/
|
%{_datadir}/%{name}/tools/
|
||||||
%{_datadir}/%{name}/introspection/
|
%{_datadir}/%{name}/introspection/
|
||||||
|
%if 0%{?rhel} > 0
|
||||||
|
# inject relies on CONFIG_BPF_KPROBE_OVERRIDE which is not set on RHEL
|
||||||
|
%exclude %{_datadir}/%{name}/tools/inject
|
||||||
|
%exclude %{_datadir}/%{name}/tools/doc/inject_example.txt
|
||||||
|
%exclude %{_mandir}/man8/bcc-inject.8.gz
|
||||||
|
# Neither btrfs nor zfs are available on RHEL
|
||||||
|
%exclude %{_datadir}/%{name}/tools/btrfs*
|
||||||
|
%exclude %{_datadir}/%{name}/tools/doc/btrfs*
|
||||||
|
%exclude %{_mandir}/man8/bcc-btrfs*
|
||||||
|
%exclude %{_datadir}/%{name}/tools/zfs*
|
||||||
|
%exclude %{_datadir}/%{name}/tools/doc/zfs*
|
||||||
|
%exclude %{_mandir}/man8/bcc-zfs*
|
||||||
|
# criticalstat relies on CONFIG_PREEMPTIRQ_EVENTS which is disabled on RHEL
|
||||||
|
%exclude %{_datadir}/%{name}/tools/criticalstat
|
||||||
|
%exclude %{_datadir}/%{name}/tools/doc/criticalstat_example.txt
|
||||||
|
%exclude %{_mandir}/man8/bcc-criticalstat.8.gz
|
||||||
|
%endif
|
||||||
%{_mandir}/man8/*
|
%{_mandir}/man8/*
|
||||||
|
|
||||||
%if %{with lua}
|
%if %{with lua}
|
||||||
@ -230,6 +253,12 @@ install libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Oct 14 2021 Jerome Marchand <jmarchan@redhat.com> - 0.20.0-7
|
||||||
|
- Sync with latest libbpf (fixes BPF_F_BROADCAST breakages of rhbz#1992430)
|
||||||
|
- Fix cpudist, mdflush, readahead and threadsnoop (rhbz#1992430)
|
||||||
|
- Handle the renaming of task_struct->state field
|
||||||
|
- Drop tools that rely on features disabled on RHEL
|
||||||
|
|
||||||
* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 0.20.0-6
|
* Mon Aug 09 2021 Mohan Boddu <mboddu@redhat.com> - 0.20.0-6
|
||||||
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
|
- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags
|
||||||
Related: rhbz#1991688
|
Related: rhbz#1991688
|
||||||
|
Loading…
Reference in New Issue
Block a user