Rebase bcc to v0.29.1 and enable libbpf-tools on s390x

Also fix bindsnoop and ttysnoop, and sync with the latest libbpf.

Resolves: bz#2253688
Resolves: bz#2249458

commit 110e48716f (parent eae885cfcc)
							
								
								
									
.gitignore (1 line added):

@@ -21,3 +21,4 @@
 /bcc-0.26.0.tar.gz
 /bcc-0.27.0.tar.gz
 /bcc-0.28.0.tar.gz
+/bcc-0.29.1.tar.gz
							
								
								
									
Fix-ttysnoop.py-with-newer-kernels.patch (new file, 132 lines):

From 89126c7452c29736d38dc072a952b0b0c831fade Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Mon, 29 Jan 2024 16:13:30 -0800
Subject: [PATCH] [PATCH] Fix ttysnoop.py with newer kernels

Jerome Marchand reported that ttysnoop.py won't work properly
with newer kernels (#4884). I did some investigation and found
that some kernel data structure change caused verification failure.
The failure is caused by the following:
  ; kvec  = from->kvec;
  // R1=ptr_iov_iter()
  15: (79) r1 = *(u64 *)(r1 +16)        ; R1_w=scalar()
  ; count = kvec->iov_len;
  16: (bf) r2 = r1                      ; R1_w=scalar(id=1) R2_w=scalar(id=1)
  17: (07) r2 += 8                      ; R2_w=scalar()
  18: (05) goto pc+3
  ;
  22: (79) r2 = *(u64 *)(r2 +0)
  R2 invalid mem access 'scalar'

So basically, loading 'iov_iter + 16' returns a scalar but verifier
expects it to be a pointer.

In v6.4, we have
    struct iovec
    {
        void __user *iov_base;  /* BSD uses caddr_t (1003.1g requires void *) */
        __kernel_size_t iov_len; /* Must be size_t (1003.1g) */
    };
    struct iov_iter {
        u8 iter_type;
        bool copy_mc;
        bool nofault;
        bool data_source;
        bool user_backed;
        union {
                size_t iov_offset;
                int last_offset;
        };
        union {
                struct iovec __ubuf_iovec;
                struct {
                        union {
                                const struct iovec *__iov;
                                const struct kvec *kvec;
                                const struct bio_vec *bvec;
                                struct xarray *xarray;
                                struct pipe_inode_info *pipe;
                                void __user *ubuf;
                        };
                        size_t count;
                };
        };
        union {
                unsigned long nr_segs;
                struct {
                        unsigned int head;
                        unsigned int start_head;
                };
                loff_t xarray_start;
        };
    };

The kernel traversal chain will be
   "struct iov_iter" -> "struct iovec __ubuf_iovec" -> "void __user *iov_base".
Since the "iov_base" type is a ptr to void, the kernel considers the
loaded value as a scalar which caused verification failure.

But for old kernel like 5.19, we do not have this issue.
    struct iovec
    {
        void __user *iov_base;  /* BSD uses caddr_t (1003.1g requires void *) */
        __kernel_size_t iov_len; /* Must be size_t (1003.1g) */
    };
    struct iov_iter {
        u8 iter_type;
        bool nofault;
        bool data_source;
        bool user_backed;
        size_t iov_offset;
        size_t count;
        union {
                const struct iovec *iov;
                const struct kvec *kvec;
                const struct bio_vec *bvec;
                struct xarray *xarray;
                struct pipe_inode_info *pipe;
                void __user *ubuf;
        };
        union {
                unsigned long nr_segs;
                struct {
                        unsigned int head;
                        unsigned int start_head;
                };
                loff_t xarray_start;
        };
    };

The kernel traversal chain will be
    "struct iov_iter" -> "const struct iovec *iov"
Note that "const struct iovec *iov" is used since it is the *first* member
inside the union. The traversal stops once we hit a pointer.
So the kernel verifier returns a 'struct iovec' object (untrusted, cannot
be used as a parameter to a call) and verifier can proceed.

To fix the problem, let us use bpf_probe_read_kernel() instead
so ttysnoop.py can continue to work with newer kernel.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 tools/ttysnoop.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ttysnoop.py b/tools/ttysnoop.py
index 77f97b7c..aca09db4 100755
--- a/tools/ttysnoop.py
+++ b/tools/ttysnoop.py
@@ -162,8 +162,8 @@ PROBE_TTY_WRITE
      */
     case CASE_ITER_IOVEC_NAME:
         kvec  = from->kvec;
-        buf   = kvec->iov_base;
-        count = kvec->iov_len;
+        bpf_probe_read_kernel(&buf, sizeof(buf), &kvec->iov_base);
+        bpf_probe_read_kernel(&count, sizeof(count), &kvec->iov_len);
         break;
     CASE_ITER_UBUF_TEXT
     /* TODO: Support more type */
-- 
2.43.0
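Background note (not part of the commit): bpf_probe_read_kernel(dst, size, unsafe_ptr) copies size bytes from an arbitrary kernel address into BPF-owned memory and returns 0 or a negative error, so the verifier no longer has to track the type of the pointer being followed. A minimal sketch of the resulting pattern in BCC-style BPF C, mirroring the hunk above ('from', 'buf' and 'count' are the tool's own variables):

    kvec = from->kvec;
    /* was: buf = kvec->iov_base; count = kvec->iov_len;
     * rejected on newer kernels as "invalid mem access 'scalar'" */
    bpf_probe_read_kernel(&buf, sizeof(buf), &kvec->iov_base);
    bpf_probe_read_kernel(&count, sizeof(count), &kvec->iov_len);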
							
								
								
									
Sync-with-latest-libbpf-repo-4889.patch (new file, 727 lines):
|  | From c0691e35cd65d5400f0b792d5eba81f8eae236dc Mon Sep 17 00:00:00 2001 | ||||||
|  | From: yonghong-song <ys114321@gmail.com> | ||||||
|  | Date: Tue, 30 Jan 2024 09:14:30 -0800 | ||||||
|  | Subject: [PATCH] Sync with latest libbpf repo (#4889) | ||||||
|  | 
 | ||||||
|  | Sync with latest libbpf repo. | ||||||
|  | The top libbpf commit is: | ||||||
|  |   3b0973892891  sync: remove NETDEV_XSK_FLAGS_MASK which is not in bpf/bpf-next anymore | ||||||
|  | 
 | ||||||
|  | Signed-off-by: Yonghong Song <yonghong.song@linux.dev> | ||||||
|  | ---
 | ||||||
|  |  introspection/bps.c               |   1 + | ||||||
|  |  src/cc/compat/linux/virtual_bpf.h | 368 ++++++++++++++++++++++++++---- | ||||||
|  |  src/cc/libbpf                     |   2 +- | ||||||
|  |  3 files changed, 326 insertions(+), 45 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/introspection/bps.c b/introspection/bps.c
 | ||||||
|  | index 3956fbf2..8cdef54a 100644
 | ||||||
|  | --- a/introspection/bps.c
 | ||||||
|  | +++ b/introspection/bps.c
 | ||||||
|  | @@ -48,6 +48,7 @@ static const char * const prog_type_strings[] = {
 | ||||||
|  |    [BPF_PROG_TYPE_LSM] = "lsm", | ||||||
|  |    [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", | ||||||
|  |    [BPF_PROG_TYPE_SYSCALL] = "syscall", | ||||||
|  | +  [BPF_PROG_TYPE_NETFILTER] = "netfilter",
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  static const char * const map_type_strings[] = { | ||||||
|  | diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
 | ||||||
|  | index a182123e..fcabe71a 100644
 | ||||||
|  | --- a/src/cc/compat/linux/virtual_bpf.h
 | ||||||
|  | +++ b/src/cc/compat/linux/virtual_bpf.h
 | ||||||
|  | @@ -20,6 +20,7 @@ R"********(
 | ||||||
|  |   | ||||||
|  |  /* ld/ldx fields */ | ||||||
|  |  #define BPF_DW		0x18	/* double word (64-bit) */ | ||||||
|  | +#define BPF_MEMSX	0x80	/* load with sign extension */
 | ||||||
|  |  #define BPF_ATOMIC	0xc0	/* atomic memory ops - op type in immediate */ | ||||||
|  |  #define BPF_XADD	0xc0	/* exclusive add - legacy name */ | ||||||
|  |   | ||||||
|  | @@ -847,6 +848,36 @@ union bpf_iter_link_info {
 | ||||||
|  |   *		Returns zero on success. On error, -1 is returned and *errno* | ||||||
|  |   *		is set appropriately. | ||||||
|  |   * | ||||||
|  | + * BPF_TOKEN_CREATE
 | ||||||
|  | + *	Description
 | ||||||
|  | + *		Create BPF token with embedded information about what
 | ||||||
|  | + *		BPF-related functionality it allows:
 | ||||||
|  | + *		- a set of allowed bpf() syscall commands;
 | ||||||
|  | + *		- a set of allowed BPF map types to be created with
 | ||||||
|  | + *		BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
 | ||||||
|  | + *		- a set of allowed BPF program types and BPF program attach
 | ||||||
|  | + *		types to be loaded with BPF_PROG_LOAD command, if
 | ||||||
|  | + *		BPF_PROG_LOAD itself is allowed.
 | ||||||
|  | + *
 | ||||||
|  | + *		BPF token is created (derived) from an instance of BPF FS,
 | ||||||
|  | + *		assuming it has necessary delegation mount options specified.
 | ||||||
|  | + *		This BPF token can be passed as an extra parameter to various
 | ||||||
|  | + *		bpf() syscall commands to grant BPF subsystem functionality to
 | ||||||
|  | + *		unprivileged processes.
 | ||||||
|  | + *
 | ||||||
|  | + *		When created, BPF token is "associated" with the owning
 | ||||||
|  | + *		user namespace of BPF FS instance (super block) that it was
 | ||||||
|  | + *		derived from, and subsequent BPF operations performed with
 | ||||||
|  | + *		BPF token would be performing capabilities checks (i.e.,
 | ||||||
|  | + *		CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
 | ||||||
|  | + *		that user namespace. Without BPF token, such capabilities
 | ||||||
|  | + *		have to be granted in init user namespace, making bpf()
 | ||||||
|  | + *		syscall incompatible with user namespace, for the most part.
 | ||||||
|  | + *
 | ||||||
|  | + *	Return
 | ||||||
|  | + *		A new file descriptor (a nonnegative integer), or -1 if an
 | ||||||
|  | + *		error occurred (in which case, *errno* is set appropriately).
 | ||||||
|  | + *
 | ||||||
|  |   * NOTES | ||||||
|  |   *	eBPF objects (maps and programs) can be shared between processes. | ||||||
|  |   * | ||||||
|  | @@ -901,6 +932,8 @@ enum bpf_cmd {
 | ||||||
|  |  	BPF_ITER_CREATE, | ||||||
|  |  	BPF_LINK_DETACH, | ||||||
|  |  	BPF_PROG_BIND_MAP, | ||||||
|  | +	BPF_TOKEN_CREATE,
 | ||||||
|  | +	__MAX_BPF_CMD,
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  enum bpf_map_type { | ||||||
|  | @@ -932,7 +965,14 @@ enum bpf_map_type {
 | ||||||
|  |  	 */ | ||||||
|  |  	BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, | ||||||
|  |  	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, | ||||||
|  | -	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 | ||||||
|  | +	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
 | ||||||
|  | +	/* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
 | ||||||
|  | +	 * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
 | ||||||
|  | +	 * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
 | ||||||
|  | +	 * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
 | ||||||
|  | +	 * deprecated.
 | ||||||
|  | +	 */
 | ||||||
|  | +	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
 | ||||||
|  |  	BPF_MAP_TYPE_QUEUE, | ||||||
|  |  	BPF_MAP_TYPE_STACK, | ||||||
|  |  	BPF_MAP_TYPE_SK_STORAGE, | ||||||
|  | @@ -944,6 +984,7 @@ enum bpf_map_type {
 | ||||||
|  |  	BPF_MAP_TYPE_BLOOM_FILTER, | ||||||
|  |  	BPF_MAP_TYPE_USER_RINGBUF, | ||||||
|  |  	BPF_MAP_TYPE_CGRP_STORAGE, | ||||||
|  | +	__MAX_BPF_MAP_TYPE
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  /* Note that tracing related programs such as | ||||||
|  | @@ -987,6 +1028,8 @@ enum bpf_prog_type {
 | ||||||
|  |  	BPF_PROG_TYPE_LSM, | ||||||
|  |  	BPF_PROG_TYPE_SK_LOOKUP, | ||||||
|  |  	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ | ||||||
|  | +	BPF_PROG_TYPE_NETFILTER,
 | ||||||
|  | +	__MAX_BPF_PROG_TYPE
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  enum bpf_attach_type { | ||||||
|  | @@ -1035,6 +1078,17 @@ enum bpf_attach_type {
 | ||||||
|  |  	BPF_TRACE_KPROBE_MULTI, | ||||||
|  |  	BPF_LSM_CGROUP, | ||||||
|  |  	BPF_STRUCT_OPS, | ||||||
|  | +	BPF_NETFILTER,
 | ||||||
|  | +	BPF_TCX_INGRESS,
 | ||||||
|  | +	BPF_TCX_EGRESS,
 | ||||||
|  | +	BPF_TRACE_UPROBE_MULTI,
 | ||||||
|  | +	BPF_CGROUP_UNIX_CONNECT,
 | ||||||
|  | +	BPF_CGROUP_UNIX_SENDMSG,
 | ||||||
|  | +	BPF_CGROUP_UNIX_RECVMSG,
 | ||||||
|  | +	BPF_CGROUP_UNIX_GETPEERNAME,
 | ||||||
|  | +	BPF_CGROUP_UNIX_GETSOCKNAME,
 | ||||||
|  | +	BPF_NETKIT_PRIMARY,
 | ||||||
|  | +	BPF_NETKIT_PEER,
 | ||||||
|  |  	__MAX_BPF_ATTACH_TYPE | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | @@ -1051,8 +1105,23 @@ enum bpf_link_type {
 | ||||||
|  |  	BPF_LINK_TYPE_PERF_EVENT = 7, | ||||||
|  |  	BPF_LINK_TYPE_KPROBE_MULTI = 8, | ||||||
|  |  	BPF_LINK_TYPE_STRUCT_OPS = 9, | ||||||
|  | +	BPF_LINK_TYPE_NETFILTER = 10,
 | ||||||
|  | +	BPF_LINK_TYPE_TCX = 11,
 | ||||||
|  | +	BPF_LINK_TYPE_UPROBE_MULTI = 12,
 | ||||||
|  | +	BPF_LINK_TYPE_NETKIT = 13,
 | ||||||
|  | +	__MAX_BPF_LINK_TYPE,
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  | +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
 | ||||||
|  |   | ||||||
|  | -	MAX_BPF_LINK_TYPE,
 | ||||||
|  | +enum bpf_perf_event_type {
 | ||||||
|  | +	BPF_PERF_EVENT_UNSPEC = 0,
 | ||||||
|  | +	BPF_PERF_EVENT_UPROBE = 1,
 | ||||||
|  | +	BPF_PERF_EVENT_URETPROBE = 2,
 | ||||||
|  | +	BPF_PERF_EVENT_KPROBE = 3,
 | ||||||
|  | +	BPF_PERF_EVENT_KRETPROBE = 4,
 | ||||||
|  | +	BPF_PERF_EVENT_TRACEPOINT = 5,
 | ||||||
|  | +	BPF_PERF_EVENT_EVENT = 6,
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command | ||||||
|  | @@ -1101,7 +1170,12 @@ enum bpf_link_type {
 | ||||||
|  |   */ | ||||||
|  |  #define BPF_F_ALLOW_OVERRIDE	(1U << 0) | ||||||
|  |  #define BPF_F_ALLOW_MULTI	(1U << 1) | ||||||
|  | +/* Generic attachment flags. */
 | ||||||
|  |  #define BPF_F_REPLACE		(1U << 2) | ||||||
|  | +#define BPF_F_BEFORE		(1U << 3)
 | ||||||
|  | +#define BPF_F_AFTER		(1U << 4)
 | ||||||
|  | +#define BPF_F_ID		(1U << 5)
 | ||||||
|  | +#define BPF_F_LINK		BPF_F_LINK /* 1 << 13 */
 | ||||||
|  |   | ||||||
|  |  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the | ||||||
|  |   * verifier will perform strict alignment checking as if the kernel | ||||||
|  | @@ -1163,10 +1237,27 @@ enum bpf_link_type {
 | ||||||
|  |   */ | ||||||
|  |  #define BPF_F_XDP_DEV_BOUND_ONLY	(1U << 6) | ||||||
|  |   | ||||||
|  | +/* The verifier internal test flag. Behavior is undefined */
 | ||||||
|  | +#define BPF_F_TEST_REG_INVARIANTS	(1U << 7)
 | ||||||
|  | +
 | ||||||
|  |  /* link_create.kprobe_multi.flags used in LINK_CREATE command for | ||||||
|  |   * BPF_TRACE_KPROBE_MULTI attach type to create return probe. | ||||||
|  |   */ | ||||||
|  | -#define BPF_F_KPROBE_MULTI_RETURN	(1U << 0)
 | ||||||
|  | +enum {
 | ||||||
|  | +	BPF_F_KPROBE_MULTI_RETURN = (1U << 0)
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  | +/* link_create.uprobe_multi.flags used in LINK_CREATE command for
 | ||||||
|  | + * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
 | ||||||
|  | + */
 | ||||||
|  | +enum {
 | ||||||
|  | +	BPF_F_UPROBE_MULTI_RETURN = (1U << 0)
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  | +/* link_create.netfilter.flags used in LINK_CREATE command for
 | ||||||
|  | + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
 | ||||||
|  | + */
 | ||||||
|  | +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
 | ||||||
|  |   | ||||||
|  |  /* When BPF ldimm64's insn[0].src_reg != 0 then this can have | ||||||
|  |   * the following extensions: | ||||||
|  | @@ -1271,6 +1362,15 @@ enum {
 | ||||||
|  |   | ||||||
|  |  /* Create a map that will be registered/unregesitered by the backed bpf_link */ | ||||||
|  |  	BPF_F_LINK		= (1U << 13), | ||||||
|  | +
 | ||||||
|  | +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
 | ||||||
|  | +	BPF_F_PATH_FD		= (1U << 14),
 | ||||||
|  | +
 | ||||||
|  | +/* Flag for value_type_btf_obj_fd, the fd is available */
 | ||||||
|  | +	BPF_F_VTYPE_BTF_OBJ_FD	= (1U << 15),
 | ||||||
|  | +
 | ||||||
|  | +/* BPF token FD is passed in a corresponding command's token_fd field */
 | ||||||
|  | +	BPF_F_TOKEN_FD          = (1U << 16),
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  /* Flags for BPF_PROG_QUERY. */ | ||||||
|  | @@ -1344,6 +1444,15 @@ union bpf_attr {
 | ||||||
|  |  		 * to using 5 hash functions). | ||||||
|  |  		 */ | ||||||
|  |  		__u64	map_extra; | ||||||
|  | +
 | ||||||
|  | +		__s32   value_type_btf_obj_fd;	/* fd pointing to a BTF
 | ||||||
|  | +						 * type data for
 | ||||||
|  | +						 * btf_vmlinux_value_type_id.
 | ||||||
|  | +						 */
 | ||||||
|  | +		/* BPF token FD to use with BPF_MAP_CREATE operation.
 | ||||||
|  | +		 * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
 | ||||||
|  | +		 */
 | ||||||
|  | +		__s32	map_token_fd;
 | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ | ||||||
|  | @@ -1413,23 +1522,39 @@ union bpf_attr {
 | ||||||
|  |  		 * truncated), or smaller (if log buffer wasn't filled completely). | ||||||
|  |  		 */ | ||||||
|  |  		__u32		log_true_size; | ||||||
|  | +		/* BPF token FD to use with BPF_PROG_LOAD operation.
 | ||||||
|  | +		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 | ||||||
|  | +		 */
 | ||||||
|  | +		__s32		prog_token_fd;
 | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_OBJ_* commands */ | ||||||
|  |  		__aligned_u64	pathname; | ||||||
|  |  		__u32		bpf_fd; | ||||||
|  |  		__u32		file_flags; | ||||||
|  | +		/* Same as dirfd in openat() syscall; see openat(2)
 | ||||||
|  | +		 * manpage for details of path FD and pathname semantics;
 | ||||||
|  | +		 * path_fd should accompanied by BPF_F_PATH_FD flag set in
 | ||||||
|  | +		 * file_flags field, otherwise it should be set to zero;
 | ||||||
|  | +		 * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
 | ||||||
|  | +		 */
 | ||||||
|  | +		__s32		path_fd;
 | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ | ||||||
|  | -		__u32		target_fd;	/* container object to attach to */
 | ||||||
|  | -		__u32		attach_bpf_fd;	/* eBPF program to attach */
 | ||||||
|  | +		union {
 | ||||||
|  | +			__u32	target_fd;	/* target object to attach to or ... */
 | ||||||
|  | +			__u32	target_ifindex;	/* target ifindex */
 | ||||||
|  | +		};
 | ||||||
|  | +		__u32		attach_bpf_fd;
 | ||||||
|  |  		__u32		attach_type; | ||||||
|  |  		__u32		attach_flags; | ||||||
|  | -		__u32		replace_bpf_fd;	/* previously attached eBPF
 | ||||||
|  | -						 * program to replace if
 | ||||||
|  | -						 * BPF_F_REPLACE is used
 | ||||||
|  | -						 */
 | ||||||
|  | +		__u32		replace_bpf_fd;
 | ||||||
|  | +		union {
 | ||||||
|  | +			__u32	relative_fd;
 | ||||||
|  | +			__u32	relative_id;
 | ||||||
|  | +		};
 | ||||||
|  | +		__u64		expected_revision;
 | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ | ||||||
|  | @@ -1475,16 +1600,26 @@ union bpf_attr {
 | ||||||
|  |  	} info; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_PROG_QUERY command */ | ||||||
|  | -		__u32		target_fd;	/* container object to query */
 | ||||||
|  | +		union {
 | ||||||
|  | +			__u32	target_fd;	/* target object to query or ... */
 | ||||||
|  | +			__u32	target_ifindex;	/* target ifindex */
 | ||||||
|  | +		};
 | ||||||
|  |  		__u32		attach_type; | ||||||
|  |  		__u32		query_flags; | ||||||
|  |  		__u32		attach_flags; | ||||||
|  |  		__aligned_u64	prog_ids; | ||||||
|  | -		__u32		prog_cnt;
 | ||||||
|  | +		union {
 | ||||||
|  | +			__u32	prog_cnt;
 | ||||||
|  | +			__u32	count;
 | ||||||
|  | +		};
 | ||||||
|  | +		__u32		:32;
 | ||||||
|  |  		/* output: per-program attach_flags. | ||||||
|  |  		 * not allowed to be set during effective query. | ||||||
|  |  		 */ | ||||||
|  |  		__aligned_u64	prog_attach_flags; | ||||||
|  | +		__aligned_u64	link_ids;
 | ||||||
|  | +		__aligned_u64	link_attach_flags;
 | ||||||
|  | +		__u64		revision;
 | ||||||
|  |  	} query; | ||||||
|  |   | ||||||
|  |  	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ | ||||||
|  | @@ -1503,6 +1638,11 @@ union bpf_attr {
 | ||||||
|  |  		 * truncated), or smaller (if log buffer wasn't filled completely). | ||||||
|  |  		 */ | ||||||
|  |  		__u32		btf_log_true_size; | ||||||
|  | +		__u32		btf_flags;
 | ||||||
|  | +		/* BPF token FD to use with BPF_BTF_LOAD operation.
 | ||||||
|  | +		 * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
 | ||||||
|  | +		 */
 | ||||||
|  | +		__s32		btf_token_fd;
 | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  |  	struct { | ||||||
|  | @@ -1527,13 +1667,13 @@ union bpf_attr {
 | ||||||
|  |  			__u32		map_fd;		/* struct_ops to attach */ | ||||||
|  |  		}; | ||||||
|  |  		union { | ||||||
|  | -			__u32		target_fd;	/* object to attach to */
 | ||||||
|  | -			__u32		target_ifindex; /* target ifindex */
 | ||||||
|  | +			__u32	target_fd;	/* target object to attach to or ... */
 | ||||||
|  | +			__u32	target_ifindex; /* target ifindex */
 | ||||||
|  |  		}; | ||||||
|  |  		__u32		attach_type;	/* attach type */ | ||||||
|  |  		__u32		flags;		/* extra flags */ | ||||||
|  |  		union { | ||||||
|  | -			__u32		target_btf_id;	/* btf_id of target to attach to */
 | ||||||
|  | +			__u32	target_btf_id;	/* btf_id of target to attach to */
 | ||||||
|  |  			struct { | ||||||
|  |  				__aligned_u64	iter_info;	/* extra bpf_iter_link_info */ | ||||||
|  |  				__u32		iter_info_len;	/* iter_info length */ | ||||||
|  | @@ -1561,6 +1701,35 @@ union bpf_attr {
 | ||||||
|  |  				 */ | ||||||
|  |  				__u64		cookie; | ||||||
|  |  			} tracing; | ||||||
|  | +			struct {
 | ||||||
|  | +				__u32		pf;
 | ||||||
|  | +				__u32		hooknum;
 | ||||||
|  | +				__s32		priority;
 | ||||||
|  | +				__u32		flags;
 | ||||||
|  | +			} netfilter;
 | ||||||
|  | +			struct {
 | ||||||
|  | +				union {
 | ||||||
|  | +					__u32	relative_fd;
 | ||||||
|  | +					__u32	relative_id;
 | ||||||
|  | +				};
 | ||||||
|  | +				__u64		expected_revision;
 | ||||||
|  | +			} tcx;
 | ||||||
|  | +			struct {
 | ||||||
|  | +				__aligned_u64	path;
 | ||||||
|  | +				__aligned_u64	offsets;
 | ||||||
|  | +				__aligned_u64	ref_ctr_offsets;
 | ||||||
|  | +				__aligned_u64	cookies;
 | ||||||
|  | +				__u32		cnt;
 | ||||||
|  | +				__u32		flags;
 | ||||||
|  | +				__u32		pid;
 | ||||||
|  | +			} uprobe_multi;
 | ||||||
|  | +			struct {
 | ||||||
|  | +				union {
 | ||||||
|  | +					__u32	relative_fd;
 | ||||||
|  | +					__u32	relative_id;
 | ||||||
|  | +				};
 | ||||||
|  | +				__u64		expected_revision;
 | ||||||
|  | +			} netkit;
 | ||||||
|  |  		}; | ||||||
|  |  	} link_create; | ||||||
|  |   | ||||||
|  | @@ -1604,6 +1773,11 @@ union bpf_attr {
 | ||||||
|  |  		__u32		flags;		/* extra flags */ | ||||||
|  |  	} prog_bind_map; | ||||||
|  |   | ||||||
|  | +	struct { /* struct used by BPF_TOKEN_CREATE command */
 | ||||||
|  | +		__u32		flags;
 | ||||||
|  | +		__u32		bpffs_fd;
 | ||||||
|  | +	} token_create;
 | ||||||
|  | +
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  /* The description below is an attempt at providing documentation to eBPF | ||||||
|  | @@ -1879,7 +2053,9 @@ union bpf_attr {
 | ||||||
|  |   * 		performed again, if the helper is used in combination with | ||||||
|  |   * 		direct packet access. | ||||||
|  |   * 	Return | ||||||
|  | - * 		0 on success, or a negative error in case of failure.
 | ||||||
|  | + * 		0 on success, or a negative error in case of failure. Positive
 | ||||||
|  | + * 		error indicates a potential drop or congestion in the target
 | ||||||
|  | + * 		device. The particular positive error codes are not defined.
 | ||||||
|  |   * | ||||||
|  |   * u64 bpf_get_current_pid_tgid(void) | ||||||
|  |   * 	Description | ||||||
|  | @@ -2612,8 +2788,8 @@ union bpf_attr {
 | ||||||
|  |   * 		*bpf_socket* should be one of the following: | ||||||
|  |   * | ||||||
|  |   * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. | ||||||
|  | - * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
 | ||||||
|  | - * 		  and **BPF_CGROUP_INET6_CONNECT**.
 | ||||||
|  | + *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
 | ||||||
|  | + *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
 | ||||||
|  |   * | ||||||
|  |   * 		This helper actually implements a subset of **setsockopt()**. | ||||||
|  |   * 		It supports the following *level*\ s: | ||||||
|  | @@ -2851,8 +3027,8 @@ union bpf_attr {
 | ||||||
|  |   * 		*bpf_socket* should be one of the following: | ||||||
|  |   * | ||||||
|  |   * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. | ||||||
|  | - * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
 | ||||||
|  | - * 		  and **BPF_CGROUP_INET6_CONNECT**.
 | ||||||
|  | + *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
 | ||||||
|  | + *		  **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
 | ||||||
|  |   * | ||||||
|  |   * 		This helper actually implements a subset of **getsockopt()**. | ||||||
|  |   * 		It supports the same set of *optname*\ s that is supported by | ||||||
|  | @@ -3160,6 +3336,10 @@ union bpf_attr {
 | ||||||
|  |   *		**BPF_FIB_LOOKUP_DIRECT** | ||||||
|  |   *			Do a direct table lookup vs full lookup using FIB | ||||||
|  |   *			rules. | ||||||
|  | + *		**BPF_FIB_LOOKUP_TBID**
 | ||||||
|  | + *			Used with BPF_FIB_LOOKUP_DIRECT.
 | ||||||
|  | + *			Use the routing table ID present in *params*->tbid
 | ||||||
|  | + *			for the fib lookup.
 | ||||||
|  |   *		**BPF_FIB_LOOKUP_OUTPUT** | ||||||
|  |   *			Perform lookup from an egress perspective (default is | ||||||
|  |   *			ingress). | ||||||
|  | @@ -3168,6 +3348,11 @@ union bpf_attr {
 | ||||||
|  |   *			and *params*->smac will not be set as output. A common | ||||||
|  |   *			use case is to call **bpf_redirect_neigh**\ () after | ||||||
|  |   *			doing **bpf_fib_lookup**\ (). | ||||||
|  | + *		**BPF_FIB_LOOKUP_SRC**
 | ||||||
|  | + *			Derive and set source IP addr in *params*->ipv{4,6}_src
 | ||||||
|  | + *			for the nexthop. If the src addr cannot be derived,
 | ||||||
|  | + *			**BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
 | ||||||
|  | + *			case, *params*->dmac and *params*->smac are not set either.
 | ||||||
|  |   * | ||||||
|  |   *		*ctx* is either **struct xdp_md** for XDP programs or | ||||||
|  |   *		**struct sk_buff** tc cls_act programs. | ||||||
|  | @@ -4137,9 +4322,6 @@ union bpf_attr {
 | ||||||
|  |   *		**-EOPNOTSUPP** if the operation is not supported, for example | ||||||
|  |   *		a call from outside of TC ingress. | ||||||
|  |   * | ||||||
|  | - *		**-ESOCKTNOSUPPORT** if the socket type is not supported
 | ||||||
|  | - *		(reuseport).
 | ||||||
|  | - *
 | ||||||
|  |   * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) | ||||||
|  |   *	Description | ||||||
|  |   *		Helper is overloaded depending on BPF program type. This | ||||||
|  | @@ -4404,6 +4586,8 @@ union bpf_attr {
 | ||||||
|  |   * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) | ||||||
|  |   *	Description | ||||||
|  |   *		Return a user or a kernel stack in bpf program provided buffer. | ||||||
|  | + *		Note: the user stack will only be populated if the *task* is
 | ||||||
|  | + *		the current task; all other tasks will return -EOPNOTSUPP.
 | ||||||
|  |   *		To achieve this, the helper needs *task*, which is a valid | ||||||
|  |   *		pointer to **struct task_struct**. To store the stacktrace, the | ||||||
|  |   *		bpf program provides *buf* with a nonnegative *size*. | ||||||
|  | @@ -4415,6 +4599,7 @@ union bpf_attr {
 | ||||||
|  |   * | ||||||
|  |   *		**BPF_F_USER_STACK** | ||||||
|  |   *			Collect a user space stack instead of a kernel stack. | ||||||
|  | + *			The *task* must be the current task.
 | ||||||
|  |   *		**BPF_F_USER_BUILD_ID** | ||||||
|  |   *			Collect buildid+offset instead of ips for user stack, | ||||||
|  |   *			only valid if **BPF_F_USER_STACK** is also specified. | ||||||
|  | @@ -4718,9 +4903,9 @@ union bpf_attr {
 | ||||||
|  |   * 		going through the CPU's backlog queue. | ||||||
|  |   * | ||||||
|  |   * 		The *flags* argument is reserved and must be 0. The helper is | ||||||
|  | - * 		currently only supported for tc BPF program types at the ingress
 | ||||||
|  | - * 		hook and for veth device types. The peer device must reside in a
 | ||||||
|  | - * 		different network namespace.
 | ||||||
|  | + * 		currently only supported for tc BPF program types at the
 | ||||||
|  | + * 		ingress hook and for veth and netkit target device types. The
 | ||||||
|  | + * 		peer device must reside in a different network namespace.
 | ||||||
|  |   * 	Return | ||||||
|  |   * 		The helper returns **TC_ACT_REDIRECT** on success or | ||||||
|  |   * 		**TC_ACT_SHOT** on error. | ||||||
|  | @@ -5003,6 +5188,8 @@ union bpf_attr {
 | ||||||
|  |   *		**BPF_F_TIMER_ABS** | ||||||
|  |   *			Start the timer in absolute expire value instead of the | ||||||
|  |   *			default relative one. | ||||||
|  | + *		**BPF_F_TIMER_CPU_PIN**
 | ||||||
|  | + *			Timer will be pinned to the CPU of the caller.
 | ||||||
|  |   * | ||||||
|  |   *	Return | ||||||
|  |   *		0 on success. | ||||||
|  | @@ -5022,9 +5209,14 @@ union bpf_attr {
 | ||||||
|  |   * u64 bpf_get_func_ip(void *ctx) | ||||||
|  |   * 	Description | ||||||
|  |   * 		Get address of the traced function (for tracing and kprobe programs). | ||||||
|  | + *
 | ||||||
|  | + * 		When called for kprobe program attached as uprobe it returns
 | ||||||
|  | + * 		probe address for both entry and return uprobe.
 | ||||||
|  | + *
 | ||||||
|  |   * 	Return | ||||||
|  | - * 		Address of the traced function.
 | ||||||
|  | + * 		Address of the traced function for kprobe.
 | ||||||
|  |   * 		0 for kprobes placed within the function (not at the entry). | ||||||
|  | + * 		Address of the probe for uprobe and return uprobe.
 | ||||||
|  |   * | ||||||
|  |   * u64 bpf_get_attach_cookie(void *ctx) | ||||||
|  |   * 	Description | ||||||
|  | @@ -6165,6 +6357,19 @@ struct bpf_sock_tuple {
 | ||||||
|  |  	}; | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  | +/* (Simplified) user return codes for tcx prog type.
 | ||||||
|  | + * A valid tcx program must return one of these defined values. All other
 | ||||||
|  | + * return codes are reserved for future use. Must remain compatible with
 | ||||||
|  | + * their TC_ACT_* counter-parts. For compatibility in behavior, unknown
 | ||||||
|  | + * return codes are mapped to TCX_NEXT.
 | ||||||
|  | + */
 | ||||||
|  | +enum tcx_action_base {
 | ||||||
|  | +	TCX_NEXT	= -1,
 | ||||||
|  | +	TCX_PASS	= 0,
 | ||||||
|  | +	TCX_DROP	= 2,
 | ||||||
|  | +	TCX_REDIRECT	= 7,
 | ||||||
|  | +};
 | ||||||
|  | +
 | ||||||
|  |  struct bpf_xdp_sock { | ||||||
|  |  	__u32 queue_id; | ||||||
|  |  }; | ||||||
|  | @@ -6346,7 +6551,7 @@ struct bpf_map_info {
 | ||||||
|  |  	__u32 btf_id; | ||||||
|  |  	__u32 btf_key_type_id; | ||||||
|  |  	__u32 btf_value_type_id; | ||||||
|  | -	__u32 :32;	/* alignment pad */
 | ||||||
|  | +	__u32 btf_vmlinux_id;
 | ||||||
|  |  	__u64 map_extra; | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  | @@ -6411,6 +6616,69 @@ struct bpf_link_info {
 | ||||||
|  |  		struct { | ||||||
|  |  			__u32 map_id; | ||||||
|  |  		} struct_ops; | ||||||
|  | +		struct {
 | ||||||
|  | +			__u32 pf;
 | ||||||
|  | +			__u32 hooknum;
 | ||||||
|  | +			__s32 priority;
 | ||||||
|  | +			__u32 flags;
 | ||||||
|  | +		} netfilter;
 | ||||||
|  | +		struct {
 | ||||||
|  | +			__aligned_u64 addrs;
 | ||||||
|  | +			__u32 count; /* in/out: kprobe_multi function count */
 | ||||||
|  | +			__u32 flags;
 | ||||||
|  | +			__u64 missed;
 | ||||||
|  | +			__aligned_u64 cookies;
 | ||||||
|  | +		} kprobe_multi;
 | ||||||
|  | +		struct {
 | ||||||
|  | +			__aligned_u64 path;
 | ||||||
|  | +			__aligned_u64 offsets;
 | ||||||
|  | +			__aligned_u64 ref_ctr_offsets;
 | ||||||
|  | +			__aligned_u64 cookies;
 | ||||||
|  | +			__u32 path_size; /* in/out: real path size on success, including zero byte */
 | ||||||
|  | +			__u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */
 | ||||||
|  | +			__u32 flags;
 | ||||||
|  | +			__u32 pid;
 | ||||||
|  | +		} uprobe_multi;
 | ||||||
|  | +		struct {
 | ||||||
|  | +			__u32 type; /* enum bpf_perf_event_type */
 | ||||||
|  | +			__u32 :32;
 | ||||||
|  | +			union {
 | ||||||
|  | +				struct {
 | ||||||
|  | +					__aligned_u64 file_name; /* in/out */
 | ||||||
|  | +					__u32 name_len;
 | ||||||
|  | +					__u32 offset; /* offset from file_name */
 | ||||||
|  | +					__u64 cookie;
 | ||||||
|  | +				} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
 | ||||||
|  | +				struct {
 | ||||||
|  | +					__aligned_u64 func_name; /* in/out */
 | ||||||
|  | +					__u32 name_len;
 | ||||||
|  | +					__u32 offset; /* offset from func_name */
 | ||||||
|  | +					__u64 addr;
 | ||||||
|  | +					__u64 missed;
 | ||||||
|  | +					__u64 cookie;
 | ||||||
|  | +				} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
 | ||||||
|  | +				struct {
 | ||||||
|  | +					__aligned_u64 tp_name;   /* in/out */
 | ||||||
|  | +					__u32 name_len;
 | ||||||
|  | +					__u32 :32;
 | ||||||
|  | +					__u64 cookie;
 | ||||||
|  | +				} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
 | ||||||
|  | +				struct {
 | ||||||
|  | +					__u64 config;
 | ||||||
|  | +					__u32 type;
 | ||||||
|  | +					__u32 :32;
 | ||||||
|  | +					__u64 cookie;
 | ||||||
|  | +				} event; /* BPF_PERF_EVENT_EVENT */
 | ||||||
|  | +			};
 | ||||||
|  | +		} perf_event;
 | ||||||
|  | +		struct {
 | ||||||
|  | +			__u32 ifindex;
 | ||||||
|  | +			__u32 attach_type;
 | ||||||
|  | +		} tcx;
 | ||||||
|  | +		struct {
 | ||||||
|  | +			__u32 ifindex;
 | ||||||
|  | +			__u32 attach_type;
 | ||||||
|  | +		} netkit;
 | ||||||
|  |  	}; | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  | @@ -6707,6 +6975,7 @@ enum {
 | ||||||
|  |  	BPF_TCP_LISTEN, | ||||||
|  |  	BPF_TCP_CLOSING,	/* Now a valid state */ | ||||||
|  |  	BPF_TCP_NEW_SYN_RECV, | ||||||
|  | +	BPF_TCP_BOUND_INACTIVE,
 | ||||||
|  |   | ||||||
|  |  	BPF_TCP_MAX_STATES	/* Leave at the end! */ | ||||||
|  |  }; | ||||||
|  | @@ -6808,6 +7077,8 @@ enum {
 | ||||||
|  |  	BPF_FIB_LOOKUP_DIRECT  = (1U << 0), | ||||||
|  |  	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1), | ||||||
|  |  	BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), | ||||||
|  | +	BPF_FIB_LOOKUP_TBID    = (1U << 3),
 | ||||||
|  | +	BPF_FIB_LOOKUP_SRC     = (1U << 4),
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  enum { | ||||||
|  | @@ -6820,6 +7091,7 @@ enum {
 | ||||||
|  |  	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */ | ||||||
|  |  	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */ | ||||||
|  |  	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */ | ||||||
|  | +	BPF_FIB_LKUP_RET_NO_SRC_ADDR,  /* failed to derive IP src addr */
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  struct bpf_fib_lookup { | ||||||
|  | @@ -6854,6 +7126,9 @@ struct bpf_fib_lookup {
 | ||||||
|  |  		__u32	rt_metric; | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  | +	/* input: source address to consider for lookup
 | ||||||
|  | +	 * output: source address result from lookup
 | ||||||
|  | +	 */
 | ||||||
|  |  	union { | ||||||
|  |  		__be32		ipv4_src; | ||||||
|  |  		__u32		ipv6_src[4];  /* in6_addr; network order */ | ||||||
|  | @@ -6868,9 +7143,19 @@ struct bpf_fib_lookup {
 | ||||||
|  |  		__u32		ipv6_dst[4];  /* in6_addr; network order */ | ||||||
|  |  	}; | ||||||
|  |   | ||||||
|  | -	/* output */
 | ||||||
|  | -	__be16	h_vlan_proto;
 | ||||||
|  | -	__be16	h_vlan_TCI;
 | ||||||
|  | +	union {
 | ||||||
|  | +		struct {
 | ||||||
|  | +			/* output */
 | ||||||
|  | +			__be16	h_vlan_proto;
 | ||||||
|  | +			__be16	h_vlan_TCI;
 | ||||||
|  | +		};
 | ||||||
|  | +		/* input: when accompanied with the
 | ||||||
|  | +		 * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
 | ||||||
|  | +		 * specific routing table to use for the fib lookup.
 | ||||||
|  | +		 */
 | ||||||
|  | +		__u32	tbid;
 | ||||||
|  | +	};
 | ||||||
|  | +
 | ||||||
|  |  	__u8	smac[6];     /* ETH_ALEN */ | ||||||
|  |  	__u8	dmac[6];     /* ETH_ALEN */ | ||||||
|  |  }; | ||||||
|  | @@ -6956,38 +7241,31 @@ struct bpf_spin_lock {
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  struct bpf_timer { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[2];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_dynptr { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[2];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_list_head { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[2];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_list_node { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[3];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_rb_root { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[2];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_rb_node { | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | -	__u64 :64;
 | ||||||
|  | +	__u64 __opaque[4];
 | ||||||
|  |  } __attribute__((aligned(8))); | ||||||
|  |   | ||||||
|  |  struct bpf_refcount { | ||||||
|  | -	__u32 :32;
 | ||||||
|  | +	__u32 __opaque[1];
 | ||||||
|  |  } __attribute__((aligned(4))); | ||||||
|  |   | ||||||
|  |  struct bpf_sysctl { | ||||||
|  | @@ -7143,9 +7421,11 @@ struct bpf_core_relo {
 | ||||||
|  |   * Flags to control bpf_timer_start() behaviour. | ||||||
|  |   *     - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is | ||||||
|  |   *       relative to current time. | ||||||
|  | + *     - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
 | ||||||
|  |   */ | ||||||
|  |  enum { | ||||||
|  |  	BPF_F_TIMER_ABS = (1ULL << 0), | ||||||
|  | +	BPF_F_TIMER_CPU_PIN = (1ULL << 1),
 | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  /* BPF numbers iterator state */ | ||||||
|  | -- 
 | ||||||
|  | 2.43.0 | ||||||
|  | 
 | ||||||
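Background note (not part of the commit): among other things, the header sync above adds the tcx attach types (BPF_TCX_INGRESS/BPF_TCX_EGRESS) and the tcx_action_base return codes. A minimal sketch of a sched_cls/tcx program using those codes (illustrative only; assumes a libbpf/UAPI install that already carries the new definitions):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int tcx_pass_all(struct __sk_buff *skb)
    {
            return TCX_PASS; /* hand the packet on; TCX_DROP/TCX_REDIRECT are the other valid codes */
    }

    char LICENSE[] SEC("license") = "GPL";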
Use-bpf_obj_get_info_by_fd-instead-of-bpf_btf_get_in.patch (deleted file, 31 lines):

From 0973fd70c1c50e57a3db0b09e239b1d1fd3f1c55 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Fri, 21 Jul 2023 16:10:18 +0200
Subject: [PATCH] Use bpf_obj_get_info_by_fd() instead of
 bpf_btf_get_info_by_fd()

The libbpf version in rawhide doesn't have the typed
bpf_*_get_info_by_fd().
---
 src/cc/libbpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 360fd81d..a3e34da2 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -727,9 +727,9 @@ static int find_btf_id(const char *module_name, const char *func_name,
     info.name = ptr_to_u64(name);
     info.name_len = sizeof(name);
 
-    err = bpf_btf_get_info_by_fd(fd, &info, &len);
+    err = bpf_obj_get_info_by_fd(fd, &info, &len);
     if (err) {
-      fprintf(stderr, "bpf_btf_get_info_by_fd failed: %d\n", err);
+      fprintf(stderr, "bpf_obj_get_info_by_fd failed: %d\n", err);
       goto err_out;
     }
 
-- 
2.41.0
							
								
								
									
bcc.spec (18 lines changed):

@@ -10,7 +10,7 @@
 %endif
 %endif
 
-%ifarch x86_64 ppc64 ppc64le aarch64
+%ifarch x86_64 ppc64 ppc64le aarch64 s390x
 %bcond_without libbpf_tools
 %else
 %bcond_with libbpf_tools
@@ -24,16 +24,15 @@
 
 
 Name:           bcc
-Version:        0.28.0
-Release:        3%{?dist}
+Version:        0.29.1
+Release:        1%{?dist}
 Summary:        BPF Compiler Collection (BCC)
 License:        Apache-2.0
 URL:            https://github.com/iovisor/bcc
 Source0:        %{url}/archive/v%{version}/%{name}-%{version}.tar.gz
-Patch0:         Use-bpf_obj_get_info_by_fd-instead-of-bpf_btf_get_in.patch
-Patch1:         libbpf-tools-add-block_io_-start-done-tracepoints-su.patch
-Patch2:         tools-Add-support-for-the-new-block_io_-tracepoints.patch
-Patch3:         tool-slabratetop-add-definition-of-freelist_aba_t.patch
+Patch0:         libbpf-tools-Fix-bindsnoop-for-kernel-v6.6.patch
+Patch1:         Fix-ttysnoop.py-with-newer-kernels.patch
+Patch2:         Sync-with-latest-libbpf-repo-4889.patch
 
 # Arches will be included as upstream support is added and dependencies are
 # satisfied in the respective arches
@@ -242,6 +241,11 @@ cp -a libbpf-tools/tmp-install/bin/* %{buildroot}/%{_sbindir}/
 %endif
 
 %changelog
+* Mon Feb 05 2024 Jerome Marchand <jmarchan@redhat.com> - 0.29.1-1
+- Rebase to the latest release version (#2253688)
+- Enable libbpf-tools on s390x (#2249458)
+- Misc 0.29.1 fixes
+
 * Tue Jan 23 2024 Fedora Release Engineering <releng@fedoraproject.org> - 0.28.0-3
 - Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild
 
							
								
								
									
libbpf-tools-Fix-bindsnoop-for-kernel-v6.6.patch (new file, 114 lines):

From abf7b251c1461dcbe0c1e75d1d0da71662c9fae1 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Sun, 17 Dec 2023 11:27:10 +0000
Subject: [PATCH] libbpf-tools: Fix bindsnoop for kernel v6.6+

The freebind field in struct inet_sock gone in recent kernel
versions due to some kernel refactor works ([0]). The change
breaks the bindsnoop tool. Fix it in a CO-RE way.

This should close #4838.

  [0]: https://lore.kernel.org/all/20230816081547.1272409-1-edumazet@google.com/

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
---
 libbpf-tools/bindsnoop.bpf.c  |  8 +++--
 libbpf-tools/core_fixes.bpf.h | 56 +++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/libbpf-tools/bindsnoop.bpf.c b/libbpf-tools/bindsnoop.bpf.c
index 41dce942..ead19c67 100644
--- a/libbpf-tools/bindsnoop.bpf.c
+++ b/libbpf-tools/bindsnoop.bpf.c
@@ -5,7 +5,9 @@
 #include <bpf/bpf_core_read.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
+
 #include "bindsnoop.h"
+#include "core_fixes.bpf.h"
 
 #define MAX_ENTRIES	10240
 #define MAX_PORTS	1024
@@ -85,9 +87,9 @@ static int probe_exit(struct pt_regs *ctx, short ver)
 	if (filter_by_port && !port)
 		goto cleanup;
 
-	opts.fields.freebind             = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, freebind);
-	opts.fields.transparent          = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, transparent);
-	opts.fields.bind_address_no_port = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, bind_address_no_port);
+	opts.fields.freebind             = get_inet_sock_freebind(inet_sock);
+	opts.fields.transparent          = get_inet_sock_transparent(inet_sock);
+	opts.fields.bind_address_no_port = get_inet_sock_bind_address_no_port(inet_sock);
 	opts.fields.reuseaddress         = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuse);
 	opts.fields.reuseport            = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuseport);
 	event.opts = opts.data;
diff --git a/libbpf-tools/core_fixes.bpf.h b/libbpf-tools/core_fixes.bpf.h
index 84cb7f18..a4c84c02 100644
--- a/libbpf-tools/core_fixes.bpf.h
+++ b/libbpf-tools/core_fixes.bpf.h
@@ -249,4 +249,60 @@ static __always_inline __u64 get_sock_ident(struct sock *sk)
 	return (__u64)sk;
 }
 
+/**
+ * During kernel 6.6 development cycle, several bitfields in struct inet_sock gone,
+ * they are placed in inet_sock::inet_flags instead ([0]).
+ *
+ * References:
+ *   [0]: https://lore.kernel.org/all/20230816081547.1272409-1-edumazet@google.com/
+ */
+struct inet_sock___o {
+	__u8 freebind: 1;
+	__u8 transparent: 1;
+	__u8 bind_address_no_port: 1;
+};
+
+enum {
+	INET_FLAGS_FREEBIND___x = 11,
+	INET_FLAGS_TRANSPARENT___x = 15,
+	INET_FLAGS_BIND_ADDRESS_NO_PORT___x = 18,
+};
+
+struct inet_sock___x {
+	unsigned long inet_flags;
+};
+
+static __always_inline __u8 get_inet_sock_freebind(void *inet_sock)
+{
+	unsigned long inet_flags;
+
+	if (bpf_core_field_exists(struct inet_sock___o, freebind))
+		return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, freebind);
+
+	inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
+	return (1 << INET_FLAGS_FREEBIND___x) & inet_flags ? 1 : 0;
+}
+
+static __always_inline __u8 get_inet_sock_transparent(void *inet_sock)
+{
+	unsigned long inet_flags;
+
+	if (bpf_core_field_exists(struct inet_sock___o, transparent))
+		return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, transparent);
+
+	inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
+	return (1 << INET_FLAGS_TRANSPARENT___x) & inet_flags ? 1 : 0;
+}
+
+static __always_inline __u8 get_inet_sock_bind_address_no_port(void *inet_sock)
+{
+	unsigned long inet_flags;
+
+	if (bpf_core_field_exists(struct inet_sock___o, bind_address_no_port))
+		return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___o *)inet_sock, bind_address_no_port);
+
+	inet_flags = BPF_CORE_READ((struct inet_sock___x *)inet_sock, inet_flags);
+	return (1 << INET_FLAGS_BIND_ADDRESS_NO_PORT___x) & inet_flags ? 1 : 0;
+}
+
 #endif /* __CORE_FIXES_BPF_H */
-- 
2.43.0
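Background note (not part of the commit): the ___o/___x endings above are libbpf CO-RE "flavor" suffixes; everything from the triple underscore onward is dropped when libbpf matches the local definition against kernel BTF, so both variants relocate against the kernel's real struct inet_sock, and bpf_core_field_exists() selects the right layout at load time. A condensed sketch of the same check-and-fallback idea (identifiers and flavor names are illustrative; bit position 11 mirrors INET_FLAGS_FREEBIND___x above):

    struct inet_sock___old { __u8 freebind: 1; };          /* pre-6.6 layout */
    struct inet_sock___new { unsigned long inet_flags; };  /* 6.6+ layout    */

    static __always_inline __u8 read_freebind(void *isk)
    {
            if (bpf_core_field_exists(struct inet_sock___old, freebind))
                    return BPF_CORE_READ_BITFIELD_PROBED((struct inet_sock___old *)isk, freebind);
            return (BPF_CORE_READ((struct inet_sock___new *)isk, inet_flags) >> 11) & 1;
    }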
libbpf-tools-add-block_io_-start-done-tracepoints-su.patch (deleted file, 476 lines):
| From e1dfbe2d09583205acca1d1b5b09caefb460f2fd Mon Sep 17 00:00:00 2001 |  | ||||||
| From: mickey_zhu <mickey_zhu@realsil.com.cn> |  | ||||||
| Date: Tue, 27 Jun 2023 16:32:44 +0800 |  | ||||||
| Subject: [PATCH 1/2] libbpf-tools: add block_io_{start,done} tracepoints |  | ||||||
|  support to bio tools |  | ||||||
| 
 |  | ||||||
| Some bio tools fail to kprobe blk_account_io_{start,done} after v5.17, |  | ||||||
| because they become inlined, see [0]. To fix this issue, tracepoints |  | ||||||
| blick_io_{start,done} are introcuded in kernel, see[1]. |  | ||||||
| 
 |  | ||||||
| Update related bio tools to support new tracepoints, and also simplify |  | ||||||
| attach. |  | ||||||
| 
 |  | ||||||
| [0] Kernel commit 450b7879e345 (block: move blk_account_io_{start,done} to blk-mq.c) |  | ||||||
| [1] Kernel commit 5a80bd075f3b (block: introduce block_io_start/block_io_done tracepoints) |  | ||||||
| 
 |  | ||||||
| Change-Id: I62b957abd7ce2901eb114bd57c78938e4f083e4d |  | ||||||
| Signed-off-by: Mickey Zhu <mickey_zhu@realsil.com.cn> |  | ||||||
| ---
 |  | ||||||
|  libbpf-tools/biosnoop.bpf.c  |  9 ++++ |  | ||||||
|  libbpf-tools/biosnoop.c      | 78 +++++++++++++-------------------- |  | ||||||
|  libbpf-tools/biostacks.bpf.c | 46 +++++++++++++------ |  | ||||||
|  libbpf-tools/biostacks.c     | 85 +++++++++++++++++++++--------------- |  | ||||||
|  libbpf-tools/biotop.bpf.c    | 44 +++++++++++++++++-- |  | ||||||
|  libbpf-tools/biotop.c        | 59 ++++++++++++++++--------- |  | ||||||
|  6 files changed, 199 insertions(+), 122 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/libbpf-tools/biosnoop.bpf.c b/libbpf-tools/biosnoop.bpf.c
 |  | ||||||
| index b791555f..fcc5c5ce 100644
 |  | ||||||
| --- a/libbpf-tools/biosnoop.bpf.c
 |  | ||||||
| +++ b/libbpf-tools/biosnoop.bpf.c
 |  | ||||||
| @@ -76,6 +76,15 @@ int BPF_PROG(blk_account_io_start, struct request *rq)
 |  | ||||||
|  	return trace_pid(rq); |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +SEC("tp_btf/block_io_start")
 |  | ||||||
| +int BPF_PROG(block_io_start, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
 |  | ||||||
| +		return 0;
 |  | ||||||
| +
 |  | ||||||
| +	return trace_pid(rq);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  SEC("kprobe/blk_account_io_merge_bio") |  | ||||||
|  int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq) |  | ||||||
|  { |  | ||||||
| diff --git a/libbpf-tools/biosnoop.c b/libbpf-tools/biosnoop.c
 |  | ||||||
| index 21773729..f9468900 100644
 |  | ||||||
| --- a/libbpf-tools/biosnoop.c
 |  | ||||||
| +++ b/libbpf-tools/biosnoop.c
 |  | ||||||
| @@ -212,6 +212,16 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
 |  | ||||||
|  	fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu); |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +static void blk_account_io_set_attach_target(struct biosnoop_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	if (fentry_can_attach("blk_account_io_start", NULL))
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
 |  | ||||||
| +					       0, "blk_account_io_start");
 |  | ||||||
| +	else
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
 |  | ||||||
| +					       0, "__blk_account_io_start");
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  int main(int argc, char **argv) |  | ||||||
|  { |  | ||||||
|  	const struct partition *partition; |  | ||||||
| @@ -260,12 +270,23 @@ int main(int argc, char **argv)
 |  | ||||||
|  	obj->rodata->filter_cg = env.cg; |  | ||||||
|  	obj->rodata->min_ns = env.min_lat_ms * 1000000; |  | ||||||
|   |  | ||||||
| -	if (fentry_can_attach("blk_account_io_start", NULL))
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
 |  | ||||||
| -					       "blk_account_io_start");
 |  | ||||||
| -	else
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
 |  | ||||||
| -					       "__blk_account_io_start");
 |  | ||||||
| +	if (tracepoint_exists("block", "block_io_start"))
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
 |  | ||||||
| +	else {
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.block_io_start, false);
 |  | ||||||
| +		blk_account_io_set_attach_target(obj);
 |  | ||||||
| +	}
 |  | ||||||
| +
 |  | ||||||
| +	ksyms = ksyms__load();
 |  | ||||||
| +	if (!ksyms) {
 |  | ||||||
| +		fprintf(stderr, "failed to load kallsyms\n");
 |  | ||||||
| +		goto cleanup;
 |  | ||||||
| +	}
 |  | ||||||
| +	if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
 |  | ||||||
| +
 |  | ||||||
| +	if (!env.queued)
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.block_rq_insert, false);
 |  | ||||||
|   |  | ||||||
|  	err = biosnoop_bpf__load(obj); |  | ||||||
|  	if (err) { |  | ||||||
| @@ -288,48 +309,9 @@ int main(int argc, char **argv)
 |  | ||||||
|  		} |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| -	obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
 |  | ||||||
| -	if (!obj->links.blk_account_io_start) {
 |  | ||||||
| -		err = -errno;
 |  | ||||||
| -		fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
 |  | ||||||
| -			strerror(-err));
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
| -	ksyms = ksyms__load();
 |  | ||||||
| -	if (!ksyms) {
 |  | ||||||
| -		err = -ENOMEM;
 |  | ||||||
| -		fprintf(stderr, "failed to load kallsyms\n");
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
| -	if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
 |  | ||||||
| -		obj->links.blk_account_io_merge_bio =
 |  | ||||||
| -			bpf_program__attach(obj->progs.blk_account_io_merge_bio);
 |  | ||||||
| -		if (!obj->links.blk_account_io_merge_bio) {
 |  | ||||||
| -			err = -errno;
 |  | ||||||
| -			fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
 |  | ||||||
| -				strerror(-err));
 |  | ||||||
| -			goto cleanup;
 |  | ||||||
| -		}
 |  | ||||||
| -	}
 |  | ||||||
| -	if (env.queued) {
 |  | ||||||
| -		obj->links.block_rq_insert =
 |  | ||||||
| -			bpf_program__attach(obj->progs.block_rq_insert);
 |  | ||||||
| -		if (!obj->links.block_rq_insert) {
 |  | ||||||
| -			err = -errno;
 |  | ||||||
| -			fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
 |  | ||||||
| -			goto cleanup;
 |  | ||||||
| -		}
 |  | ||||||
| -	}
 |  | ||||||
| -	obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
 |  | ||||||
| -	if (!obj->links.block_rq_issue) {
 |  | ||||||
| -		err = -errno;
 |  | ||||||
| -		fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
| -	obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
 |  | ||||||
| -	if (!obj->links.block_rq_complete) {
 |  | ||||||
| -		err = -errno;
 |  | ||||||
| -		fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
 |  | ||||||
| +	err = biosnoop_bpf__attach(obj);
 |  | ||||||
| +	if (err) {
 |  | ||||||
| +		fprintf(stderr, "failed to attach BPF programs: %d\n", err);
 |  | ||||||
|  		goto cleanup; |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| diff --git a/libbpf-tools/biostacks.bpf.c b/libbpf-tools/biostacks.bpf.c
 |  | ||||||
| index c3950910..0ca69880 100644
 |  | ||||||
| --- a/libbpf-tools/biostacks.bpf.c
 |  | ||||||
| +++ b/libbpf-tools/biostacks.bpf.c
 |  | ||||||
| @@ -67,20 +67,8 @@ int trace_start(void *ctx, struct request *rq, bool merge_bio)
 |  | ||||||
|  	return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| -SEC("fentry/blk_account_io_start")
 |  | ||||||
| -int BPF_PROG(blk_account_io_start, struct request *rq)
 |  | ||||||
| -{
 |  | ||||||
| -	return trace_start(ctx, rq, false);
 |  | ||||||
| -}
 |  | ||||||
| -
 |  | ||||||
| -SEC("kprobe/blk_account_io_merge_bio")
 |  | ||||||
| -int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
 |  | ||||||
| -{
 |  | ||||||
| -	return trace_start(ctx, rq, true);
 |  | ||||||
| -}
 |  | ||||||
| -
 |  | ||||||
| -SEC("fentry/blk_account_io_done")
 |  | ||||||
| -int BPF_PROG(blk_account_io_done, struct request *rq)
 |  | ||||||
| +static __always_inline
 |  | ||||||
| +int trace_done(void *ctx, struct request *rq)
 |  | ||||||
|  { |  | ||||||
|  	u64 slot, ts = bpf_ktime_get_ns(); |  | ||||||
|  	struct internal_rqinfo *i_rqinfop; |  | ||||||
| @@ -110,4 +98,34 @@ int BPF_PROG(blk_account_io_done, struct request *rq)
 |  | ||||||
|  	return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +SEC("kprobe/blk_account_io_merge_bio")
 |  | ||||||
| +int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(ctx, rq, true);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("fentry/blk_account_io_start")
 |  | ||||||
| +int BPF_PROG(blk_account_io_start, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(ctx, rq, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("fentry/blk_account_io_done")
 |  | ||||||
| +int BPF_PROG(blk_account_io_done, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_done(ctx, rq);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("tp_btf/block_io_start")
 |  | ||||||
| +int BPF_PROG(block_io_start, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(ctx, rq, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("tp_btf/block_io_done")
 |  | ||||||
| +int BPF_PROG(block_io_done, struct request *rq)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_done(ctx, rq);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  char LICENSE[] SEC("license") = "GPL"; |  | ||||||
| diff --git a/libbpf-tools/biostacks.c b/libbpf-tools/biostacks.c
 |  | ||||||
| index e1878d1f..e7875f76 100644
 |  | ||||||
| --- a/libbpf-tools/biostacks.c
 |  | ||||||
| +++ b/libbpf-tools/biostacks.c
 |  | ||||||
| @@ -128,6 +128,39 @@ void print_map(struct ksyms *ksyms, struct partitions *partitions, int fd)
 |  | ||||||
|  	return; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +static bool has_block_io_tracepoints(void)
 |  | ||||||
| +{
 |  | ||||||
| +	return tracepoint_exists("block", "block_io_start") &&
 |  | ||||||
| +		tracepoint_exists("block", "block_io_done");
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void disable_block_io_tracepoints(struct biostacks_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.block_io_start, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.block_io_done, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void disable_blk_account_io_fentry(struct biostacks_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void blk_account_io_set_attach_target(struct biostacks_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	if (fentry_can_attach("blk_account_io_start", NULL)) {
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
 |  | ||||||
| +					       0, "blk_account_io_start");
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_done,
 |  | ||||||
| +					       0, "blk_account_io_done");
 |  | ||||||
| +	} else {
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_start,
 |  | ||||||
| +					       0, "__blk_account_io_start");
 |  | ||||||
| +		bpf_program__set_attach_target(obj->progs.blk_account_io_done,
 |  | ||||||
| +					       0, "__blk_account_io_done");
 |  | ||||||
| +	}
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  int main(int argc, char **argv) |  | ||||||
|  { |  | ||||||
|  	struct partitions *partitions = NULL; |  | ||||||
| @@ -172,50 +205,30 @@ int main(int argc, char **argv)
 |  | ||||||
|   |  | ||||||
|  	obj->rodata->targ_ms = env.milliseconds; |  | ||||||
|   |  | ||||||
| -	if (fentry_can_attach("blk_account_io_start", NULL)) {
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
 |  | ||||||
| -					       "blk_account_io_start");
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
 |  | ||||||
| -					       "blk_account_io_done");
 |  | ||||||
| -	} else {
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_start, 0,
 |  | ||||||
| -					       "__blk_account_io_start");
 |  | ||||||
| -		bpf_program__set_attach_target(obj->progs.blk_account_io_done, 0,
 |  | ||||||
| -					       "__blk_account_io_done");
 |  | ||||||
| -	}
 |  | ||||||
| -
 |  | ||||||
| -	err = biostacks_bpf__load(obj);
 |  | ||||||
| -	if (err) {
 |  | ||||||
| -		fprintf(stderr, "failed to load BPF object: %d\n", err);
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| +	if (has_block_io_tracepoints())
 |  | ||||||
| +		disable_blk_account_io_fentry(obj);
 |  | ||||||
| +	else {
 |  | ||||||
| +		disable_block_io_tracepoints(obj);
 |  | ||||||
| +		blk_account_io_set_attach_target(obj);
 |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| -	obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
 |  | ||||||
| -	if (!obj->links.blk_account_io_start) {
 |  | ||||||
| -		err = -errno;
 |  | ||||||
| -		fprintf(stderr, "failed to attach blk_account_io_start: %s\n", strerror(-err));
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
|  	ksyms = ksyms__load(); |  | ||||||
|  	if (!ksyms) { |  | ||||||
|  		fprintf(stderr, "failed to load kallsyms\n"); |  | ||||||
|  		goto cleanup; |  | ||||||
|  	} |  | ||||||
| -	if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
 |  | ||||||
| -		obj->links.blk_account_io_merge_bio =
 |  | ||||||
| -			bpf_program__attach(obj->progs.blk_account_io_merge_bio);
 |  | ||||||
| -		if (!obj->links.blk_account_io_merge_bio) {
 |  | ||||||
| -			err = -errno;
 |  | ||||||
| -			fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
 |  | ||||||
| -				strerror(-err));
 |  | ||||||
| -			goto cleanup;
 |  | ||||||
| -		}
 |  | ||||||
| +	if (!ksyms__get_symbol(ksyms, "blk_account_io_merge_bio"))
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.blk_account_io_merge_bio, false);
 |  | ||||||
| +
 |  | ||||||
| +	err = biostacks_bpf__load(obj);
 |  | ||||||
| +	if (err) {
 |  | ||||||
| +		fprintf(stderr, "failed to load BPF object: %d\n", err);
 |  | ||||||
| +		goto cleanup;
 |  | ||||||
|  	} |  | ||||||
| -	obj->links.blk_account_io_done = bpf_program__attach(obj->progs.blk_account_io_done);
 |  | ||||||
| -	if (!obj->links.blk_account_io_done) {
 |  | ||||||
| -		err = -errno;
 |  | ||||||
| -		fprintf(stderr, "failed to attach blk_account_io_done: %s\n",
 |  | ||||||
| -			strerror(-err));
 |  | ||||||
| +
 |  | ||||||
| +	err = biostacks_bpf__attach(obj);
 |  | ||||||
| +	if (err) {
 |  | ||||||
| +		fprintf(stderr, "failed to attach BPF programs: %d\n", err);
 |  | ||||||
|  		goto cleanup; |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| diff --git a/libbpf-tools/biotop.bpf.c b/libbpf-tools/biotop.bpf.c
 |  | ||||||
| index 226e32d3..07631378 100644
 |  | ||||||
| --- a/libbpf-tools/biotop.bpf.c
 |  | ||||||
| +++ b/libbpf-tools/biotop.bpf.c
 |  | ||||||
| @@ -30,8 +30,8 @@ struct {
 |  | ||||||
|  	__type(value, struct val_t); |  | ||||||
|  } counts SEC(".maps"); |  | ||||||
|   |  | ||||||
| -SEC("kprobe")
 |  | ||||||
| -int BPF_KPROBE(blk_account_io_start, struct request *req)
 |  | ||||||
| +static __always_inline
 |  | ||||||
| +int trace_start(struct request *req)
 |  | ||||||
|  { |  | ||||||
|  	struct who_t who = {}; |  | ||||||
|   |  | ||||||
| @@ -56,8 +56,8 @@ int BPF_KPROBE(blk_mq_start_request, struct request *req)
 |  | ||||||
|  	return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| -SEC("kprobe")
 |  | ||||||
| -int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
 |  | ||||||
| +static __always_inline
 |  | ||||||
| +int trace_done(struct request *req)
 |  | ||||||
|  { |  | ||||||
|  	struct val_t *valp, zero = {}; |  | ||||||
|  	struct info_t info = {}; |  | ||||||
| @@ -103,4 +103,40 @@ int BPF_KPROBE(blk_account_io_done, struct request *req, u64 now)
 |  | ||||||
|  	return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +SEC("kprobe/blk_account_io_start")
 |  | ||||||
| +int BPF_KPROBE(blk_account_io_start, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("kprobe/blk_account_io_done")
 |  | ||||||
| +int BPF_KPROBE(blk_account_io_done, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_done(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("kprobe/__blk_account_io_start")
 |  | ||||||
| +int BPF_KPROBE(__blk_account_io_start, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("kprobe/__blk_account_io_done")
 |  | ||||||
| +int BPF_KPROBE(__blk_account_io_done, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_done(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("tp_btf/block_io_start")
 |  | ||||||
| +int BPF_PROG(block_io_start, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_start(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +SEC("tp_btf/block_io_done")
 |  | ||||||
| +int BPF_PROG(block_io_done, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +	return trace_done(req);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  char LICENSE[] SEC("license") = "GPL"; |  | ||||||
| diff --git a/libbpf-tools/biotop.c b/libbpf-tools/biotop.c
 |  | ||||||
| index 75484281..5b3a7cf3 100644
 |  | ||||||
| --- a/libbpf-tools/biotop.c
 |  | ||||||
| +++ b/libbpf-tools/biotop.c
 |  | ||||||
| @@ -354,6 +354,38 @@ static int print_stat(struct biotop_bpf *obj)
 |  | ||||||
|  	return err; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +static bool has_block_io_tracepoints(void)
 |  | ||||||
| +{
 |  | ||||||
| +	return tracepoint_exists("block", "block_io_start") &&
 |  | ||||||
| +		tracepoint_exists("block", "block_io_done");
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void disable_block_io_tracepoints(struct biotop_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.block_io_start, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.block_io_done, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void disable_blk_account_io_kprobes(struct biotop_bpf *obj)
 |  | ||||||
| +{
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
 |  | ||||||
| +	bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +static void blk_account_io_set_autoload(struct biotop_bpf *obj,
 |  | ||||||
| +					struct ksyms *ksyms)
 |  | ||||||
| +{
 |  | ||||||
| +	if (!ksyms__get_symbol(ksyms, "__blk_account_io_start")) {
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.__blk_account_io_start, false);
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.__blk_account_io_done, false);
 |  | ||||||
| +	} else {
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.blk_account_io_start, false);
 |  | ||||||
| +		bpf_program__set_autoload(obj->progs.blk_account_io_done, false);
 |  | ||||||
| +	}
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  int main(int argc, char **argv) |  | ||||||
|  { |  | ||||||
|  	static const struct argp argp = { |  | ||||||
| @@ -386,32 +418,19 @@ int main(int argc, char **argv)
 |  | ||||||
|  		goto cleanup; |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| +	if (has_block_io_tracepoints())
 |  | ||||||
| +		disable_blk_account_io_kprobes(obj);
 |  | ||||||
| +	else {
 |  | ||||||
| +		disable_block_io_tracepoints(obj);
 |  | ||||||
| +		blk_account_io_set_autoload(obj, ksyms);
 |  | ||||||
| +	}
 |  | ||||||
| +
 |  | ||||||
|  	err = biotop_bpf__load(obj); |  | ||||||
|  	if (err) { |  | ||||||
|  		warn("failed to load BPF object: %d\n", err); |  | ||||||
|  		goto cleanup; |  | ||||||
|  	} |  | ||||||
|   |  | ||||||
| -	if (ksyms__get_symbol(ksyms, "__blk_account_io_start"))
 |  | ||||||
| -		obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "__blk_account_io_start");
 |  | ||||||
| -	else
 |  | ||||||
| -		obj->links.blk_account_io_start = bpf_program__attach_kprobe(obj->progs.blk_account_io_start, false, "blk_account_io_start");
 |  | ||||||
| -
 |  | ||||||
| -	if (!obj->links.blk_account_io_start) {
 |  | ||||||
| -		warn("failed to load attach blk_account_io_start\n");
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
| -
 |  | ||||||
| -	if (ksyms__get_symbol(ksyms, "__blk_account_io_done"))
 |  | ||||||
| -		obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "__blk_account_io_done");
 |  | ||||||
| -	else
 |  | ||||||
| -		obj->links.blk_account_io_done = bpf_program__attach_kprobe(obj->progs.blk_account_io_done, false, "blk_account_io_done");
 |  | ||||||
| -
 |  | ||||||
| -	if (!obj->links.blk_account_io_done) {
 |  | ||||||
| -		warn("failed to load attach blk_account_io_done\n");
 |  | ||||||
| -		goto cleanup;
 |  | ||||||
| -	}
 |  | ||||||
| -
 |  | ||||||
|  	err = biotop_bpf__attach(obj); |  | ||||||
|  	if (err) { |  | ||||||
|  		warn("failed to attach BPF programs: %d\n", err); |  | ||||||
| -- 
 |  | ||||||
| 2.41.0 |  | ||||||
| 
 |  | ||||||
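Note: the libbpf-tools hunks above all follow the same selection pattern: if the block:block_io_start/block_io_done tracepoints exist, the tp_btf programs are kept and the blk_account_io_* fentry/kprobe programs are disabled with bpf_program__set_autoload() before the skeleton is loaded; otherwise the tracepoint programs are disabled and the fentry attach target falls back to the __blk_account_io_* spelling. The standalone sketch below only approximates the detection step (roughly what the tracepoint_exists() helper does); the tracefs paths and the main() harness are illustrative assumptions, not part of the patch.

    /* Sketch (not from the patch): detect the block:block_io_* tracepoints
     * by probing tracefs, roughly as the libbpf-tools helper does.
     * The mount points below are assumptions; adjust for your system. */
    #include <stdio.h>
    #include <stdbool.h>
    #include <unistd.h>

    static bool tp_exists(const char *category, const char *event)
    {
        char path[256];
        const char *roots[] = {
            "/sys/kernel/tracing/events",       /* newer kernels */
            "/sys/kernel/debug/tracing/events", /* older mount point */
        };

        for (size_t i = 0; i < sizeof(roots) / sizeof(roots[0]); i++) {
            snprintf(path, sizeof(path), "%s/%s/%s", roots[i], category, event);
            if (access(path, F_OK) == 0)
                return true;
        }
        return false;
    }

    int main(void)
    {
        if (tp_exists("block", "block_io_start") && tp_exists("block", "block_io_done"))
            printf("use the tp_btf/block_io_* programs\n");
        else
            printf("fall back to the blk_account_io_* fentry/kprobe programs\n");
        return 0;
    }

Doing the check before load, via autoload, is what lets one skeleton carry both program flavours without failing verification on kernels that lack one of the attach points.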
sources
							| @ -1 +1 @@ | |||||||
| SHA512 (bcc-0.28.0.tar.gz) = 792ce93dba64b1f87390b2602dcaeba04ac8b2863652b06eb9a907b93bc6137a944b856cc6fa9c7a38671c89814740967561ca4f3b29c267babca7dc5e78aa02 | SHA512 (bcc-0.29.1.tar.gz) = 9e60130ea602e19e6c6f88a8c17023cea5daf4c5bcc7af8816e9f5c662341136eb449a3fdf870ffad215495ac3bf895115c0d968d92ce79ebe2899b3e2464d24 | ||||||
|  | |||||||
| @ -1,55 +0,0 @@ | |||||||
| From 59a1fccfc78482af189150b7937b21244f34e48a Mon Sep 17 00:00:00 2001 |  | ||||||
| From: Jerome Marchand <jmarchan@redhat.com> |  | ||||||
| Date: Thu, 3 Aug 2023 16:11:50 +0200 |  | ||||||
| Subject: [PATCH] tool/slabratetop: add definition of freelist_aba_t |  | ||||||
| 
 |  | ||||||
| With recent kernels containing commit 6801be4f2653 ("slub: Replace |  | ||||||
| cmpxchg_double()"), slabratetop fails to compile with the following |  | ||||||
| error: |  | ||||||
| 
 |  | ||||||
| In file included from /virtual/main.c:86: |  | ||||||
| include/linux/slub_def.h:56:3: error: unknown type name 'freelist_aba_t' |  | ||||||
|                 freelist_aba_t freelist_tid; |  | ||||||
|                 ^ |  | ||||||
| 2 warnings and 1 error generated. |  | ||||||
| Traceback (most recent call last): |  | ||||||
|   File "/usr/share/bcc/tools/slabratetop", line 187, in <module> |  | ||||||
|     b = BPF(text=bpf_text) |  | ||||||
|         ^^^^^^^^^^^^^^^^^^ |  | ||||||
|   File "/usr/lib/python3.12/site-packages/bcc/__init__.py", line 479, in __init__ |  | ||||||
|     raise Exception("Failed to compile BPF module %s" % (src_file or "<text>")) |  | ||||||
| Exception: Failed to compile BPF module <text> |  | ||||||
| 
 |  | ||||||
| Adding the definition of freelist_aba_t fixes the issue. |  | ||||||
| ---
 |  | ||||||
|  tools/slabratetop.py | 14 ++++++++++++++ |  | ||||||
|  1 file changed, 14 insertions(+) |  | ||||||
| 
 |  | ||||||
| diff --git a/tools/slabratetop.py b/tools/slabratetop.py
 |  | ||||||
| index 8fbcac5e..8a7d486e 100755
 |  | ||||||
| --- a/tools/slabratetop.py
 |  | ||||||
| +++ b/tools/slabratetop.py
 |  | ||||||
| @@ -141,6 +141,20 @@ static inline void *slab_address(const struct slab *slab)
 |  | ||||||
|      return NULL; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +#ifdef CONFIG_64BIT
 |  | ||||||
| +typedef __uint128_t freelist_full_t;
 |  | ||||||
| +#else
 |  | ||||||
| +typedef u64 freelist_full_t;
 |  | ||||||
| +#endif
 |  | ||||||
| +
 |  | ||||||
| +typedef union {
 |  | ||||||
| +	struct {
 |  | ||||||
| +		void *freelist;
 |  | ||||||
| +		unsigned long counter;
 |  | ||||||
| +	};
 |  | ||||||
| +	freelist_full_t full;
 |  | ||||||
| +} freelist_aba_t;
 |  | ||||||
| +
 |  | ||||||
|  #ifdef CONFIG_SLUB |  | ||||||
|  #include <linux/slub_def.h> |  | ||||||
|  #else |  | ||||||
| -- 
 |  | ||||||
| 2.41.0 |  | ||||||
| 
 |  | ||||||
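The slabratetop patch removed above only injected a user-visible copy of the kernel's freelist_aba_t so the embedded BPF C could compile against slub_def.h; the union packs the freelist pointer and an ABA counter into one double-width word that SLUB swaps with a single compare-and-exchange. A hedged user-space sketch of that layout (the CONFIG_64BIT choice is approximated here by the pointer width, and unsigned __int128 is a GCC/Clang extension):

    /* Sketch (not from the patch): freelist_aba_t pairs the SLUB freelist
     * pointer with an ABA counter so both fit one cmpxchg-able word. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #if UINTPTR_MAX == UINT64_MAX
    typedef unsigned __int128 freelist_full_t;  /* 64-bit: 128-bit cmpxchg */
    #else
    typedef uint64_t freelist_full_t;           /* 32-bit: 64-bit cmpxchg */
    #endif

    typedef union {
        struct {
            void *freelist;
            unsigned long counter;
        };
        freelist_full_t full;
    } freelist_aba_t;

    int main(void)
    {
        /* The union only works if both views occupy the same storage. */
        static_assert(sizeof(freelist_aba_t) == sizeof(freelist_full_t),
                      "freelist + counter must fit the full word");
        printf("sizeof(freelist_aba_t) = %zu\n", sizeof(freelist_aba_t));
        return 0;
    }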
| @ -1,855 +0,0 @@ | |||||||
| From 53ef33b5ad42e6a4baa37821119199f2d846beff Mon Sep 17 00:00:00 2001 |  | ||||||
| From: Jerome Marchand <jmarchan@redhat.com> |  | ||||||
| Date: Thu, 27 Jul 2023 18:19:18 +0200 |  | ||||||
| Subject: [PATCH 2/2] tools: Add support for the new block_io_* tracepoints |  | ||||||
| 
 |  | ||||||
| The bio tools currently depend on the blk_account_io_done/start |  | ||||||
| functions, which can be inlined. To fix that, a couple of tracepoints |  | ||||||
| have been added upstream (block:block_io_start/done). This patch adds |  | ||||||
| support for those tracepoints when they are available. |  | ||||||
| 
 |  | ||||||
| Unfortunately, the bio tools rely on data that is not available to |  | ||||||
| the tracepoints (mostly the struct request). So the tracepoints can't |  | ||||||
| be used as drop-in replacements for blk_account_io_*. The main |  | ||||||
| difference is that the struct request can't be used as the hash key |  | ||||||
| anymore, so the pair (dev_t, sector) is used instead. |  | ||||||
| 
 |  | ||||||
| For the biolatency tool, the -F option is disabled when only the |  | ||||||
| tracepoints are available because the flags are not all accessible |  | ||||||
| from the tracepoints. Otherwise, all features of the tools should |  | ||||||
| remain. |  | ||||||
| 
 |  | ||||||
| Closes #4261 |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Jerome Marchand <jmarchan@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  tools/biolatency.py | 166 ++++++++++++++++++++++++++++-------- |  | ||||||
|  tools/biosnoop.py   | 200 +++++++++++++++++++++++++++++++++----------- |  | ||||||
|  tools/biotop.py     | 108 +++++++++++++++++++----- |  | ||||||
|  3 files changed, 371 insertions(+), 103 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/tools/biolatency.py b/tools/biolatency.py
 |  | ||||||
| index 8fe43a7c..03b48a4c 100755
 |  | ||||||
| --- a/tools/biolatency.py
 |  | ||||||
| +++ b/tools/biolatency.py
 |  | ||||||
| @@ -11,6 +11,7 @@
 |  | ||||||
|  # |  | ||||||
|  # 20-Sep-2015   Brendan Gregg   Created this. |  | ||||||
|  # 31-Mar-2022   Rocky Xing      Added disk filter support. |  | ||||||
| +# 01-Aug-2023   Jerome Marchand Added support for block tracepoints
 |  | ||||||
|   |  | ||||||
|  from __future__ import print_function |  | ||||||
|  from bcc import BPF |  | ||||||
| @@ -72,7 +73,7 @@ bpf_text = """
 |  | ||||||
|  #include <linux/blk-mq.h> |  | ||||||
|   |  | ||||||
|  typedef struct disk_key { |  | ||||||
| -    char disk[DISK_NAME_LEN];
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
|      u64 slot; |  | ||||||
|  } disk_key_t; |  | ||||||
|   |  | ||||||
| @@ -86,26 +87,70 @@ typedef struct ext_val {
 |  | ||||||
|      u64 count; |  | ||||||
|  } ext_val_t; |  | ||||||
|   |  | ||||||
| -BPF_HASH(start, struct request *);
 |  | ||||||
| +struct tp_args {
 |  | ||||||
| +    u64 __unused__;
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +    unsigned int nr_sector;
 |  | ||||||
| +    unsigned int bytes;
 |  | ||||||
| +    char rwbs[8];
 |  | ||||||
| +    char comm[16];
 |  | ||||||
| +    char cmd[];
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +struct start_key {
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    u32 _pad;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +    CMD_FLAGS
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +BPF_HASH(start, struct start_key);
 |  | ||||||
|  STORAGE |  | ||||||
|   |  | ||||||
| +static dev_t ddevt(struct gendisk *disk) {
 |  | ||||||
| +    return (disk->major  << 20) | disk->first_minor;
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // time block I/O |  | ||||||
| -int trace_req_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_req_start(struct start_key key)
 |  | ||||||
|  { |  | ||||||
|      DISK_FILTER |  | ||||||
|   |  | ||||||
|      u64 ts = bpf_ktime_get_ns(); |  | ||||||
| -    start.update(&req, &ts);
 |  | ||||||
| +    start.update(&key, &ts);
 |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +int trace_req_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct start_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    SET_FLAGS
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_req_start_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct start_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // output |  | ||||||
| -int trace_req_done(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_req_done(struct start_key key)
 |  | ||||||
|  { |  | ||||||
|      u64 *tsp, delta; |  | ||||||
|   |  | ||||||
|      // fetch timestamp and calculate delta |  | ||||||
| -    tsp = start.lookup(&req);
 |  | ||||||
| +    tsp = start.lookup(&key);
 |  | ||||||
|      if (tsp == 0) { |  | ||||||
|          return 0;   // missed issue |  | ||||||
|      } |  | ||||||
| @@ -116,9 +161,31 @@ int trace_req_done(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|      // store as histogram |  | ||||||
|      STORE |  | ||||||
|   |  | ||||||
| -    start.delete(&req);
 |  | ||||||
| +    start.delete(&key);
 |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
| +
 |  | ||||||
| +int trace_req_done(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct start_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    SET_FLAGS
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_done(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_req_done_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct start_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_done(key);
 |  | ||||||
| +}
 |  | ||||||
|  """ |  | ||||||
|   |  | ||||||
|  # code substitutions |  | ||||||
| @@ -134,21 +201,18 @@ store_str = ""
 |  | ||||||
|  if args.disks: |  | ||||||
|      storage_str += "BPF_HISTOGRAM(dist, disk_key_t);" |  | ||||||
|      disks_str = """ |  | ||||||
| -    disk_key_t key = {.slot = bpf_log2l(delta)};
 |  | ||||||
| -    void *__tmp = (void *)req->__RQ_DISK__->disk_name;
 |  | ||||||
| -    bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
 |  | ||||||
| -    dist.atomic_increment(key);
 |  | ||||||
| +    disk_key_t dkey = {};
 |  | ||||||
| +    dkey.dev = key.dev;
 |  | ||||||
| +    dkey.slot = bpf_log2l(delta);
 |  | ||||||
| +    dist.atomic_increment(dkey);
 |  | ||||||
|      """ |  | ||||||
| -    if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
 |  | ||||||
| -        store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
 |  | ||||||
| -    else:
 |  | ||||||
| -        store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
 |  | ||||||
| +    store_str += disks_str
 |  | ||||||
|  elif args.flags: |  | ||||||
|      storage_str += "BPF_HISTOGRAM(dist, flag_key_t);" |  | ||||||
|      store_str += """ |  | ||||||
| -    flag_key_t key = {.slot = bpf_log2l(delta)};
 |  | ||||||
| -    key.flags = req->cmd_flags;
 |  | ||||||
| -    dist.atomic_increment(key);
 |  | ||||||
| +    flag_key_t fkey = {.slot = bpf_log2l(delta)};
 |  | ||||||
| +    fkey.flags = key.flags;
 |  | ||||||
| +    dist.atomic_increment(fkey);
 |  | ||||||
|      """ |  | ||||||
|  else: |  | ||||||
|      storage_str += "BPF_HISTOGRAM(dist);" |  | ||||||
| @@ -161,21 +225,13 @@ store_str = ""
 |  | ||||||
|          exit(1) |  | ||||||
|   |  | ||||||
|      stat_info = os.stat(disk_path) |  | ||||||
| -    major = os.major(stat_info.st_rdev)
 |  | ||||||
| -    minor = os.minor(stat_info.st_rdev)
 |  | ||||||
| -
 |  | ||||||
| -    disk_field_str = ""
 |  | ||||||
| -    if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
 |  | ||||||
| -        disk_field_str = 'req->rq_disk'
 |  | ||||||
| -    else:
 |  | ||||||
| -        disk_field_str = 'req->q->disk'
 |  | ||||||
| +    dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
 |  | ||||||
|   |  | ||||||
|      disk_filter_str = """ |  | ||||||
| -    struct gendisk *disk = %s;
 |  | ||||||
| -    if (!(disk->major == %d && disk->first_minor == %d)) {
 |  | ||||||
| +    if(key.dev != %s) {
 |  | ||||||
|          return 0; |  | ||||||
|      } |  | ||||||
| -    """ % (disk_field_str, major, minor)
 |  | ||||||
| +    """ % (dev)
 |  | ||||||
|   |  | ||||||
|      bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str) |  | ||||||
|  else: |  | ||||||
| @@ -194,6 +250,16 @@ store_str = ""
 |  | ||||||
|   |  | ||||||
|  bpf_text = bpf_text.replace("STORAGE", storage_str) |  | ||||||
|  bpf_text = bpf_text.replace("STORE", store_str) |  | ||||||
| +if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
 |  | ||||||
| +    bpf_text = bpf_text.replace('__RQ_DISK__', 'rq_disk')
 |  | ||||||
| +else:
 |  | ||||||
| +    bpf_text = bpf_text.replace('__RQ_DISK__', 'q->disk')
 |  | ||||||
| +if args.flags:
 |  | ||||||
| +    bpf_text = bpf_text.replace('CMD_FLAGS', 'u64 flags;')
 |  | ||||||
| +    bpf_text = bpf_text.replace('SET_FLAGS', 'key.flags = req->cmd_flags;')
 |  | ||||||
| +else:
 |  | ||||||
| +    bpf_text = bpf_text.replace('CMD_FLAGS', '')
 |  | ||||||
| +    bpf_text = bpf_text.replace('SET_FLAGS', '')
 |  | ||||||
|   |  | ||||||
|  if debug or args.ebpf: |  | ||||||
|      print(bpf_text) |  | ||||||
| @@ -205,25 +271,53 @@ b = BPF(text=bpf_text)
 |  | ||||||
|  if args.queued: |  | ||||||
|      if BPF.get_kprobe_functions(b'__blk_account_io_start'): |  | ||||||
|          b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start") |  | ||||||
| -    else:
 |  | ||||||
| +    elif BPF.get_kprobe_functions(b'blk_account_io_start'):
 |  | ||||||
|          b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start") |  | ||||||
| +    else:
 |  | ||||||
| +        if args.flags:
 |  | ||||||
| +            # Some flags are accessible in the rwbs field (RAHEAD, SYNC and META)
 |  | ||||||
| +            # but others aren't. Disable the -F option for tracepoints for now.
 |  | ||||||
| +            print("ERROR: blk_account_io_start probe not available. Can't use -F.")
 |  | ||||||
| +            exit()
 |  | ||||||
| +        b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_req_start_tp")
 |  | ||||||
|  else: |  | ||||||
|      if BPF.get_kprobe_functions(b'blk_start_request'): |  | ||||||
|          b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start") |  | ||||||
|      b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start") |  | ||||||
| +
 |  | ||||||
|  if BPF.get_kprobe_functions(b'__blk_account_io_done'): |  | ||||||
|      b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done") |  | ||||||
| -else:
 |  | ||||||
| +elif BPF.get_kprobe_functions(b'blk_account_io_done'):
 |  | ||||||
|      b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done") |  | ||||||
| +else:
 |  | ||||||
| +    if args.flags:
 |  | ||||||
| +        print("ERROR: blk_account_io_done probe not available. Can't use -F.")
 |  | ||||||
| +        exit()
 |  | ||||||
| +    b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_done_tp")
 |  | ||||||
| +
 |  | ||||||
|   |  | ||||||
|  if not args.json: |  | ||||||
|      print("Tracing block device I/O... Hit Ctrl-C to end.") |  | ||||||
|   |  | ||||||
| -def disk_print(s):
 |  | ||||||
| -    disk = s.decode('utf-8', 'replace')
 |  | ||||||
| -    if not disk:
 |  | ||||||
| -        disk = "<unknown>"
 |  | ||||||
| -    return disk
 |  | ||||||
| +# cache disk major,minor -> diskname
 |  | ||||||
| +diskstats = "/proc/diskstats"
 |  | ||||||
| +disklookup = {}
 |  | ||||||
| +with open(diskstats) as stats:
 |  | ||||||
| +    for line in stats:
 |  | ||||||
| +        a = line.split()
 |  | ||||||
| +        disklookup[a[0] + "," + a[1]] = a[2]
 |  | ||||||
| +
 |  | ||||||
| +def disk_print(d):
 |  | ||||||
| +    major = d >> 20
 |  | ||||||
| +    minor = d & ((1 << 20) - 1)
 |  | ||||||
| +
 |  | ||||||
| +    disk = str(major) + "," + str(minor)
 |  | ||||||
| +    if disk in disklookup:
 |  | ||||||
| +        diskname = disklookup[disk]
 |  | ||||||
| +    else:
 |  | ||||||
| +        diskname = "?"
 |  | ||||||
| +
 |  | ||||||
| +    return diskname
 |  | ||||||
|   |  | ||||||
|  # see blk_fill_rwbs(): |  | ||||||
|  req_opf = { |  | ||||||
| diff --git a/tools/biosnoop.py b/tools/biosnoop.py
 |  | ||||||
| index 33703233..f0fef98b 100755
 |  | ||||||
| --- a/tools/biosnoop.py
 |  | ||||||
| +++ b/tools/biosnoop.py
 |  | ||||||
| @@ -14,6 +14,7 @@
 |  | ||||||
|  # 11-Feb-2016   Allan McAleavy  updated for BPF_PERF_OUTPUT |  | ||||||
|  # 21-Jun-2022   Rocky Xing      Added disk filter support. |  | ||||||
|  # 13-Oct-2022   Rocky Xing      Added support for displaying block I/O pattern. |  | ||||||
| +# 01-Aug-2023   Jerome Marchand Added support for block tracepoints
 |  | ||||||
|   |  | ||||||
|  from __future__ import print_function |  | ||||||
|  from bcc import BPF |  | ||||||
| @@ -64,6 +65,24 @@ struct val_t {
 |  | ||||||
|      char name[TASK_COMM_LEN]; |  | ||||||
|  }; |  | ||||||
|   |  | ||||||
| +struct tp_args {
 |  | ||||||
| +    u64 __unused__;
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +    unsigned int nr_sector;
 |  | ||||||
| +    unsigned int bytes;
 |  | ||||||
| +    char rwbs[8];
 |  | ||||||
| +    char comm[16];
 |  | ||||||
| +    char cmd[];
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +struct hash_key {
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    u32 rwflag;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +
 |  | ||||||
|  #ifdef INCLUDE_PATTERN |  | ||||||
|  struct sector_key_t { |  | ||||||
|      u32 dev_major; |  | ||||||
| @@ -79,6 +98,7 @@ enum bio_pattern {
 |  | ||||||
|   |  | ||||||
|  struct data_t { |  | ||||||
|      u32 pid; |  | ||||||
| +    u32 dev;
 |  | ||||||
|      u64 rwflag; |  | ||||||
|      u64 delta; |  | ||||||
|      u64 qdelta; |  | ||||||
| @@ -88,7 +108,6 @@ struct data_t {
 |  | ||||||
|      enum bio_pattern pattern; |  | ||||||
|  #endif |  | ||||||
|      u64 ts; |  | ||||||
| -    char disk_name[DISK_NAME_LEN];
 |  | ||||||
|      char name[TASK_COMM_LEN]; |  | ||||||
|  }; |  | ||||||
|   |  | ||||||
| @@ -96,12 +115,45 @@ struct data_t {
 |  | ||||||
|  BPF_HASH(last_sectors, struct sector_key_t, u64); |  | ||||||
|  #endif |  | ||||||
|   |  | ||||||
| -BPF_HASH(start, struct request *, struct start_req_t);
 |  | ||||||
| -BPF_HASH(infobyreq, struct request *, struct val_t);
 |  | ||||||
| +BPF_HASH(start, struct hash_key, struct start_req_t);
 |  | ||||||
| +BPF_HASH(infobyreq, struct hash_key, struct val_t);
 |  | ||||||
|  BPF_PERF_OUTPUT(events); |  | ||||||
|   |  | ||||||
| +static dev_t ddevt(struct gendisk *disk) {
 |  | ||||||
| +    return (disk->major  << 20) | disk->first_minor;
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +/*
 |  | ||||||
| + * The following deals with a kernel version change (in mainline 4.7, although
 |  | ||||||
| + * it may be backported to earlier kernels) with how block request write flags
 |  | ||||||
| + * are tested. We handle both pre- and post-change versions here. Please avoid
 |  | ||||||
| + * kernel version tests like this as much as possible: they inflate the code,
 |  | ||||||
| + * test, and maintenance burden.
 |  | ||||||
| + */
 |  | ||||||
| +static int get_rwflag(u32 cmd_flags) {
 |  | ||||||
| +#ifdef REQ_WRITE
 |  | ||||||
| +    return !!(cmd_flags & REQ_WRITE);
 |  | ||||||
| +#elif defined(REQ_OP_SHIFT)
 |  | ||||||
| +    return !!((cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
 |  | ||||||
| +#else
 |  | ||||||
| +    return !!((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
 |  | ||||||
| +#endif
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +#define RWBS_LEN	8
 |  | ||||||
| +
 |  | ||||||
| +static int get_rwflag_tp(char *rwbs) {
 |  | ||||||
| +    for (int i = 0; i < RWBS_LEN; i++) {
 |  | ||||||
| +        if (rwbs[i] == 'W')
 |  | ||||||
| +            return 1;
 |  | ||||||
| +        if (rwbs[i] == '\\0')
 |  | ||||||
| +            return 0;
 |  | ||||||
| +    }
 |  | ||||||
| +    return 0;
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // cache PID and comm by-req |  | ||||||
| -int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_pid_start(struct hash_key key)
 |  | ||||||
|  { |  | ||||||
|      DISK_FILTER |  | ||||||
|   |  | ||||||
| @@ -113,47 +165,76 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|          if (##QUEUE##) { |  | ||||||
|              val.ts = bpf_ktime_get_ns(); |  | ||||||
|          } |  | ||||||
| -        infobyreq.update(&req, &val);
 |  | ||||||
| +        infobyreq.update(&key, &val);
 |  | ||||||
|      } |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +
 |  | ||||||
| +int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .rwflag = get_rwflag(req->cmd_flags),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_pid_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_pid_start_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .rwflag = get_rwflag_tp(args->rwbs),
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_pid_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // time block I/O |  | ||||||
|  int trace_req_start(struct pt_regs *ctx, struct request *req) |  | ||||||
|  { |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .rwflag = get_rwflag(req->cmd_flags),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
|      DISK_FILTER |  | ||||||
|   |  | ||||||
|      struct start_req_t start_req = { |  | ||||||
|          .ts = bpf_ktime_get_ns(), |  | ||||||
|          .data_len = req->__data_len |  | ||||||
|      }; |  | ||||||
| -    start.update(&req, &start_req);
 |  | ||||||
| +    start.update(&key, &start_req);
 |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
|  // output |  | ||||||
| -int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_req_completion(void *ctx, struct hash_key key)
 |  | ||||||
|  { |  | ||||||
|      struct start_req_t *startp; |  | ||||||
|      struct val_t *valp; |  | ||||||
|      struct data_t data = {}; |  | ||||||
| -    struct gendisk *rq_disk;
 |  | ||||||
| +    //struct gendisk *rq_disk;
 |  | ||||||
|      u64 ts; |  | ||||||
|   |  | ||||||
|      // fetch timestamp and calculate delta |  | ||||||
| -    startp = start.lookup(&req);
 |  | ||||||
| +    startp = start.lookup(&key);
 |  | ||||||
|      if (startp == 0) { |  | ||||||
|          // missed tracing issue |  | ||||||
|          return 0; |  | ||||||
|      } |  | ||||||
|      ts = bpf_ktime_get_ns(); |  | ||||||
| -    rq_disk = req->__RQ_DISK__;
 |  | ||||||
| +    //rq_disk = req->__RQ_DISK__;
 |  | ||||||
|      data.delta = ts - startp->ts; |  | ||||||
|      data.ts = ts / 1000; |  | ||||||
|      data.qdelta = 0; |  | ||||||
|      data.len = startp->data_len; |  | ||||||
|   |  | ||||||
| -    valp = infobyreq.lookup(&req);
 |  | ||||||
| +    valp = infobyreq.lookup(&key);
 |  | ||||||
|      if (valp == 0) { |  | ||||||
|          data.name[0] = '?'; |  | ||||||
|          data.name[1] = 0; |  | ||||||
| @@ -162,10 +243,9 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|              data.qdelta = startp->ts - valp->ts; |  | ||||||
|          } |  | ||||||
|          data.pid = valp->pid; |  | ||||||
| -        data.sector = req->__sector;
 |  | ||||||
| +        data.sector = key.sector;
 |  | ||||||
| +        data.dev = key.dev;
 |  | ||||||
|          bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name); |  | ||||||
| -        bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
 |  | ||||||
| -                       rq_disk->disk_name);
 |  | ||||||
|      } |  | ||||||
|   |  | ||||||
|  #ifdef INCLUDE_PATTERN |  | ||||||
| @@ -174,8 +254,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|      u64 *sector, last_sector; |  | ||||||
|   |  | ||||||
|      struct sector_key_t sector_key = { |  | ||||||
| -        .dev_major = rq_disk->major,
 |  | ||||||
| -        .dev_minor = rq_disk->first_minor
 |  | ||||||
| +        .dev_major = key.dev >> 20,
 |  | ||||||
| +        .dev_minor = key.dev & ((1 << 20) - 1)
 |  | ||||||
|      }; |  | ||||||
|   |  | ||||||
|      sector = last_sectors.lookup(&sector_key); |  | ||||||
| @@ -187,27 +267,36 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|      last_sectors.update(&sector_key, &last_sector); |  | ||||||
|  #endif |  | ||||||
|   |  | ||||||
| -/*
 |  | ||||||
| - * The following deals with a kernel version change (in mainline 4.7, although
 |  | ||||||
| - * it may be backported to earlier kernels) with how block request write flags
 |  | ||||||
| - * are tested. We handle both pre- and post-change versions here. Please avoid
 |  | ||||||
| - * kernel version tests like this as much as possible: they inflate the code,
 |  | ||||||
| - * test, and maintenance burden.
 |  | ||||||
| - */
 |  | ||||||
| -#ifdef REQ_WRITE
 |  | ||||||
| -    data.rwflag = !!(req->cmd_flags & REQ_WRITE);
 |  | ||||||
| -#elif defined(REQ_OP_SHIFT)
 |  | ||||||
| -    data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
 |  | ||||||
| -#else
 |  | ||||||
| -    data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
 |  | ||||||
| -#endif
 |  | ||||||
| +    data.rwflag = key.rwflag;
 |  | ||||||
|   |  | ||||||
|      events.perf_submit(ctx, &data, sizeof(data)); |  | ||||||
| -    start.delete(&req);
 |  | ||||||
| -    infobyreq.delete(&req);
 |  | ||||||
| +    start.delete(&key);
 |  | ||||||
| +    infobyreq.delete(&key);
 |  | ||||||
|   |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
| +
 |  | ||||||
| +int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .rwflag = get_rwflag(req->cmd_flags),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_completion(ctx, key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_req_completion_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .rwflag = get_rwflag_tp(args->rwbs),
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_completion(args, key);
 |  | ||||||
| +}
 |  | ||||||
|  """ |  | ||||||
|  if args.queue: |  | ||||||
|      bpf_text = bpf_text.replace('##QUEUE##', '1') |  | ||||||
| @@ -225,21 +314,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|          exit(1) |  | ||||||
|   |  | ||||||
|      stat_info = os.stat(disk_path) |  | ||||||
| -    major = os.major(stat_info.st_rdev)
 |  | ||||||
| -    minor = os.minor(stat_info.st_rdev)
 |  | ||||||
| -
 |  | ||||||
| -    disk_field_str = ""
 |  | ||||||
| -    if BPF.kernel_struct_has_field(b'request', b'rq_disk') == 1:
 |  | ||||||
| -        disk_field_str = 'req->rq_disk'
 |  | ||||||
| -    else:
 |  | ||||||
| -        disk_field_str = 'req->q->disk'
 |  | ||||||
| +    dev = os.major(stat_info.st_rdev) << 20 | os.minor(stat_info.st_rdev)
 |  | ||||||
|   |  | ||||||
|      disk_filter_str = """ |  | ||||||
| -    struct gendisk *disk = %s;
 |  | ||||||
| -    if (!(disk->major == %d && disk->first_minor == %d)) {
 |  | ||||||
| +    if(key.dev != %s) {
 |  | ||||||
|          return 0; |  | ||||||
|      } |  | ||||||
| -    """ % (disk_field_str, major, minor)
 |  | ||||||
| +    """ % (dev)
 |  | ||||||
|   |  | ||||||
|      bpf_text = bpf_text.replace('DISK_FILTER', disk_filter_str) |  | ||||||
|  else: |  | ||||||
| @@ -254,15 +335,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|  b = BPF(text=bpf_text) |  | ||||||
|  if BPF.get_kprobe_functions(b'__blk_account_io_start'): |  | ||||||
|      b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start") |  | ||||||
| -else:
 |  | ||||||
| +elif BPF.get_kprobe_functions(b'blk_account_io_start'):
 |  | ||||||
|      b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start") |  | ||||||
| +else:
 |  | ||||||
| +    b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
 |  | ||||||
|  if BPF.get_kprobe_functions(b'blk_start_request'): |  | ||||||
|      b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start") |  | ||||||
|  b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start") |  | ||||||
|  if BPF.get_kprobe_functions(b'__blk_account_io_done'): |  | ||||||
|      b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion") |  | ||||||
| -else:
 |  | ||||||
| +elif BPF.get_kprobe_functions(b'blk_account_io_done'):
 |  | ||||||
|      b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion") |  | ||||||
| +else:
 |  | ||||||
| +    b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
 |  | ||||||
|   |  | ||||||
|  # header |  | ||||||
|  print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID", |  | ||||||
| @@ -273,6 +358,27 @@ print("%-11s %-14s %-7s %-9s %-1s %-10s %-7s" % ("TIME(s)", "COMM", "PID",
 |  | ||||||
|      print("%7s " % ("QUE(ms)"), end="") |  | ||||||
|  print("%7s" % "LAT(ms)") |  | ||||||
|   |  | ||||||
| +
 |  | ||||||
| +# cache disk major,minor -> diskname
 |  | ||||||
| +diskstats = "/proc/diskstats"
 |  | ||||||
| +disklookup = {}
 |  | ||||||
| +with open(diskstats) as stats:
 |  | ||||||
| +    for line in stats:
 |  | ||||||
| +        a = line.split()
 |  | ||||||
| +        disklookup[a[0] + "," + a[1]] = a[2]
 |  | ||||||
| +
 |  | ||||||
| +def disk_print(d):
 |  | ||||||
| +    major = d >> 20
 |  | ||||||
| +    minor = d & ((1 << 20) - 1)
 |  | ||||||
| +
 |  | ||||||
| +    disk = str(major) + "," + str(minor)
 |  | ||||||
| +    if disk in disklookup:
 |  | ||||||
| +        diskname = disklookup[disk]
 |  | ||||||
| +    else:
 |  | ||||||
| +        diskname = "<unknown>"
 |  | ||||||
| +
 |  | ||||||
| +    return diskname
 |  | ||||||
| +
 |  | ||||||
|  rwflg = "" |  | ||||||
|  pattern = "" |  | ||||||
|  start_ts = 0 |  | ||||||
| @@ -297,9 +403,7 @@ P_RANDOM = 2
 |  | ||||||
|   |  | ||||||
|      delta = float(event.ts) - start_ts |  | ||||||
|   |  | ||||||
| -    disk_name = event.disk_name.decode('utf-8', 'replace')
 |  | ||||||
| -    if not disk_name:
 |  | ||||||
| -        disk_name = '<unknown>'
 |  | ||||||
| +    disk_name = disk_print(event.dev)
 |  | ||||||
|   |  | ||||||
|      print("%-11.6f %-14.14s %-7s %-9s %-1s %-10s %-7s" % ( |  | ||||||
|          delta / 1000000, event.name.decode('utf-8', 'replace'), event.pid, |  | ||||||
| diff --git a/tools/biotop.py b/tools/biotop.py
 |  | ||||||
| index fcdd373f..2620983a 100755
 |  | ||||||
| --- a/tools/biotop.py
 |  | ||||||
| +++ b/tools/biotop.py
 |  | ||||||
| @@ -14,6 +14,7 @@
 |  | ||||||
|  # |  | ||||||
|  # 06-Feb-2016   Brendan Gregg   Created this. |  | ||||||
|  # 17-Mar-2022   Rocky Xing      Added PID filter support. |  | ||||||
| +# 01-Aug-2023   Jerome Marchand Added support for block tracepoints
 |  | ||||||
|   |  | ||||||
|  from __future__ import print_function |  | ||||||
|  from bcc import BPF |  | ||||||
| @@ -88,14 +89,35 @@ struct val_t {
 |  | ||||||
|      u32 io; |  | ||||||
|  }; |  | ||||||
|   |  | ||||||
| -BPF_HASH(start, struct request *, struct start_req_t);
 |  | ||||||
| -BPF_HASH(whobyreq, struct request *, struct who_t);
 |  | ||||||
| +struct tp_args {
 |  | ||||||
| +    u64 __unused__;
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +    unsigned int nr_sector;
 |  | ||||||
| +    unsigned int bytes;
 |  | ||||||
| +    char rwbs[8];
 |  | ||||||
| +    char comm[16];
 |  | ||||||
| +    char cmd[];
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +struct hash_key {
 |  | ||||||
| +    dev_t dev;
 |  | ||||||
| +    u32 _pad;
 |  | ||||||
| +    sector_t sector;
 |  | ||||||
| +};
 |  | ||||||
| +
 |  | ||||||
| +BPF_HASH(start, struct hash_key, struct start_req_t);
 |  | ||||||
| +BPF_HASH(whobyreq, struct hash_key, struct who_t);
 |  | ||||||
|  BPF_HASH(counts, struct info_t, struct val_t); |  | ||||||
|   |  | ||||||
| +static dev_t ddevt(struct gendisk *disk) {
 |  | ||||||
| +    return (disk->major  << 20) | disk->first_minor;
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // cache PID and comm by-req |  | ||||||
| -int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_pid_start(struct hash_key key)
 |  | ||||||
|  { |  | ||||||
| -    struct who_t who = {};
 |  | ||||||
| +    struct who_t who;
 |  | ||||||
|      u32 pid; |  | ||||||
|   |  | ||||||
|      if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) { |  | ||||||
| @@ -104,30 +126,54 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|              return 0; |  | ||||||
|   |  | ||||||
|          who.pid = pid; |  | ||||||
| -        whobyreq.update(&req, &who);
 |  | ||||||
| +        whobyreq.update(&key, &who);
 |  | ||||||
|      } |  | ||||||
|   |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
| +int trace_pid_start(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_pid_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_pid_start_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_pid_start(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
|  // time block I/O |  | ||||||
|  int trace_req_start(struct pt_regs *ctx, struct request *req) |  | ||||||
|  { |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
|      struct start_req_t start_req = { |  | ||||||
|          .ts = bpf_ktime_get_ns(), |  | ||||||
|          .data_len = req->__data_len |  | ||||||
|      }; |  | ||||||
| -    start.update(&req, &start_req);
 |  | ||||||
| +    start.update(&key, &start_req);
 |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
|   |  | ||||||
|  // output |  | ||||||
| -int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +static int __trace_req_completion(struct hash_key key)
 |  | ||||||
|  { |  | ||||||
|      struct start_req_t *startp; |  | ||||||
|   |  | ||||||
|      // fetch timestamp and calculate delta |  | ||||||
| -    startp = start.lookup(&req);
 |  | ||||||
| +    startp = start.lookup(&key);
 |  | ||||||
|      if (startp == 0) { |  | ||||||
|          return 0;    // missed tracing issue |  | ||||||
|      } |  | ||||||
| @@ -135,12 +181,12 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|      struct who_t *whop; |  | ||||||
|      u32 pid; |  | ||||||
|   |  | ||||||
| -    whop = whobyreq.lookup(&req);
 |  | ||||||
| +    whop = whobyreq.lookup(&key);
 |  | ||||||
|      pid = whop != 0 ? whop->pid : 0; |  | ||||||
|      if (FILTER_PID) { |  | ||||||
| -        start.delete(&req);
 |  | ||||||
| +        start.delete(&key);
 |  | ||||||
|          if (whop != 0) { |  | ||||||
| -            whobyreq.delete(&req);
 |  | ||||||
| +            whobyreq.delete(&key);
 |  | ||||||
|          } |  | ||||||
|          return 0; |  | ||||||
|      } |  | ||||||
| @@ -150,8 +196,8 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|   |  | ||||||
|      // setup info_t key |  | ||||||
|      struct info_t info = {}; |  | ||||||
| -    info.major = req->__RQ_DISK__->major;
 |  | ||||||
| -    info.minor = req->__RQ_DISK__->first_minor;
 |  | ||||||
| +    info.major = key.dev >> 20;
 |  | ||||||
| +    info.minor = key.dev & ((1 << 20) - 1);
 |  | ||||||
|  /* |  | ||||||
|   * The following deals with a kernel version change (in mainline 4.7, although |  | ||||||
|   * it may be backported to earlier kernels) with how block request write flags |  | ||||||
| @@ -159,13 +205,13 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|   * kernel version tests like this as much as possible: they inflate the code, |  | ||||||
|   * test, and maintenance burden. |  | ||||||
|   */ |  | ||||||
| -#ifdef REQ_WRITE
 |  | ||||||
| +/*#ifdef REQ_WRITE
 |  | ||||||
|      info.rwflag = !!(req->cmd_flags & REQ_WRITE); |  | ||||||
|  #elif defined(REQ_OP_SHIFT) |  | ||||||
|      info.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE); |  | ||||||
|  #else |  | ||||||
|      info.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE); |  | ||||||
| -#endif
 |  | ||||||
| +#endif*/
 |  | ||||||
|   |  | ||||||
|      if (whop == 0) { |  | ||||||
|          // missed pid who, save stats as pid 0 |  | ||||||
| @@ -183,11 +229,31 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|          valp->io++; |  | ||||||
|      } |  | ||||||
|   |  | ||||||
| -    start.delete(&req);
 |  | ||||||
| -    whobyreq.delete(&req);
 |  | ||||||
| +    start.delete(&key);
 |  | ||||||
| +    whobyreq.delete(&key);
 |  | ||||||
|   |  | ||||||
|      return 0; |  | ||||||
|  } |  | ||||||
| +
 |  | ||||||
| +int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = ddevt(req->__RQ_DISK__),
 |  | ||||||
| +        .sector = req->__sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_completion(key);
 |  | ||||||
| +}
 |  | ||||||
| +
 |  | ||||||
| +int trace_req_completion_tp(struct tp_args *args)
 |  | ||||||
| +{
 |  | ||||||
| +    struct hash_key key = {
 |  | ||||||
| +        .dev = args->dev,
 |  | ||||||
| +        .sector = args->sector
 |  | ||||||
| +    };
 |  | ||||||
| +
 |  | ||||||
| +    return __trace_req_completion(key);
 |  | ||||||
| +}
 |  | ||||||
|  """ |  | ||||||
|   |  | ||||||
|  if args.ebpf: |  | ||||||
| @@ -207,15 +273,19 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
 |  | ||||||
|  b = BPF(text=bpf_text) |  | ||||||
|  if BPF.get_kprobe_functions(b'__blk_account_io_start'): |  | ||||||
|      b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_pid_start") |  | ||||||
| -else:
 |  | ||||||
| +elif BPF.get_kprobe_functions(b'blk_account_io_start'):
 |  | ||||||
|      b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start") |  | ||||||
| +else:
 |  | ||||||
| +    b.attach_tracepoint(tp="block:block_io_start", fn_name="trace_pid_start_tp")
 |  | ||||||
|  if BPF.get_kprobe_functions(b'blk_start_request'): |  | ||||||
|      b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start") |  | ||||||
|  b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start") |  | ||||||
|  if BPF.get_kprobe_functions(b'__blk_account_io_done'): |  | ||||||
|      b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_completion") |  | ||||||
| -else:
 |  | ||||||
| +elif BPF.get_kprobe_functions(b'blk_account_io_done'):
 |  | ||||||
|      b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_completion") |  | ||||||
| +else:
 |  | ||||||
| +    b.attach_tracepoint(tp="block:block_io_done", fn_name="trace_req_completion_tp")
 |  | ||||||
|   |  | ||||||
|  print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval) |  | ||||||
|   |  | ||||||
| -- 
 |  | ||||||
| 2.41.0 |  | ||||||
| 
 |  | ||||||
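The Python bio tools in the patch removed above stop keying their maps on struct request and instead use a (dev, sector) pair, with the device packed by ddevt() as (major << 20) | first_minor and unpacked again in disk_print() for the /proc/diskstats name lookup. A small sketch of that packing scheme, with made-up major/minor values (20 minor bits match the kernel's MKDEV layout):

    /* Sketch (not from the patch): the dev packing used by ddevt() and the
     * matching decode done in disk_print(): 20 bits of minor, major above.
     * The sample values (259, 3) are made up. */
    #include <stdio.h>

    #define MINOR_BITS 20
    #define MINOR_MASK ((1u << MINOR_BITS) - 1)

    static unsigned int ddevt(unsigned int major, unsigned int first_minor)
    {
        return (major << MINOR_BITS) | first_minor;
    }

    int main(void)
    {
        unsigned int dev = ddevt(259, 3);   /* e.g. an NVMe partition */
        unsigned int major = dev >> MINOR_BITS;
        unsigned int minor = dev & MINOR_MASK;

        /* the tools look this "major,minor" pair up in /proc/diskstats */
        printf("dev=0x%x -> %u,%u\n", dev, major, minor);
        return 0;
    }

The same 20-bit split is what the disk filter relies on when it compares key.dev against the value built from os.major() and os.minor().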