From 0c12dfe26a362db181e6172cb56a39cd002a6892 Mon Sep 17 00:00:00 2001 From: yonghong-song Date: Sun, 18 Jul 2021 15:05:34 -0700 Subject: [PATCH] sync with latest libbpf repo (#3529) sync with latest libbpf repo which is upto commit 21f90f61b084 sync: latest libbpf changes from kernel Signed-off-by: Yonghong Song --- docs/kernel-versions.md | 8 ++ introspection/bps.c | 1 + src/cc/compat/linux/virtual_bpf.h | 167 ++++++++++++++++++++++++++++-- src/cc/export/helpers.h | 17 +++ src/cc/libbpf.c | 8 ++ 5 files changed, 190 insertions(+), 11 deletions(-) diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md index 9192aa43..33318624 100644 --- a/docs/kernel-versions.md +++ b/docs/kernel-versions.md @@ -208,6 +208,7 @@ Helper | Kernel version | License | Commit | -------|----------------|---------|--------| `BPF_FUNC_bind()` | 4.17 | | [`d74bad4e74ee`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d74bad4e74ee373787a9ae24197c17b7cdc428d5) | `BPF_FUNC_bprm_opts_set()` | 5.11 | | [`3f6719c7b62f`](https://github.com/torvalds/linux/commit/3f6719c7b62f0327c9091e26d0da10e65668229e) +`BPF_FUNC_btf_find_by_name_kind()` | 5.14 | | [`3d78417b60fb`](https://github.com/torvalds/linux/commit/3d78417b60fba249cc555468cb72d96f5cde2964) `BPF_FUNC_check_mtu()` | 5.12 | | [`34b2021cc616`](https://github.com/torvalds/linux/commit/34b2021cc61642d61c3cf943d9e71925b827941b) `BPF_FUNC_clone_redirect()` | 4.2 | | [`3896d655f4d4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3896d655f4d491c67d669a15f275a39f713410f8) `BPF_FUNC_copy_from_user()` | 5.10 | | [`07be4c4a3e7a`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=07be4c4a3e7a0db148e44b16c5190e753d1c8569) @@ -226,6 +227,7 @@ Helper | Kernel version | License | Commit | `BPF_FUNC_get_current_task()` | 4.8 | GPL | [`606274c5abd8`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=606274c5abd8e245add01bc7145a8cbb92b69ba8) `BPF_FUNC_get_current_task_btf()` | 5.11 | GPL | [`3ca1032ab7ab`](https://github.com/torvalds/linux/commit/3ca1032ab7ab010eccb107aa515598788f7d93bb) `BPF_FUNC_get_current_uid_gid()` | 4.2 | | [`ffeedafbf023`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89) +`BPF_FUNC_get_func_ip()` | 5.15 | | [`5d8b583d04ae`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=5d8b583d04aedb3bd5f6d227a334c210c7d735f9) `BPF_FUNC_get_hash_recalc()` | 4.8 | | [`13c5c240f789`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=13c5c240f789bbd2bcacb14a23771491485ae61f) `BPF_FUNC_get_listener_sock()` | 5.1 | | [`dbafd7ddd623`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/dbafd7ddd62369b2f3926ab847cbf8fc40e800b7) `BPF_FUNC_get_local_storage()` | 4.19 | | [`cd3394317653`](https://github.com/torvalds/linux/commit/cd3394317653837e2eb5c5d0904a8996102af9fc) @@ -352,6 +354,8 @@ Helper | Kernel version | License | Commit | `BPF_FUNC_store_hdr_opt()` | 5.10 | | [`0813a841566f`](https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit?id=0813a841566f0962a5551be7749b43c45f0022a0) `BPF_FUNC_strtol()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a) `BPF_FUNC_strtoul()` | 5.2 | | [`d7a4cb9b6705`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/d7a4cb9b6705a89937d12c8158a35a3145dc967a) +`BPF_FUNC_sys_bpf()` | 5.14 | | [`79a7f8bdb159`](https://github.com/torvalds/linux/commit/79a7f8bdb159d9914b58740f3d31d602a6e4aca8) +`BPF_FUNC_sys_close()` | 5.14 | | [`3abea089246f`](https://github.com/torvalds/linux/commit/3abea089246f76c1517b054ddb5946f3f1dbd2c0) `BPF_FUNC_sysctl_get_current_value()` | 5.2 | | [`1d11b3016cec`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/1d11b3016cec4ed9770b98e82a61708c8f4926e7) `BPF_FUNC_sysctl_get_name()` | 5.2 | | [`808649fb787d`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/808649fb787d918a48a360a668ee4ee9023f0c11) `BPF_FUNC_sysctl_get_new_value()` | 5.2 | | [`4e63acdff864`](https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4e63acdff864654cee0ac5aaeda3913798ee78f6) @@ -364,6 +368,10 @@ Helper | Kernel version | License | Commit | `BPF_FUNC_tcp_send_ack()` | 5.5 | | [`206057fe020a`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=206057fe020ac5c037d5e2dd6562a9bd216ec765) `BPF_FUNC_tcp_sock()` | 5.1 | | [`655a51e536c0`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=655a51e536c09d15ffa3603b1b6fce2b45b85a1f) `BPF_FUNC_this_cpu_ptr()` | 5.10 | | [`63d9b80dcf2c`](https://github.com/torvalds/linux/commit/63d9b80dcf2c67bc5ade61cbbaa09d7af21f43f1) | +`BPF_FUNC_timer_init()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) +`BPF_FUNC_timer_set_callback()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) +`BPF_FUNC_timer_start()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) +`BPF_FUNC_timer_cancel()` | 5.15 | | [`b00628b1c7d5`](https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=b00628b1c7d595ae5b544e059c27b1f5828314b4) `BPF_FUNC_trace_printk()` | 4.1 | GPL | [`9c959c863f82`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=9c959c863f8217a2ff3d7c296e8223654d240569) `BPF_FUNC_xdp_adjust_head()` | 4.10 | | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03) `BPF_FUNC_xdp_adjust_meta()` | 4.15 | | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da) diff --git a/introspection/bps.c b/introspection/bps.c index e92da3f6..25a88cbd 100644 --- a/introspection/bps.c +++ b/introspection/bps.c @@ -47,6 +47,7 @@ static const char * const prog_type_strings[] = { [BPF_PROG_TYPE_EXT] = "ext", [BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", }; static const char * const map_type_strings[] = { diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h index 3490bc14..bf4bc3a6 100644 --- a/src/cc/compat/linux/virtual_bpf.h +++ b/src/cc/compat/linux/virtual_bpf.h @@ -325,9 +325,6 @@ union bpf_iter_link_info { * **BPF_PROG_TYPE_SK_LOOKUP** * *data_in* and *data_out* must be NULL. * - * **BPF_PROG_TYPE_XDP** - * *ctx_in* and *ctx_out* must be NULL. - * * **BPF_PROG_TYPE_RAW_TRACEPOINT**, * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** * @@ -528,6 +525,15 @@ union bpf_iter_link_info { * Look up an element with the given *key* in the map referred to * by the file descriptor *fd*, and if found, delete the element. * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types * implement this command as a "pop" operation, deleting the top * element rather than one corresponding to *key*. @@ -537,6 +543,10 @@ union bpf_iter_link_info { * This command is only valid for the following map types: * * **BPF_MAP_TYPE_QUEUE** * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -838,6 +848,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, @@ -938,6 +949,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ }; enum bpf_attach_type { @@ -980,6 +992,8 @@ enum bpf_attach_type { BPF_SK_LOOKUP, BPF_XDP, BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, __MAX_BPF_ATTACH_TYPE }; @@ -1098,8 +1112,8 @@ enum bpf_link_type { /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -1107,15 +1121,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -1315,6 +1333,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; + __u32 :32; /* pad */ + __aligned_u64 fd_array; /* array of FDs */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2535,8 +2555,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -3223,7 +3247,7 @@ union bpf_attr { * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -4736,6 +4760,94 @@ union bpf_attr { * be zero-terminated except when **str_size** is 0. * * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * Return + * Address of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4904,6 +5016,14 @@ union bpf_attr { FN(check_mtu), \ FN(for_each_map_elem), \ FN(snprintf), \ + FN(sys_bpf), \ + FN(btf_find_by_name_kind), \ + FN(sys_close), \ + FN(timer_init), \ + FN(timer_set_callback), \ + FN(timer_start), \ + FN(timer_cancel), \ + FN(get_func_ip), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5081,6 +5201,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ @@ -5365,6 +5491,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 @@ -6010,6 +6150,11 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h index e9137f7f..a4e9b705 100644 --- a/src/cc/export/helpers.h +++ b/src/cc/export/helpers.h @@ -847,6 +847,23 @@ static long (*bpf_snprintf)(char *str, __u32 str_size, const char *fmt, __u64 *data, __u32 data_len) = (void *)BPF_FUNC_snprintf; +static long (*bpf_sys_bpf)(__u32 cmd, void *attr, __u32 attr_size) = + (void *)BPF_FUNC_sys_bpf; +static long (*bpf_btf_find_by_name_kind)(char *name, int name_sz, __u32 kind, int flags) = + (void *)BPF_FUNC_btf_find_by_name_kind; +static long (*bpf_sys_close)(__u32 fd) = (void *)BPF_FUNC_sys_close; + +struct bpf_timer; +static long (*bpf_timer_init)(struct bpf_timer *timer, void *map, __u64 flags) = + (void *)BPF_FUNC_timer_init; +static long (*bpf_timer_set_callback)(struct bpf_timer *timer, void *callback_fn) = + (void *)BPF_FUNC_timer_set_callback; +static long (*bpf_timer_start)(struct bpf_timer *timer, __u64 nsecs, __u64 flags) = + (void *)BPF_FUNC_timer_start; +static long (*bpf_timer_cancel)(struct bpf_timer *timer) = (void *)BPF_FUNC_timer_cancel; + +static __u64 (*bpf_get_func_ip)(void *ctx) = (void *)BPF_FUNC_get_func_ip; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c index b83d68fd..f3608cfe 100644 --- a/src/cc/libbpf.c +++ b/src/cc/libbpf.c @@ -270,6 +270,14 @@ static struct bpf_helper helpers[] = { {"check_mtu", "5.12"}, {"for_each_map_elem", "5.13"}, {"snprintf", "5.13"}, + {"sys_bpf", "5.14"}, + {"btf_find_by_name_kind", "5.14"}, + {"sys_close", "5.14"}, + {"timer_init", "5.15"}, + {"timer_set_callback", "5.15"}, + {"timer_start", "5.15"}, + {"timer_cancel", "5.15"}, + {"get_func_ip", "5.15"}, }; static uint64_t ptr_to_u64(void *ptr) -- 2.31.1