import iproute-5.12.0-3.el8
This commit is contained in:
parent
a70959b525
commit
0ea090084e
123
SOURCES/0003-mptcp-add-support-for-port-based-endpoint.patch
Normal file
123
SOURCES/0003-mptcp-add-support-for-port-based-endpoint.patch
Normal file
@ -0,0 +1,123 @@
|
||||
From 0ccd2dbb3eca44a892a183db8c2e4221488ecf51 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <0ccd2dbb3eca44a892a183db8c2e4221488ecf51.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Mon, 9 Aug 2021 15:18:11 +0200
|
||||
Subject: [PATCH] mptcp: add support for port based endpoint
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1984733
|
||||
Upstream Status: iproute2.git commit 42fbca91
|
||||
|
||||
commit 42fbca91cd616ae714c3f6aa2d4e2c3399498e38
|
||||
Author: Paolo Abeni <pabeni@redhat.com>
|
||||
Date: Fri Feb 19 21:42:55 2021 +0100
|
||||
|
||||
mptcp: add support for port based endpoint
|
||||
|
||||
The feature is supported by the kernel since 5.11-net-next,
|
||||
let's allow user-space to use it.
|
||||
|
||||
Just parse and dump an additional, per endpoint, u16 attribute
|
||||
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
Signed-off-by: David Ahern <dsahern@kernel.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
ip/ipmptcp.c | 16 ++++++++++++++--
|
||||
man/man8/ip-mptcp.8 | 8 ++++++++
|
||||
2 files changed, 22 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/ip/ipmptcp.c b/ip/ipmptcp.c
|
||||
index e1ffafb3..5f659b59 100644
|
||||
--- a/ip/ipmptcp.c
|
||||
+++ b/ip/ipmptcp.c
|
||||
@@ -17,7 +17,7 @@ static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: ip mptcp endpoint add ADDRESS [ dev NAME ] [ id ID ]\n"
|
||||
- " [ FLAG-LIST ]\n"
|
||||
+ " [ port NR ] [ FLAG-LIST ]\n"
|
||||
" ip mptcp endpoint delete id ID\n"
|
||||
" ip mptcp endpoint show [ id ID ]\n"
|
||||
" ip mptcp endpoint flush\n"
|
||||
@@ -97,6 +97,7 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
|
||||
bool id_set = false;
|
||||
__u32 index = 0;
|
||||
__u32 flags = 0;
|
||||
+ __u16 port = 0;
|
||||
__u8 id = 0;
|
||||
|
||||
ll_init_map(&rth);
|
||||
@@ -123,6 +124,10 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
|
||||
if (!index)
|
||||
invarg("device does not exist\n", ifname);
|
||||
|
||||
+ } else if (matches(*argv, "port") == 0) {
|
||||
+ NEXT_ARG();
|
||||
+ if (get_u16(&port, *argv, 0))
|
||||
+ invarg("expected port", *argv);
|
||||
} else if (get_addr(&address, *argv, AF_UNSPEC) == 0) {
|
||||
addr_set = true;
|
||||
} else {
|
||||
@@ -145,6 +150,8 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
|
||||
addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_FLAGS, flags);
|
||||
if (index)
|
||||
addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_IF_IDX, index);
|
||||
+ if (port)
|
||||
+ addattr16(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_PORT, port);
|
||||
if (addr_set) {
|
||||
int type;
|
||||
|
||||
@@ -181,8 +188,8 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo)
|
||||
__u8 family = AF_UNSPEC, addr_attr_type;
|
||||
const char *ifname;
|
||||
unsigned int flags;
|
||||
+ __u16 id, port;
|
||||
int index;
|
||||
- __u16 id;
|
||||
|
||||
parse_rtattr_nested(tb, MPTCP_PM_ADDR_ATTR_MAX, addrinfo);
|
||||
|
||||
@@ -196,6 +203,11 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo)
|
||||
print_string(PRINT_ANY, "address", "%s ",
|
||||
format_host_rta(family, tb[addr_attr_type]));
|
||||
}
|
||||
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
|
||||
+ port = rta_getattr_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]);
|
||||
+ if (port)
|
||||
+ print_uint(PRINT_ANY, "port", "port %u ", port);
|
||||
+ }
|
||||
if (tb[MPTCP_PM_ADDR_ATTR_ID]) {
|
||||
id = rta_getattr_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
|
||||
print_uint(PRINT_ANY, "id", "id %u ", id);
|
||||
diff --git a/man/man8/ip-mptcp.8 b/man/man8/ip-mptcp.8
|
||||
index ef8409ea..98cb93b9 100644
|
||||
--- a/man/man8/ip-mptcp.8
|
||||
+++ b/man/man8/ip-mptcp.8
|
||||
@@ -20,6 +20,8 @@ ip-mptcp \- MPTCP path manager configuration
|
||||
.ti -8
|
||||
.BR "ip mptcp endpoint add "
|
||||
.IR IFADDR
|
||||
+.RB "[ " port
|
||||
+.IR PORT " ]"
|
||||
.RB "[ " dev
|
||||
.IR IFNAME " ]"
|
||||
.RB "[ " id
|
||||
@@ -87,6 +89,12 @@ ip mptcp endpoint flush flush all existing MPTCP endpoints
|
||||
.TE
|
||||
|
||||
.TP
|
||||
+.IR PORT
|
||||
+When a port number is specified, incoming MPTCP subflows for already
|
||||
+established MPTCP sockets will be accepted on the specified port, regardless
|
||||
+of the original listener port accepting the first MPTCP subflow and/or
|
||||
+this peer being actually on the client side.
|
||||
+
|
||||
.IR ID
|
||||
is a unique numeric identifier for the given endpoint
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
986
SOURCES/0004-Update-kernel-headers.patch
Normal file
986
SOURCES/0004-Update-kernel-headers.patch
Normal file
@ -0,0 +1,986 @@
|
||||
From 2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Wed, 11 Aug 2021 12:55:14 +0200
|
||||
Subject: [PATCH] Update kernel headers
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
|
||||
Upstream Status: iproute2.git commit a5b355c0
|
||||
|
||||
commit a5b355c08c62fb5b3a42d0e27ef05571c7b30e2e
|
||||
Author: David Ahern <dsahern@kernel.org>
|
||||
Date: Fri Mar 19 14:59:17 2021 +0000
|
||||
|
||||
Update kernel headers
|
||||
|
||||
Update kernel headers to commit:
|
||||
38cb57602369 ("selftests: net: forwarding: Fix a typo")
|
||||
|
||||
Signed-off-by: David Ahern <dsahern@kernel.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
include/uapi/linux/bpf.h | 764 ++++++++++++++++++++++++++++++++-
|
||||
include/uapi/linux/btf.h | 5 +-
|
||||
include/uapi/linux/nexthop.h | 47 +-
|
||||
include/uapi/linux/pkt_cls.h | 2 +
|
||||
include/uapi/linux/rtnetlink.h | 7 +
|
||||
5 files changed, 818 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
|
||||
index b1aba6af..502934f7 100644
|
||||
--- a/include/uapi/linux/bpf.h
|
||||
+++ b/include/uapi/linux/bpf.h
|
||||
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
|
||||
} map;
|
||||
};
|
||||
|
||||
-/* BPF syscall commands, see bpf(2) man-page for details. */
|
||||
+/* BPF syscall commands, see bpf(2) man-page for more details. */
|
||||
+/**
|
||||
+ * DOC: eBPF Syscall Preamble
|
||||
+ *
|
||||
+ * The operation to be performed by the **bpf**\ () system call is determined
|
||||
+ * by the *cmd* argument. Each operation takes an accompanying argument,
|
||||
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
|
||||
+ * below). The size argument is the size of the union pointed to by *attr*.
|
||||
+ */
|
||||
+/**
|
||||
+ * DOC: eBPF Syscall Commands
|
||||
+ *
|
||||
+ * BPF_MAP_CREATE
|
||||
+ * Description
|
||||
+ * Create a map and return a file descriptor that refers to the
|
||||
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
|
||||
+ * is automatically enabled for the new file descriptor.
|
||||
+ *
|
||||
+ * Applying **close**\ (2) to the file descriptor returned by
|
||||
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_MAP_LOOKUP_ELEM
|
||||
+ * Description
|
||||
+ * Look up an element with a given *key* in the map referred to
|
||||
+ * by the file descriptor *map_fd*.
|
||||
+ *
|
||||
+ * The *flags* argument may be specified as one of the
|
||||
+ * following:
|
||||
+ *
|
||||
+ * **BPF_F_LOCK**
|
||||
+ * Look up the value of a spin-locked map without
|
||||
+ * returning the lock. This must be specified if the
|
||||
+ * elements contain a spinlock.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_UPDATE_ELEM
|
||||
+ * Description
|
||||
+ * Create or update an element (key/value pair) in a specified map.
|
||||
+ *
|
||||
+ * The *flags* argument should be specified as one of the
|
||||
+ * following:
|
||||
+ *
|
||||
+ * **BPF_ANY**
|
||||
+ * Create a new element or update an existing element.
|
||||
+ * **BPF_NOEXIST**
|
||||
+ * Create a new element only if it did not exist.
|
||||
+ * **BPF_EXIST**
|
||||
+ * Update an existing element.
|
||||
+ * **BPF_F_LOCK**
|
||||
+ * Update a spin_lock-ed map element.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
|
||||
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
|
||||
+ *
|
||||
+ * **E2BIG**
|
||||
+ * The number of elements in the map reached the
|
||||
+ * *max_entries* limit specified at map creation time.
|
||||
+ * **EEXIST**
|
||||
+ * If *flags* specifies **BPF_NOEXIST** and the element
|
||||
+ * with *key* already exists in the map.
|
||||
+ * **ENOENT**
|
||||
+ * If *flags* specifies **BPF_EXIST** and the element with
|
||||
+ * *key* does not exist in the map.
|
||||
+ *
|
||||
+ * BPF_MAP_DELETE_ELEM
|
||||
+ * Description
|
||||
+ * Look up and delete an element by key in a specified map.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_GET_NEXT_KEY
|
||||
+ * Description
|
||||
+ * Look up an element by key in a specified map and return the key
|
||||
+ * of the next element. Can be used to iterate over all elements
|
||||
+ * in the map.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * The following cases can be used to iterate over all elements of
|
||||
+ * the map:
|
||||
+ *
|
||||
+ * * If *key* is not found, the operation returns zero and sets
|
||||
+ * the *next_key* pointer to the key of the first element.
|
||||
+ * * If *key* is found, the operation returns zero and sets the
|
||||
+ * *next_key* pointer to the key of the next element.
|
||||
+ * * If *key* is the last element, returns -1 and *errno* is set
|
||||
+ * to **ENOENT**.
|
||||
+ *
|
||||
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
|
||||
+ * **EINVAL** on error.
|
||||
+ *
|
||||
+ * BPF_PROG_LOAD
|
||||
+ * Description
|
||||
+ * Verify and load an eBPF program, returning a new file
|
||||
+ * descriptor associated with the program.
|
||||
+ *
|
||||
+ * Applying **close**\ (2) to the file descriptor returned by
|
||||
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
|
||||
+ *
|
||||
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
|
||||
+ * automatically enabled for the new file descriptor.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_OBJ_PIN
|
||||
+ * Description
|
||||
+ * Pin an eBPF program or map referred by the specified *bpf_fd*
|
||||
+ * to the provided *pathname* on the filesystem.
|
||||
+ *
|
||||
+ * The *pathname* argument must not contain a dot (".").
|
||||
+ *
|
||||
+ * On success, *pathname* retains a reference to the eBPF object,
|
||||
+ * preventing deallocation of the object when the original
|
||||
+ * *bpf_fd* is closed. This allows the eBPF object to live beyond
|
||||
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
|
||||
+ * process.
|
||||
+ *
|
||||
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
|
||||
+ * unpins the object from the filesystem, removing the reference.
|
||||
+ * If no other file descriptors or filesystem nodes refer to the
|
||||
+ * same object, it will be deallocated (see NOTES).
|
||||
+ *
|
||||
+ * The filesystem type for the parent directory of *pathname* must
|
||||
+ * be **BPF_FS_MAGIC**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_OBJ_GET
|
||||
+ * Description
|
||||
+ * Open a file descriptor for the eBPF object pinned to the
|
||||
+ * specified *pathname*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_PROG_ATTACH
|
||||
+ * Description
|
||||
+ * Attach an eBPF program to a *target_fd* at the specified
|
||||
+ * *attach_type* hook.
|
||||
+ *
|
||||
+ * The *attach_type* specifies the eBPF attachment point to
|
||||
+ * attach the program to, and must be one of *bpf_attach_type*
|
||||
+ * (see below).
|
||||
+ *
|
||||
+ * The *attach_bpf_fd* must be a valid file descriptor for a
|
||||
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
|
||||
+ * or sock_ops type corresponding to the specified *attach_type*.
|
||||
+ *
|
||||
+ * The *target_fd* must be a valid file descriptor for a kernel
|
||||
+ * object which depends on the attach type of *attach_bpf_fd*:
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
+ * **BPF_PROG_TYPE_SOCK_OPS**
|
||||
+ *
|
||||
+ * Control Group v2 hierarchy with the eBPF controller
|
||||
+ * enabled. Requires the kernel to be compiled with
|
||||
+ * **CONFIG_CGROUP_BPF**.
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
+ *
|
||||
+ * Network namespace (eg /proc/self/ns/net).
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
+ *
|
||||
+ * LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_SK_SKB**,
|
||||
+ * **BPF_PROG_TYPE_SK_MSG**
|
||||
+ *
|
||||
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_PROG_DETACH
|
||||
+ * Description
|
||||
+ * Detach the eBPF program associated with the *target_fd* at the
|
||||
+ * hook specified by *attach_type*. The program must have been
|
||||
+ * previously attached using **BPF_PROG_ATTACH**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_PROG_TEST_RUN
|
||||
+ * Description
|
||||
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
|
||||
+ * number of times against a provided program context *ctx_in* and
|
||||
+ * data *data_in*, and return the modified program context
|
||||
+ * *ctx_out*, *data_out* (for example, packet data), result of the
|
||||
+ * execution *retval*, and *duration* of the test run.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * **ENOSPC**
|
||||
+ * Either *data_size_out* or *ctx_size_out* is too small.
|
||||
+ * **ENOTSUPP**
|
||||
+ * This command is not supported by the program type of
|
||||
+ * the program referred to by *prog_fd*.
|
||||
+ *
|
||||
+ * BPF_PROG_GET_NEXT_ID
|
||||
+ * Description
|
||||
+ * Fetch the next eBPF program currently loaded into the kernel.
|
||||
+ *
|
||||
+ * Looks for the eBPF program with an id greater than *start_id*
|
||||
+ * and updates *next_id* on success. If no other eBPF programs
|
||||
+ * remain with ids higher than *start_id*, returns -1 and sets
|
||||
+ * *errno* to **ENOENT**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, or when no id remains, -1
|
||||
+ * is returned and *errno* is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_GET_NEXT_ID
|
||||
+ * Description
|
||||
+ * Fetch the next eBPF map currently loaded into the kernel.
|
||||
+ *
|
||||
+ * Looks for the eBPF map with an id greater than *start_id*
|
||||
+ * and updates *next_id* on success. If no other eBPF maps
|
||||
+ * remain with ids higher than *start_id*, returns -1 and sets
|
||||
+ * *errno* to **ENOENT**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, or when no id remains, -1
|
||||
+ * is returned and *errno* is set appropriately.
|
||||
+ *
|
||||
+ * BPF_PROG_GET_FD_BY_ID
|
||||
+ * Description
|
||||
+ * Open a file descriptor for the eBPF program corresponding to
|
||||
+ * *prog_id*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_MAP_GET_FD_BY_ID
|
||||
+ * Description
|
||||
+ * Open a file descriptor for the eBPF map corresponding to
|
||||
+ * *map_id*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_OBJ_GET_INFO_BY_FD
|
||||
+ * Description
|
||||
+ * Obtain information about the eBPF object corresponding to
|
||||
+ * *bpf_fd*.
|
||||
+ *
|
||||
+ * Populates up to *info_len* bytes of *info*, which will be in
|
||||
+ * one of the following formats depending on the eBPF object type
|
||||
+ * of *bpf_fd*:
|
||||
+ *
|
||||
+ * * **struct bpf_prog_info**
|
||||
+ * * **struct bpf_map_info**
|
||||
+ * * **struct bpf_btf_info**
|
||||
+ * * **struct bpf_link_info**
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_PROG_QUERY
|
||||
+ * Description
|
||||
+ * Obtain information about eBPF programs associated with the
|
||||
+ * specified *attach_type* hook.
|
||||
+ *
|
||||
+ * The *target_fd* must be a valid file descriptor for a kernel
|
||||
+ * object which depends on the attach type of *attach_bpf_fd*:
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
|
||||
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
|
||||
+ * **BPF_PROG_TYPE_SOCK_OPS**
|
||||
+ *
|
||||
+ * Control Group v2 hierarchy with the eBPF controller
|
||||
+ * enabled. Requires the kernel to be compiled with
|
||||
+ * **CONFIG_CGROUP_BPF**.
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
|
||||
+ *
|
||||
+ * Network namespace (eg /proc/self/ns/net).
|
||||
+ *
|
||||
+ * **BPF_PROG_TYPE_LIRC_MODE2**
|
||||
+ *
|
||||
+ * LIRC device path (eg /dev/lircN). Requires the kernel
|
||||
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
|
||||
+ *
|
||||
+ * **BPF_PROG_QUERY** always fetches the number of programs
|
||||
+ * attached and the *attach_flags* which were used to attach those
|
||||
+ * programs. Additionally, if *prog_ids* is nonzero and the number
|
||||
+ * of attached programs is less than *prog_cnt*, populates
|
||||
+ * *prog_ids* with the eBPF program ids of the programs attached
|
||||
+ * at *target_fd*.
|
||||
+ *
|
||||
+ * The following flags may alter the result:
|
||||
+ *
|
||||
+ * **BPF_F_QUERY_EFFECTIVE**
|
||||
+ * Only return information regarding programs which are
|
||||
+ * currently effective at the specified *target_fd*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_RAW_TRACEPOINT_OPEN
|
||||
+ * Description
|
||||
+ * Attach an eBPF program to a tracepoint *name* to access kernel
|
||||
+ * internal arguments of the tracepoint in their raw form.
|
||||
+ *
|
||||
+ * The *prog_fd* must be a valid file descriptor associated with
|
||||
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
|
||||
+ *
|
||||
+ * No ABI guarantees are made about the content of tracepoint
|
||||
+ * arguments exposed to the corresponding eBPF program.
|
||||
+ *
|
||||
+ * Applying **close**\ (2) to the file descriptor returned by
|
||||
+ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_BTF_LOAD
|
||||
+ * Description
|
||||
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
|
||||
+ * returning a new file descriptor associated with the metadata.
|
||||
+ * BTF is described in more detail at
|
||||
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
|
||||
+ *
|
||||
+ * The *btf* parameter must point to valid memory providing
|
||||
+ * *btf_size* bytes of BTF binary metadata.
|
||||
+ *
|
||||
+ * The returned file descriptor can be passed to other **bpf**\ ()
|
||||
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
|
||||
+ * associate the BTF with those objects.
|
||||
+ *
|
||||
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
|
||||
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
|
||||
+ * *btf_log_level* which allow the kernel to return freeform log
|
||||
+ * output regarding the BTF verification process.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_BTF_GET_FD_BY_ID
|
||||
+ * Description
|
||||
+ * Open a file descriptor for the BPF Type Format (BTF)
|
||||
+ * corresponding to *btf_id*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_TASK_FD_QUERY
|
||||
+ * Description
|
||||
+ * Obtain information about eBPF programs associated with the
|
||||
+ * target process identified by *pid* and *fd*.
|
||||
+ *
|
||||
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
|
||||
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
|
||||
+ * be populated with the eBPF program id and file descriptor type
|
||||
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
|
||||
+ * uprobe, the *probe_offset* and *probe_addr* will also be
|
||||
+ * populated. Optionally, if *buf* is provided, then up to
|
||||
+ * *buf_len* bytes of *buf* will be populated with the name of
|
||||
+ * the tracepoint, kprobe or uprobe.
|
||||
+ *
|
||||
+ * The resulting *prog_id* may be introspected in deeper detail
|
||||
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
|
||||
+ * Description
|
||||
+ * Look up an element with the given *key* in the map referred to
|
||||
+ * by the file descriptor *fd*, and if found, delete the element.
|
||||
+ *
|
||||
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
|
||||
+ * implement this command as a "pop" operation, deleting the top
|
||||
+ * element rather than one corresponding to *key*.
|
||||
+ * The *key* and *key_len* parameters should be zeroed when
|
||||
+ * issuing this operation for these map types.
|
||||
+ *
|
||||
+ * This command is only valid for the following map types:
|
||||
+ * * **BPF_MAP_TYPE_QUEUE**
|
||||
+ * * **BPF_MAP_TYPE_STACK**
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_FREEZE
|
||||
+ * Description
|
||||
+ * Freeze the permissions of the specified map.
|
||||
+ *
|
||||
+ * Write permissions may be frozen by passing zero *flags*.
|
||||
+ * Upon success, no future syscall invocations may alter the
|
||||
+ * map state of *map_fd*. Write operations from eBPF programs
|
||||
+ * are still possible for a frozen map.
|
||||
+ *
|
||||
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_BTF_GET_NEXT_ID
|
||||
+ * Description
|
||||
+ * Fetch the next BPF Type Format (BTF) object currently loaded
|
||||
+ * into the kernel.
|
||||
+ *
|
||||
+ * Looks for the BTF object with an id greater than *start_id*
|
||||
+ * and updates *next_id* on success. If no other BTF objects
|
||||
+ * remain with ids higher than *start_id*, returns -1 and sets
|
||||
+ * *errno* to **ENOENT**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, or when no id remains, -1
|
||||
+ * is returned and *errno* is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_LOOKUP_BATCH
|
||||
+ * Description
|
||||
+ * Iterate and fetch multiple elements in a map.
|
||||
+ *
|
||||
+ * Two opaque values are used to manage batch operations,
|
||||
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
|
||||
+ * to NULL to begin the batched operation. After each subsequent
|
||||
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
|
||||
+ * *out_batch* as the *in_batch* for the next operation to
|
||||
+ * continue iteration from the current point.
|
||||
+ *
|
||||
+ * The *keys* and *values* are output parameters which must point
|
||||
+ * to memory large enough to hold *count* items based on the key
|
||||
+ * and value size of the map *map_fd*. The *keys* buffer must be
|
||||
+ * of *key_size* * *count*. The *values* buffer must be of
|
||||
+ * *value_size* * *count*.
|
||||
+ *
|
||||
+ * The *elem_flags* argument may be specified as one of the
|
||||
+ * following:
|
||||
+ *
|
||||
+ * **BPF_F_LOCK**
|
||||
+ * Look up the value of a spin-locked map without
|
||||
+ * returning the lock. This must be specified if the
|
||||
+ * elements contain a spinlock.
|
||||
+ *
|
||||
+ * On success, *count* elements from the map are copied into the
|
||||
+ * user buffer, with the keys copied into *keys* and the values
|
||||
+ * copied into the corresponding indices in *values*.
|
||||
+ *
|
||||
+ * If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
+ * is set to the number of successfully processed elements.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
|
||||
+ * *values* is too small to dump an entire bucket during
|
||||
+ * iteration of a hash-based map type.
|
||||
+ *
|
||||
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
|
||||
+ * Description
|
||||
+ * Iterate and delete all elements in a map.
|
||||
+ *
|
||||
+ * This operation has the same behavior as
|
||||
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
|
||||
+ *
|
||||
+ * * Every element that is successfully returned is also deleted
|
||||
+ * from the map. This is at least *count* elements. Note that
|
||||
+ * *count* is both an input and an output parameter.
|
||||
+ * * Upon returning with *errno* set to **EFAULT**, up to
|
||||
+ * *count* elements may be deleted without returning the keys
|
||||
+ * and values of the deleted elements.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_MAP_UPDATE_BATCH
|
||||
+ * Description
|
||||
+ * Update multiple elements in a map by *key*.
|
||||
+ *
|
||||
+ * The *keys* and *values* are input parameters which must point
|
||||
+ * to memory large enough to hold *count* items based on the key
|
||||
+ * and value size of the map *map_fd*. The *keys* buffer must be
|
||||
+ * of *key_size* * *count*. The *values* buffer must be of
|
||||
+ * *value_size* * *count*.
|
||||
+ *
|
||||
+ * Each element specified in *keys* is sequentially updated to the
|
||||
+ * value in the corresponding index in *values*. The *in_batch*
|
||||
+ * and *out_batch* parameters are ignored and should be zeroed.
|
||||
+ *
|
||||
+ * The *elem_flags* argument should be specified as one of the
|
||||
+ * following:
|
||||
+ *
|
||||
+ * **BPF_ANY**
|
||||
+ * Create new elements or update existing elements.
|
||||
+ * **BPF_NOEXIST**
|
||||
+ * Create new elements only if they do not exist.
|
||||
+ * **BPF_EXIST**
|
||||
+ * Update existing elements.
|
||||
+ * **BPF_F_LOCK**
|
||||
+ * Update spin_lock-ed map elements. This must be
|
||||
+ * specified if the map value contains a spinlock.
|
||||
+ *
|
||||
+ * On success, *count* elements from the map are updated.
|
||||
+ *
|
||||
+ * If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
+ * is set to the number of successfully processed elements.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
|
||||
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
|
||||
+ * the map reached the *max_entries* limit specified at map
|
||||
+ * creation time.
|
||||
+ *
|
||||
+ * May set *errno* to one of the following error codes under
|
||||
+ * specific circumstances:
|
||||
+ *
|
||||
+ * **EEXIST**
|
||||
+ * If *flags* specifies **BPF_NOEXIST** and the element
|
||||
+ * with *key* already exists in the map.
|
||||
+ * **ENOENT**
|
||||
+ * If *flags* specifies **BPF_EXIST** and the element with
|
||||
+ * *key* does not exist in the map.
|
||||
+ *
|
||||
+ * BPF_MAP_DELETE_BATCH
|
||||
+ * Description
|
||||
+ * Delete multiple elements in a map by *key*.
|
||||
+ *
|
||||
+ * The *keys* parameter is an input parameter which must point
|
||||
+ * to memory large enough to hold *count* items based on the key
|
||||
+ * size of the map *map_fd*, that is, *key_size* * *count*.
|
||||
+ *
|
||||
+ * Each element specified in *keys* is sequentially deleted. The
|
||||
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
|
||||
+ * and should be zeroed.
|
||||
+ *
|
||||
+ * The *elem_flags* argument may be specified as one of the
|
||||
+ * following:
|
||||
+ *
|
||||
+ * **BPF_F_LOCK**
|
||||
+ * Look up the value of a spin-locked map without
|
||||
+ * returning the lock. This must be specified if the
|
||||
+ * elements contain a spinlock.
|
||||
+ *
|
||||
+ * On success, *count* elements from the map are deleted.
|
||||
+ *
|
||||
+ * If an error is returned and *errno* is not **EFAULT**, *count*
|
||||
+ * is set to the number of successfully processed elements. If
|
||||
+ * *errno* is **EFAULT**, up to *count* elements may have been
|
||||
+ * deleted.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_LINK_CREATE
|
||||
+ * Description
|
||||
+ * Attach an eBPF program to a *target_fd* at the specified
|
||||
+ * *attach_type* hook and return a file descriptor handle for
|
||||
+ * managing the link.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_LINK_UPDATE
|
||||
+ * Description
|
||||
+ * Update the eBPF program in the specified *link_fd* to
|
||||
+ * *new_prog_fd*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_LINK_GET_FD_BY_ID
|
||||
+ * Description
|
||||
+ * Open a file descriptor for the eBPF Link corresponding to
|
||||
+ * *link_id*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_LINK_GET_NEXT_ID
|
||||
+ * Description
|
||||
+ * Fetch the next eBPF link currently loaded into the kernel.
|
||||
+ *
|
||||
+ * Looks for the eBPF link with an id greater than *start_id*
|
||||
+ * and updates *next_id* on success. If no other eBPF links
|
||||
+ * remain with ids higher than *start_id*, returns -1 and sets
|
||||
+ * *errno* to **ENOENT**.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, or when no id remains, -1
|
||||
+ * is returned and *errno* is set appropriately.
|
||||
+ *
|
||||
+ * BPF_ENABLE_STATS
|
||||
+ * Description
|
||||
+ * Enable eBPF runtime statistics gathering.
|
||||
+ *
|
||||
+ * Runtime statistics gathering for the eBPF runtime is disabled
|
||||
+ * by default to minimize the corresponding performance overhead.
|
||||
+ * This command enables statistics globally.
|
||||
+ *
|
||||
+ * Multiple programs may independently enable statistics.
|
||||
+ * After gathering the desired statistics, eBPF runtime statistics
|
||||
+ * may be disabled again by calling **close**\ (2) for the file
|
||||
+ * descriptor returned by this function. Statistics will only be
|
||||
+ * disabled system-wide when all outstanding file descriptors
|
||||
+ * returned by prior calls for this subcommand are closed.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_ITER_CREATE
|
||||
+ * Description
|
||||
+ * Create an iterator on top of the specified *link_fd* (as
|
||||
+ * previously created using **BPF_LINK_CREATE**) and return a
|
||||
+ * file descriptor that can be used to trigger the iteration.
|
||||
+ *
|
||||
+ * If the resulting file descriptor is pinned to the filesystem
|
||||
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
|
||||
+ * for that path will trigger the iterator to read kernel state
|
||||
+ * using the eBPF program attached to *link_fd*.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * A new file descriptor (a nonnegative integer), or -1 if an
|
||||
+ * error occurred (in which case, *errno* is set appropriately).
|
||||
+ *
|
||||
+ * BPF_LINK_DETACH
|
||||
+ * Description
|
||||
+ * Forcefully detach the specified *link_fd* from its
|
||||
+ * corresponding attachment point.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * BPF_PROG_BIND_MAP
|
||||
+ * Description
|
||||
+ * Bind a map to the lifetime of an eBPF program.
|
||||
+ *
|
||||
+ * The map identified by *map_fd* is bound to the program
|
||||
+ * identified by *prog_fd* and only released when *prog_fd* is
|
||||
+ * released. This may be used in cases where metadata should be
|
||||
+ * associated with a program which otherwise does not contain any
|
||||
+ * references to the map (for example, embedded in the eBPF
|
||||
+ * program instructions).
|
||||
+ *
|
||||
+ * Return
|
||||
+ * Returns zero on success. On error, -1 is returned and *errno*
|
||||
+ * is set appropriately.
|
||||
+ *
|
||||
+ * NOTES
|
||||
+ * eBPF objects (maps and programs) can be shared between processes.
|
||||
+ *
|
||||
+ * * After **fork**\ (2), the child inherits file descriptors
|
||||
+ * referring to the same eBPF objects.
|
||||
+ * * File descriptors referring to eBPF objects can be transferred over
|
||||
+ * **unix**\ (7) domain sockets.
|
||||
+ * * File descriptors referring to eBPF objects can be duplicated in the
|
||||
+ * usual way, using **dup**\ (2) and similar calls.
|
||||
+ * * File descriptors referring to eBPF objects can be pinned to the
|
||||
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
|
||||
+ *
|
||||
+ * An eBPF object is deallocated only after all file descriptors referring
|
||||
+ * to the object have been closed and no references remain pinned to the
|
||||
+ * filesystem or attached (for example, bound to a program or device).
|
||||
+ */
|
||||
enum bpf_cmd {
|
||||
BPF_MAP_CREATE,
|
||||
BPF_MAP_LOOKUP_ELEM,
|
||||
@@ -393,6 +1103,15 @@ enum bpf_link_type {
|
||||
* is struct/union.
|
||||
*/
|
||||
#define BPF_PSEUDO_BTF_ID 3
|
||||
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
|
||||
+ * insn[0].imm: insn offset to the func
|
||||
+ * insn[1].imm: 0
|
||||
+ * insn[0].off: 0
|
||||
+ * insn[1].off: 0
|
||||
+ * ldimm64 rewrite: address of the function
|
||||
+ * verifier type: PTR_TO_FUNC.
|
||||
+ */
|
||||
+#define BPF_PSEUDO_FUNC 4
|
||||
|
||||
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
|
||||
* offset to another bpf function
|
||||
@@ -720,7 +1439,7 @@ union bpf_attr {
|
||||
* parsed and used to produce a manual page. The workflow is the following,
|
||||
* and requires the rst2man utility:
|
||||
*
|
||||
- * $ ./scripts/bpf_helpers_doc.py \
|
||||
+ * $ ./scripts/bpf_doc.py \
|
||||
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
|
||||
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
|
||||
* $ man /tmp/bpf-helpers.7
|
||||
@@ -1765,6 +2484,10 @@ union bpf_attr {
|
||||
* Use with ENCAP_L3/L4 flags to further specify the tunnel
|
||||
* type; *len* is the length of the inner MAC header.
|
||||
*
|
||||
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
|
||||
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
|
||||
+ * L2 type as Ethernet.
|
||||
+ *
|
||||
* A call to this helper is susceptible to change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
@@ -3850,7 +4573,7 @@ union bpf_attr {
|
||||
*
|
||||
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
|
||||
* Description
|
||||
- * Check packet size against exceeding MTU of net device (based
|
||||
+ * Check ctx packet size against exceeding MTU of net device (based
|
||||
* on *ifindex*). This helper will likely be used in combination
|
||||
* with helpers that adjust/change the packet size.
|
||||
*
|
||||
@@ -3915,6 +4638,34 @@ union bpf_attr {
|
||||
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
|
||||
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
|
||||
*
|
||||
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
|
||||
+ * Description
|
||||
+ * For each element in **map**, call **callback_fn** function with
|
||||
+ * **map**, **callback_ctx** and other map-specific parameters.
|
||||
+ * The **callback_fn** should be a static function and
|
||||
+ * the **callback_ctx** should be a pointer to the stack.
|
||||
+ * The **flags** is used to control certain aspects of the helper.
|
||||
+ * Currently, the **flags** must be 0.
|
||||
+ *
|
||||
+ * The following is a list of supported map types and their
|
||||
+ * respective expected callback signatures:
|
||||
+ *
|
||||
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
|
||||
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
|
||||
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
|
||||
+ *
|
||||
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
|
||||
+ *
|
||||
+ * For per_cpu maps, the map_value is the value on the cpu where the
|
||||
+ * bpf_prog is running.
|
||||
+ *
|
||||
+ * If **callback_fn** returns 0, the helper will continue to the next
|
||||
+ * element. If return value is 1, the helper will skip the rest of
|
||||
+ * elements and return. Other return values are not used now.
|
||||
+ *
|
||||
+ * Return
|
||||
+ * The number of traversed map elements for success, **-EINVAL** for
|
||||
+ * invalid **flags**.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
@@ -4081,6 +4832,7 @@ union bpf_attr {
|
||||
FN(ima_inode_hash), \
|
||||
FN(sock_from_file), \
|
||||
FN(check_mtu), \
|
||||
+ FN(for_each_map_elem), \
|
||||
/* */
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
@@ -4174,6 +4926,7 @@ enum {
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
||||
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -5211,7 +5964,10 @@ struct bpf_pidns_info {
|
||||
|
||||
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
|
||||
struct bpf_sk_lookup {
|
||||
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
+ union {
|
||||
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
|
||||
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
|
||||
+ };
|
||||
|
||||
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
|
||||
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
|
||||
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
|
||||
index 4a42eb48..2c42dcac 100644
|
||||
--- a/include/uapi/linux/btf.h
|
||||
+++ b/include/uapi/linux/btf.h
|
||||
@@ -52,7 +52,7 @@ struct btf_type {
|
||||
};
|
||||
};
|
||||
|
||||
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
|
||||
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
|
||||
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
|
||||
#define BTF_INFO_KFLAG(info) ((info) >> 31)
|
||||
|
||||
@@ -72,7 +72,8 @@ struct btf_type {
|
||||
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
|
||||
#define BTF_KIND_VAR 14 /* Variable */
|
||||
#define BTF_KIND_DATASEC 15 /* Section */
|
||||
-#define BTF_KIND_MAX BTF_KIND_DATASEC
|
||||
+#define BTF_KIND_FLOAT 16 /* Floating point */
|
||||
+#define BTF_KIND_MAX BTF_KIND_FLOAT
|
||||
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
|
||||
|
||||
/* For some specific BTF_KIND, "struct btf_type" is immediately
|
||||
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
|
||||
index b0a56139..37b14b4e 100644
|
||||
--- a/include/uapi/linux/nexthop.h
|
||||
+++ b/include/uapi/linux/nexthop.h
|
||||
@@ -21,7 +21,10 @@ struct nexthop_grp {
|
||||
};
|
||||
|
||||
enum {
|
||||
- NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
|
||||
+ NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group
|
||||
+ * default type if not specified
|
||||
+ */
|
||||
+ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */
|
||||
__NEXTHOP_GRP_TYPE_MAX,
|
||||
};
|
||||
|
||||
@@ -52,8 +55,50 @@ enum {
|
||||
NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
|
||||
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
|
||||
|
||||
+ /* nested; resilient nexthop group attributes */
|
||||
+ NHA_RES_GROUP,
|
||||
+ /* nested; nexthop bucket attributes */
|
||||
+ NHA_RES_BUCKET,
|
||||
+
|
||||
__NHA_MAX,
|
||||
};
|
||||
|
||||
#define NHA_MAX (__NHA_MAX - 1)
|
||||
+
|
||||
+enum {
|
||||
+ NHA_RES_GROUP_UNSPEC,
|
||||
+ /* Pad attribute for 64-bit alignment. */
|
||||
+ NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
|
||||
+
|
||||
+ /* u16; number of nexthop buckets in a resilient nexthop group */
|
||||
+ NHA_RES_GROUP_BUCKETS,
|
||||
+ /* clock_t as u32; nexthop bucket idle timer (per-group) */
|
||||
+ NHA_RES_GROUP_IDLE_TIMER,
|
||||
+ /* clock_t as u32; nexthop unbalanced timer */
|
||||
+ NHA_RES_GROUP_UNBALANCED_TIMER,
|
||||
+ /* clock_t as u64; nexthop unbalanced time */
|
||||
+ NHA_RES_GROUP_UNBALANCED_TIME,
|
||||
+
|
||||
+ __NHA_RES_GROUP_MAX,
|
||||
+};
|
||||
+
|
||||
+#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1)
|
||||
+
|
||||
+enum {
|
||||
+ NHA_RES_BUCKET_UNSPEC,
|
||||
+ /* Pad attribute for 64-bit alignment. */
|
||||
+ NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
|
||||
+
|
||||
+ /* u16; nexthop bucket index */
|
||||
+ NHA_RES_BUCKET_INDEX,
|
||||
+ /* clock_t as u64; nexthop bucket idle time */
|
||||
+ NHA_RES_BUCKET_IDLE_TIME,
|
||||
+ /* u32; nexthop id assigned to the nexthop bucket */
|
||||
+ NHA_RES_BUCKET_NH_ID,
|
||||
+
|
||||
+ __NHA_RES_BUCKET_MAX,
|
||||
+};
|
||||
+
|
||||
+#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1)
|
||||
+
|
||||
#endif
|
||||
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
|
||||
index 7ea59cfe..025c40fe 100644
|
||||
--- a/include/uapi/linux/pkt_cls.h
|
||||
+++ b/include/uapi/linux/pkt_cls.h
|
||||
@@ -190,6 +190,8 @@ enum {
|
||||
TCA_POLICE_PAD,
|
||||
TCA_POLICE_RATE64,
|
||||
TCA_POLICE_PEAKRATE64,
|
||||
+ TCA_POLICE_PKTRATE64,
|
||||
+ TCA_POLICE_PKTBURST64,
|
||||
__TCA_POLICE_MAX
|
||||
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
|
||||
};
|
||||
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
|
||||
index b34b9add..f62cccc1 100644
|
||||
--- a/include/uapi/linux/rtnetlink.h
|
||||
+++ b/include/uapi/linux/rtnetlink.h
|
||||
@@ -178,6 +178,13 @@ enum {
|
||||
RTM_GETVLAN,
|
||||
#define RTM_GETVLAN RTM_GETVLAN
|
||||
|
||||
+ RTM_NEWNEXTHOPBUCKET = 116,
|
||||
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
|
||||
+ RTM_DELNEXTHOPBUCKET,
|
||||
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
|
||||
+ RTM_GETNEXTHOPBUCKET,
|
||||
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
|
||||
+
|
||||
__RTM_MAX,
|
||||
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
|
||||
};
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,221 @@
|
||||
From b061aeba93b1c730b7dafeece6b90aad2e7afce8 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <b061aeba93b1c730b7dafeece6b90aad2e7afce8.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Wed, 11 Aug 2021 12:55:14 +0200
|
||||
Subject: [PATCH] police: add support for packet-per-second rate limiting
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
|
||||
Upstream Status: iproute2.git commit cf9ae1bd
|
||||
|
||||
commit cf9ae1bd31187d8ae62bc1bb408e443dbc8bd6a0
|
||||
Author: Baowen Zheng <baowen.zheng@corigine.com>
|
||||
Date: Fri Mar 26 13:50:18 2021 +0100
|
||||
|
||||
police: add support for packet-per-second rate limiting
|
||||
|
||||
Allow a policer action to enforce a rate-limit based on packets-per-second,
|
||||
configurable using a packet-per-second rate and burst parameters.
|
||||
|
||||
e.g.
|
||||
# $TC actions add action police pkts_rate 1000 pkts_burst 200 index 1
|
||||
# $TC actions ls action police
|
||||
total acts 1
|
||||
|
||||
action order 0: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200
|
||||
ref 1 bind 0
|
||||
|
||||
Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
|
||||
Signed-off-by: Simon Horman <simon.horman@netronome.com>
|
||||
Signed-off-by: Louis Peens <louis.peens@netronome.com>
|
||||
Signed-off-by: David Ahern <dsahern@kernel.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
man/man8/tc-police.8 | 35 ++++++++++++++++++++++++-------
|
||||
tc/m_police.c | 50 +++++++++++++++++++++++++++++++++++++++++---
|
||||
2 files changed, 75 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/man/man8/tc-police.8 b/man/man8/tc-police.8
|
||||
index 52279755..86e263bb 100644
|
||||
--- a/man/man8/tc-police.8
|
||||
+++ b/man/man8/tc-police.8
|
||||
@@ -5,9 +5,11 @@ police - policing action
|
||||
.SH SYNOPSIS
|
||||
.in +8
|
||||
.ti -8
|
||||
-.BR tc " ... " "action police"
|
||||
+.BR tc " ... " "action police ["
|
||||
.BI rate " RATE " burst
|
||||
-.IR BYTES [\fB/ BYTES "] ["
|
||||
+.IR BYTES [\fB/ BYTES "] ] ["
|
||||
+.BI pkts_rate " RATE " pkts_burst
|
||||
+.IR PACKETS "] ["
|
||||
.B mtu
|
||||
.IR BYTES [\fB/ BYTES "] ] ["
|
||||
.BI peakrate " RATE"
|
||||
@@ -34,19 +36,29 @@ police - policing action
|
||||
.SH DESCRIPTION
|
||||
The
|
||||
.B police
|
||||
-action allows to limit bandwidth of traffic matched by the filter it is
|
||||
-attached to. Basically there are two different algorithms available to measure
|
||||
-the packet rate: The first one uses an internal dual token bucket and is
|
||||
-configured using the
|
||||
+action allows limiting of the byte or packet rate of traffic matched by the
|
||||
+filter it is attached to.
|
||||
+.P
|
||||
+There are two different algorithms available to measure the byte rate: The
|
||||
+first one uses an internal dual token bucket and is configured using the
|
||||
.BR rate ", " burst ", " mtu ", " peakrate ", " overhead " and " linklayer
|
||||
parameters. The second one uses an in-kernel sampling mechanism. It can be
|
||||
fine-tuned using the
|
||||
.B estimator
|
||||
filter parameter.
|
||||
+.P
|
||||
+There is one algorithm available to measure packet rate and it is similar to
|
||||
+the first algorithm described for byte rate. It is configured using the
|
||||
+.BR pkts_rate " and " pkts_burst
|
||||
+parameters.
|
||||
+.P
|
||||
+At least one of the
|
||||
+.BR rate " and " pkts_rate
|
||||
+parameters must be configured.
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BI rate " RATE"
|
||||
-The maximum traffic rate of packets passing this action. Those exceeding it will
|
||||
+The maximum byte rate of packets passing this action. Those exceeding it will
|
||||
be treated as defined by the
|
||||
.B conform-exceed
|
||||
option.
|
||||
@@ -55,6 +67,15 @@ option.
|
||||
Set the maximum allowed burst in bytes, optionally followed by a slash ('/')
|
||||
sign and cell size which must be a power of 2.
|
||||
.TP
|
||||
+.BI pkts_rate " RATE"
|
||||
+The maximum packet rate of packets passing this action. Those exceeding it will
|
||||
+be treated as defined by the
|
||||
+.B conform-exceed
|
||||
+option.
|
||||
+.TP
|
||||
+.BI pkts_burst " PACKETS"
|
||||
+Set the maximum allowed burst in packets.
|
||||
+.TP
|
||||
.BI mtu " BYTES\fR[\fB/\fIBYTES\fR]"
|
||||
This is the maximum packet size handled by the policer (larger ones will be
|
||||
handled like they exceeded the configured rate). Setting this value correctly
|
||||
diff --git a/tc/m_police.c b/tc/m_police.c
|
||||
index bb51df68..9ef0e40b 100644
|
||||
--- a/tc/m_police.c
|
||||
+++ b/tc/m_police.c
|
||||
@@ -38,7 +38,8 @@ struct action_util police_action_util = {
|
||||
static void usage(void)
|
||||
{
|
||||
fprintf(stderr,
|
||||
- "Usage: ... police rate BPS burst BYTES[/BYTES] [ mtu BYTES[/BYTES] ]\n"
|
||||
+ "Usage: ... police [ rate BPS burst BYTES[/BYTES] ] \n"
|
||||
+ " [ pkts_rate RATE pkts_burst PACKETS ] [ mtu BYTES[/BYTES] ]\n"
|
||||
" [ peakrate BPS ] [ avrate BPS ] [ overhead BYTES ]\n"
|
||||
" [ linklayer TYPE ] [ CONTROL ]\n"
|
||||
"Where: CONTROL := conform-exceed <EXCEEDACT>[/NOTEXCEEDACT]\n"
|
||||
@@ -67,6 +68,7 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p,
|
||||
int Rcell_log = -1, Pcell_log = -1;
|
||||
struct rtattr *tail;
|
||||
__u64 rate64 = 0, prate64 = 0;
|
||||
+ __u64 pps64 = 0, ppsburst64 = 0;
|
||||
|
||||
if (a) /* new way of doing things */
|
||||
NEXT_ARG();
|
||||
@@ -144,6 +146,18 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p,
|
||||
NEXT_ARG();
|
||||
if (get_linklayer(&linklayer, *argv))
|
||||
invarg("linklayer", *argv);
|
||||
+ } else if (matches(*argv, "pkts_rate") == 0) {
|
||||
+ NEXT_ARG();
|
||||
+ if (pps64)
|
||||
+ duparg("pkts_rate", *argv);
|
||||
+ if (get_u64(&pps64, *argv, 10))
|
||||
+ invarg("pkts_rate", *argv);
|
||||
+ } else if (matches(*argv, "pkts_burst") == 0) {
|
||||
+ NEXT_ARG();
|
||||
+ if (ppsburst64)
|
||||
+ duparg("pkts_burst", *argv);
|
||||
+ if (get_u64(&ppsburst64, *argv, 10))
|
||||
+ invarg("pkts_burst", *argv);
|
||||
} else if (strcmp(*argv, "help") == 0) {
|
||||
usage();
|
||||
} else {
|
||||
@@ -161,8 +175,8 @@ action_ctrl_ok:
|
||||
return -1;
|
||||
|
||||
/* Must at least do late binding, use TB or ewma policing */
|
||||
- if (!rate64 && !avrate && !p.index && !mtu) {
|
||||
- fprintf(stderr, "'rate' or 'avrate' or 'mtu' MUST be specified.\n");
|
||||
+ if (!rate64 && !avrate && !p.index && !mtu && !pps64) {
|
||||
+ fprintf(stderr, "'rate' or 'avrate' or 'mtu' or 'pkts_rate' MUST be specified.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -172,6 +186,18 @@ action_ctrl_ok:
|
||||
return -1;
|
||||
}
|
||||
|
||||
+ /* When the packets TB policer is used, pkts_burst is required */
|
||||
+ if (pps64 && !ppsburst64) {
|
||||
+ fprintf(stderr, "'pkts_burst' requires 'pkts_rate'.\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ /* forbid rate and pkts_rate in same action */
|
||||
+ if (pps64 && rate64) {
|
||||
+ fprintf(stderr, "'rate' and 'pkts_rate' are not allowed in same action.\n");
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
if (prate64) {
|
||||
if (!rate64) {
|
||||
fprintf(stderr, "'peakrate' requires 'rate'.\n");
|
||||
@@ -223,6 +249,12 @@ action_ctrl_ok:
|
||||
if (presult)
|
||||
addattr32(n, MAX_MSG, TCA_POLICE_RESULT, presult);
|
||||
|
||||
+ if (pps64) {
|
||||
+ addattr64(n, MAX_MSG, TCA_POLICE_PKTRATE64, pps64);
|
||||
+ ppsburst64 = tc_calc_xmittime(pps64, ppsburst64);
|
||||
+ addattr64(n, MAX_MSG, TCA_POLICE_PKTBURST64, ppsburst64);
|
||||
+ }
|
||||
+
|
||||
addattr_nest_end(n, tail);
|
||||
res = 0;
|
||||
|
||||
@@ -244,6 +276,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
unsigned int buffer;
|
||||
unsigned int linklayer;
|
||||
__u64 rate64, prate64;
|
||||
+ __u64 pps64, ppsburst64;
|
||||
|
||||
if (arg == NULL)
|
||||
return 0;
|
||||
@@ -287,6 +320,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
tc_print_rate(PRINT_FP, NULL, "avrate %s ",
|
||||
rta_getattr_u32(tb[TCA_POLICE_AVRATE]));
|
||||
|
||||
+ if ((tb[TCA_POLICE_PKTRATE64] &&
|
||||
+ RTA_PAYLOAD(tb[TCA_POLICE_PKTRATE64]) >= sizeof(pps64)) &&
|
||||
+ (tb[TCA_POLICE_PKTBURST64] &&
|
||||
+ RTA_PAYLOAD(tb[TCA_POLICE_PKTBURST64]) >= sizeof(ppsburst64))) {
|
||||
+ pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]);
|
||||
+ ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]);
|
||||
+ ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64);
|
||||
+ fprintf(f, "pkts_rate %llu ", pps64);
|
||||
+ fprintf(f, "pkts_burst %llu ", ppsburst64);
|
||||
+ }
|
||||
+
|
||||
print_action_control(f, "action ", p->action, "");
|
||||
|
||||
if (tb[TCA_POLICE_RESULT]) {
|
||||
--
|
||||
2.31.1
|
||||
|
159
SOURCES/0006-police-Add-support-for-json-output.patch
Normal file
159
SOURCES/0006-police-Add-support-for-json-output.patch
Normal file
@ -0,0 +1,159 @@
|
||||
From 04b921c03a4680931df6660b88444f2478fb585c Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <04b921c03a4680931df6660b88444f2478fb585c.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Wed, 11 Aug 2021 12:55:14 +0200
|
||||
Subject: [PATCH] police: Add support for json output
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
|
||||
Upstream Status: iproute2.git commit 0d5cf51e
|
||||
|
||||
commit 0d5cf51e0d6c7bfdc51754381b85367b5f8e254a
|
||||
Author: Roi Dayan <roid@nvidia.com>
|
||||
Date: Mon Jun 7 09:44:08 2021 +0300
|
||||
|
||||
police: Add support for json output
|
||||
|
||||
Change to use the print wrappers instead of fprintf().
|
||||
|
||||
This is example output of the options part before this commit:
|
||||
|
||||
"options": {
|
||||
"handle": 1,
|
||||
"in_hw": true,
|
||||
"actions": [ {
|
||||
"order": 1 police 0x2 ,
|
||||
"control_action": {
|
||||
"type": "drop"
|
||||
},
|
||||
"control_action": {
|
||||
"type": "continue"
|
||||
}overhead 0b linklayer unspec
|
||||
ref 1 bind 1
|
||||
,
|
||||
"used_hw_stats": [ "delayed" ]
|
||||
} ]
|
||||
}
|
||||
|
||||
This is the output of the same dump with this commit:
|
||||
|
||||
"options": {
|
||||
"handle": 1,
|
||||
"in_hw": true,
|
||||
"actions": [ {
|
||||
"order": 1,
|
||||
"kind": "police",
|
||||
"index": 2,
|
||||
"control_action": {
|
||||
"type": "drop"
|
||||
},
|
||||
"control_action": {
|
||||
"type": "continue"
|
||||
},
|
||||
"overhead": 0,
|
||||
"linklayer": "unspec",
|
||||
"ref": 1,
|
||||
"bind": 1,
|
||||
"used_hw_stats": [ "delayed" ]
|
||||
} ]
|
||||
}
|
||||
|
||||
Signed-off-by: Roi Dayan <roid@nvidia.com>
|
||||
Reviewed-by: Paul Blakey <paulb@nvidia.com>
|
||||
Signed-off-by: David Ahern <dsahern@kernel.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
tc/m_police.c | 30 +++++++++++++++++-------------
|
||||
1 file changed, 17 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/tc/m_police.c b/tc/m_police.c
|
||||
index 9ef0e40b..2594c089 100644
|
||||
--- a/tc/m_police.c
|
||||
+++ b/tc/m_police.c
|
||||
@@ -278,18 +278,19 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
__u64 rate64, prate64;
|
||||
__u64 pps64, ppsburst64;
|
||||
|
||||
+ print_string(PRINT_ANY, "kind", "%s", "police");
|
||||
if (arg == NULL)
|
||||
return 0;
|
||||
|
||||
parse_rtattr_nested(tb, TCA_POLICE_MAX, arg);
|
||||
|
||||
if (tb[TCA_POLICE_TBF] == NULL) {
|
||||
- fprintf(f, "[NULL police tbf]");
|
||||
- return 0;
|
||||
+ fprintf(stderr, "[NULL police tbf]");
|
||||
+ return -1;
|
||||
}
|
||||
#ifndef STOOPID_8BYTE
|
||||
if (RTA_PAYLOAD(tb[TCA_POLICE_TBF]) < sizeof(*p)) {
|
||||
- fprintf(f, "[truncated police tbf]");
|
||||
+ fprintf(stderr, "[truncated police tbf]");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
@@ -300,13 +301,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64))
|
||||
rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]);
|
||||
|
||||
- fprintf(f, " police 0x%x ", p->index);
|
||||
+ print_uint(PRINT_ANY, "index", "\t index %u ", p->index);
|
||||
tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64);
|
||||
buffer = tc_calc_xmitsize(rate64, p->burst);
|
||||
print_size(PRINT_FP, NULL, "burst %s ", buffer);
|
||||
print_size(PRINT_FP, NULL, "mtu %s ", p->mtu);
|
||||
if (show_raw)
|
||||
- fprintf(f, "[%08x] ", p->burst);
|
||||
+ print_hex(PRINT_FP, NULL, "[%08x] ", p->burst);
|
||||
|
||||
prate64 = p->peakrate.rate;
|
||||
if (tb[TCA_POLICE_PEAKRATE64] &&
|
||||
@@ -327,8 +328,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]);
|
||||
ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]);
|
||||
ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64);
|
||||
- fprintf(f, "pkts_rate %llu ", pps64);
|
||||
- fprintf(f, "pkts_burst %llu ", ppsburst64);
|
||||
+ print_u64(PRINT_ANY, "pkts_rate", "pkts_rate %llu ", pps64);
|
||||
+ print_u64(PRINT_ANY, "pkts_burst", "pkts_burst %llu ", ppsburst64);
|
||||
}
|
||||
|
||||
print_action_control(f, "action ", p->action, "");
|
||||
@@ -337,14 +338,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
__u32 action = rta_getattr_u32(tb[TCA_POLICE_RESULT]);
|
||||
|
||||
print_action_control(f, "/", action, " ");
|
||||
- } else
|
||||
- fprintf(f, " ");
|
||||
+ } else {
|
||||
+ print_string(PRINT_FP, NULL, " ", NULL);
|
||||
+ }
|
||||
|
||||
- fprintf(f, "overhead %ub ", p->rate.overhead);
|
||||
+ print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead);
|
||||
linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK);
|
||||
if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
|
||||
- fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2));
|
||||
- fprintf(f, "\n\tref %d bind %d", p->refcnt, p->bindcnt);
|
||||
+ print_string(PRINT_ANY, "linklayer", "linklayer %s ",
|
||||
+ sprint_linklayer(linklayer, b2));
|
||||
+ print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt);
|
||||
+ print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt);
|
||||
if (show_stats) {
|
||||
if (tb[TCA_POLICE_TM]) {
|
||||
struct tcf_t *tm = RTA_DATA(tb[TCA_POLICE_TM]);
|
||||
@@ -352,7 +356,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
print_tm(f, tm);
|
||||
}
|
||||
}
|
||||
- fprintf(f, "\n");
|
||||
+ print_nl();
|
||||
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.31.1
|
||||
|
@ -0,0 +1,73 @@
|
||||
From 148b286b52aa8f38d8d7587b598522310067de7b Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <148b286b52aa8f38d8d7587b598522310067de7b.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Wed, 11 Aug 2021 12:55:14 +0200
|
||||
Subject: [PATCH] police: Fix normal output back to what it was
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
|
||||
Upstream Status: iproute2.git commit 71d36000
|
||||
|
||||
commit 71d36000dc9ce8397fc45b680e0c0340df5a28e5
|
||||
Author: Roi Dayan <roid@nvidia.com>
|
||||
Date: Mon Jul 12 15:26:53 2021 +0300
|
||||
|
||||
police: Fix normal output back to what it was
|
||||
|
||||
With the json support fix the normal output was
|
||||
changed. set it back to what it was.
|
||||
Print overhead with print_size().
|
||||
Print newline before ref.
|
||||
|
||||
Fixes: 0d5cf51e0d6c ("police: Add support for json output")
|
||||
Signed-off-by: Roi Dayan <roid@nvidia.com>
|
||||
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
tc/m_police.c | 10 ++++++----
|
||||
1 file changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/tc/m_police.c b/tc/m_police.c
|
||||
index 2594c089..f38ab90a 100644
|
||||
--- a/tc/m_police.c
|
||||
+++ b/tc/m_police.c
|
||||
@@ -278,7 +278,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
__u64 rate64, prate64;
|
||||
__u64 pps64, ppsburst64;
|
||||
|
||||
- print_string(PRINT_ANY, "kind", "%s", "police");
|
||||
+ print_string(PRINT_JSON, "kind", "%s", "police");
|
||||
if (arg == NULL)
|
||||
return 0;
|
||||
|
||||
@@ -301,7 +301,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64))
|
||||
rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]);
|
||||
|
||||
- print_uint(PRINT_ANY, "index", "\t index %u ", p->index);
|
||||
+ print_hex(PRINT_FP, NULL, " police 0x%x ", p->index);
|
||||
+ print_uint(PRINT_JSON, "index", NULL, p->index);
|
||||
tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64);
|
||||
buffer = tc_calc_xmitsize(rate64, p->burst);
|
||||
print_size(PRINT_FP, NULL, "burst %s ", buffer);
|
||||
@@ -342,12 +343,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
|
||||
print_string(PRINT_FP, NULL, " ", NULL);
|
||||
}
|
||||
|
||||
- print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead);
|
||||
+ print_size(PRINT_ANY, "overhead", "overhead %s ", p->rate.overhead);
|
||||
linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK);
|
||||
if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
|
||||
print_string(PRINT_ANY, "linklayer", "linklayer %s ",
|
||||
sprint_linklayer(linklayer, b2));
|
||||
- print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt);
|
||||
+ print_nl();
|
||||
+ print_int(PRINT_ANY, "ref", "\tref %d ", p->refcnt);
|
||||
print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt);
|
||||
if (show_stats) {
|
||||
if (tb[TCA_POLICE_TM]) {
|
||||
--
|
||||
2.31.1
|
||||
|
68
SOURCES/0008-tc-u32-Fix-key-folding-in-sample-option.patch
Normal file
68
SOURCES/0008-tc-u32-Fix-key-folding-in-sample-option.patch
Normal file
@ -0,0 +1,68 @@
|
||||
From 7fcfc0e4d6949ff32df3ed749bad8eb419cebbda Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <7fcfc0e4d6949ff32df3ed749bad8eb419cebbda.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Wed, 11 Aug 2021 14:49:33 +0200
|
||||
Subject: [PATCH] tc: u32: Fix key folding in sample option
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979425
|
||||
Upstream Status: iproute2.git commit 9b7ea92b
|
||||
|
||||
commit 9b7ea92b9e3feff2876f772ace01148b7406839c
|
||||
Author: Phil Sutter <phil@nwl.cc>
|
||||
Date: Wed Aug 4 11:18:28 2021 +0200
|
||||
|
||||
tc: u32: Fix key folding in sample option
|
||||
|
||||
In between Linux kernel 2.4 and 2.6, key folding for hash tables changed
|
||||
in kernel space. When iproute2 dropped support for the older algorithm,
|
||||
the wrong code was removed and kernel 2.4 folding method remained in
|
||||
place. To get things functional for recent kernels again, restoring the
|
||||
old code alone was not sufficient - additional byteorder fixes were
|
||||
needed.
|
||||
|
||||
While being at it, make use of ffs() and thereby align the code with how
|
||||
kernel determines the shift width.
|
||||
|
||||
Fixes: 267480f55383c ("Backout the 2.4 utsname hash patch.")
|
||||
Signed-off-by: Phil Sutter <phil@nwl.cc>
|
||||
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
tc/f_u32.c | 11 ++++++++---
|
||||
1 file changed, 8 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tc/f_u32.c b/tc/f_u32.c
|
||||
index 2ed5254a..a5747f67 100644
|
||||
--- a/tc/f_u32.c
|
||||
+++ b/tc/f_u32.c
|
||||
@@ -978,6 +978,13 @@ show_k:
|
||||
goto show_k;
|
||||
}
|
||||
|
||||
+static __u32 u32_hash_fold(struct tc_u32_key *key)
|
||||
+{
|
||||
+ __u8 fshift = key->mask ? ffs(ntohl(key->mask)) - 1 : 0;
|
||||
+
|
||||
+ return ntohl(key->val & key->mask) >> fshift;
|
||||
+}
|
||||
+
|
||||
static int u32_parse_opt(struct filter_util *qu, char *handle,
|
||||
int argc, char **argv, struct nlmsghdr *n)
|
||||
{
|
||||
@@ -1110,9 +1117,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
|
||||
}
|
||||
NEXT_ARG();
|
||||
}
|
||||
- hash = sel2.keys[0].val & sel2.keys[0].mask;
|
||||
- hash ^= hash >> 16;
|
||||
- hash ^= hash >> 8;
|
||||
+ hash = u32_hash_fold(&sel2.keys[0]);
|
||||
htid = ((hash % divisor) << 12) | (htid & 0xFFF00000);
|
||||
sample_ok = 1;
|
||||
continue;
|
||||
--
|
||||
2.31.1
|
||||
|
84
SOURCES/0009-tc-htb-improve-burst-error-messages.patch
Normal file
84
SOURCES/0009-tc-htb-improve-burst-error-messages.patch
Normal file
@ -0,0 +1,84 @@
|
||||
From 0b66dc13c157f4d34518c06dd774ef39be0df271 Mon Sep 17 00:00:00 2001
|
||||
Message-Id: <0b66dc13c157f4d34518c06dd774ef39be0df271.1628790091.git.aclaudi@redhat.com>
|
||||
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
|
||||
From: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Thu, 12 Aug 2021 18:26:39 +0200
|
||||
Subject: [PATCH] tc: htb: improve burst error messages
|
||||
|
||||
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1910745
|
||||
Upstream Status: iproute2.git commit e44786b2
|
||||
|
||||
commit e44786b26934e4fbf337b0af73a9e6f53d458a25
|
||||
Author: Andrea Claudi <aclaudi@redhat.com>
|
||||
Date: Thu May 6 12:42:06 2021 +0200
|
||||
|
||||
tc: htb: improve burst error messages
|
||||
|
||||
When a wrong value is provided for "burst" or "cburst" parameters, the
|
||||
resulting error message is unclear and can be misleading:
|
||||
|
||||
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger
|
||||
Illegal "buffer"
|
||||
|
||||
The message claims an illegal "buffer" is provided, but neither the
|
||||
inline help nor the man page list "buffer" among the htb parameters, and
|
||||
the only way to know that "burst", "maxburst" and "buffer" are synonyms
|
||||
is to look into tc/q_htb.c.
|
||||
|
||||
This commit tries to improve this simply changing the error string to
|
||||
the parameter name provided in the user-given command, clearly pointing
|
||||
out where the wrong value is.
|
||||
|
||||
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger
|
||||
Illegal "burst"
|
||||
|
||||
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100Kbps maxburst errtrigger
|
||||
Illegal "maxburst"
|
||||
|
||||
Reported-by: Sebastian Mitterle <smitterl@redhat.com>
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
Signed-off-by: David Ahern <dsahern@kernel.org>
|
||||
|
||||
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
|
||||
---
|
||||
tc/q_htb.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tc/q_htb.c b/tc/q_htb.c
|
||||
index 42566355..b5f95f67 100644
|
||||
--- a/tc/q_htb.c
|
||||
+++ b/tc/q_htb.c
|
||||
@@ -125,6 +125,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
|
||||
unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */
|
||||
struct rtattr *tail;
|
||||
__u64 ceil64 = 0, rate64 = 0;
|
||||
+ char *param;
|
||||
|
||||
while (argc > 0) {
|
||||
if (matches(*argv, "prio") == 0) {
|
||||
@@ -160,17 +161,19 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
|
||||
} else if (matches(*argv, "burst") == 0 ||
|
||||
strcmp(*argv, "buffer") == 0 ||
|
||||
strcmp(*argv, "maxburst") == 0) {
|
||||
+ param = *argv;
|
||||
NEXT_ARG();
|
||||
if (get_size_and_cell(&buffer, &cell_log, *argv) < 0) {
|
||||
- explain1("buffer");
|
||||
+ explain1(param);
|
||||
return -1;
|
||||
}
|
||||
} else if (matches(*argv, "cburst") == 0 ||
|
||||
strcmp(*argv, "cbuffer") == 0 ||
|
||||
strcmp(*argv, "cmaxburst") == 0) {
|
||||
+ param = *argv;
|
||||
NEXT_ARG();
|
||||
if (get_size_and_cell(&cbuffer, &ccell_log, *argv) < 0) {
|
||||
- explain1("cbuffer");
|
||||
+ explain1(param);
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(*argv, "ceil") == 0) {
|
||||
--
|
||||
2.31.1
|
||||
|
5
SOURCES/iproute2.sh
Normal file
5
SOURCES/iproute2.sh
Normal file
@ -0,0 +1,5 @@
|
||||
# tc initialization script (sh)
|
||||
|
||||
if [ -z "$TC_LIB_DIR" ]; then
|
||||
export TC_LIB_DIR=/usr/lib64/tc
|
||||
fi
|
@ -1,13 +1,21 @@
|
||||
Summary: Advanced IP routing and network device configuration tools
|
||||
Name: iproute
|
||||
Version: 5.12.0
|
||||
Release: 2%{?dist}%{?buildid}
|
||||
Release: 3%{?dist}%{?buildid}
|
||||
Group: Applications/System
|
||||
URL: http://kernel.org/pub/linux/utils/net/%{name}2/
|
||||
Source0: http://kernel.org/pub/linux/utils/net/%{name}2/%{name}2-%{version}.tar.xz
|
||||
Source1: rt_dsfield.deprecated
|
||||
Source2: iproute2.sh
|
||||
Patch0: 0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch
|
||||
Patch1: 0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch
|
||||
Patch2: 0003-mptcp-add-support-for-port-based-endpoint.patch
|
||||
Patch3: 0004-Update-kernel-headers.patch
|
||||
Patch4: 0005-police-add-support-for-packet-per-second-rate-limiti.patch
|
||||
Patch5: 0006-police-Add-support-for-json-output.patch
|
||||
Patch6: 0007-police-Fix-normal-output-back-to-what-it-was.patch
|
||||
Patch7: 0008-tc-u32-Fix-key-folding-in-sample-option.patch
|
||||
Patch8: 0009-tc-htb-improve-burst-error-messages.patch
|
||||
License: GPLv2+ and Public Domain
|
||||
BuildRequires: bison
|
||||
BuildRequires: elfutils-libelf-devel
|
||||
@ -92,6 +100,9 @@ rm -rf '%{buildroot}%{_docdir}'
|
||||
# Append deprecated values to rt_dsfield for compatibility reasons
|
||||
cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
|
||||
|
||||
# use TC_LIB_DIR environment variable
|
||||
install -D -m644 %{SOURCE2} %{buildroot}%{_sysconfdir}/profile.d/iproute2.sh
|
||||
|
||||
%files
|
||||
%dir %{_sysconfdir}/iproute2
|
||||
%{!?_licensedir:%global license %%doc}
|
||||
@ -110,6 +121,7 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
|
||||
%files tc
|
||||
%{!?_licensedir:%global license %%doc}
|
||||
%license COPYING
|
||||
%{_sysconfdir}/profile.d/iproute2.sh
|
||||
%{_mandir}/man7/tc-*
|
||||
%{_mandir}/man8/tc*
|
||||
%{_mandir}/man8/cbq*
|
||||
@ -134,6 +146,15 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
|
||||
%{_includedir}/iproute2/bpf_elf.h
|
||||
|
||||
%changelog
|
||||
* Thu Aug 12 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-3.el8]
|
||||
- tc: htb: improve burst error messages (Andrea Claudi) [1910745]
|
||||
- tc: u32: Fix key folding in sample option (Andrea Claudi) [1979425]
|
||||
- police: Fix normal output back to what it was (Andrea Claudi) [1981393]
|
||||
- police: Add support for json output (Andrea Claudi) [1981393]
|
||||
- police: add support for packet-per-second rate limiting (Andrea Claudi) [1981393]
|
||||
- Update kernel headers (Andrea Claudi) [1981393]
|
||||
- mptcp: add support for port based endpoint (Andrea Claudi) [1984733]
|
||||
|
||||
* Fri Aug 06 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-2.el8]
|
||||
- add build and run-time dependencies on libbpf (Andrea Claudi) [1990402]
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user