import iproute-5.12.0-4.el8

This commit is contained in:
CentOS Sources 2021-11-09 05:04:23 -05:00 committed by Stepan Oksanichenko
parent 9c9868f132
commit 60a68c2bd4
24 changed files with 1948 additions and 1525 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/iproute2-5.9.0.tar.xz SOURCES/iproute2-5.12.0.tar.xz

View File

@ -1 +1 @@
c9e0ca453307ce7c221ccffc10939f4136b4ad5d SOURCES/iproute2-5.9.0.tar.xz 4e18c1d72a29f41a5968ac8a9b266470f6ad89a7 SOURCES/iproute2-5.12.0.tar.xz

View File

@ -0,0 +1,73 @@
From d9bcc70051d23c62cc802a356dc7e4324398765e Mon Sep 17 00:00:00 2001
Message-Id: <d9bcc70051d23c62cc802a356dc7e4324398765e.1624894546.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Mon, 28 Jun 2021 15:22:17 +0200
Subject: [PATCH] tc: f_flower: Add option to match on related ct state
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1957243
Upstream Status: unknown commit 7fda6c58
commit 7fda6c588a295ad381fdf0b9b9971169b2f9d9dc
Author: Ariel Levkovich <lariel@nvidia.com>
Date: Fri May 21 20:07:06 2021 +0300
tc: f_flower: Add option to match on related ct state
Add support for matching on ct_state flag related.
The related state indicates a packet is associated with an existing
connection.
Example:
$ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \
ct_state -est-rel+trk \
action mirred egress redirect dev ens1f0_1
$ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \
ct_state +rel+trk \
action mirred egress redirect dev ens1f0_1
Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
---
man/man8/tc-flower.8 | 2 ++
tc/f_flower.c | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8
index f7336b62..4541d937 100644
--- a/man/man8/tc-flower.8
+++ b/man/man8/tc-flower.8
@@ -391,6 +391,8 @@ rpl - The packet is in the reply direction, meaning that it is in the opposite d
.TP
inv - The state is invalid. The packet couldn't be associated to a connection.
.TP
+rel - The packet is related to an existing connection.
+.TP
Example: +trk+est
.RE
.TP
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 53822a95..29db2e23 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -94,7 +94,7 @@ static void explain(void)
" LSE := lse depth DEPTH { label LABEL | tc TC | bos BOS | ttl TTL }\n"
" FILTERID := X:Y:Z\n"
" MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n"
- " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new\n"
+ " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel\n"
" ACTION-SPEC := ... look at individual actions\n"
"\n"
"NOTE: CLASSID, IP-PROTO are parsed as hexadecimal input.\n"
@@ -345,6 +345,7 @@ static struct flower_ct_states {
{ "trk", TCA_FLOWER_KEY_CT_FLAGS_TRACKED },
{ "new", TCA_FLOWER_KEY_CT_FLAGS_NEW },
{ "est", TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED },
+ { "rel", TCA_FLOWER_KEY_CT_FLAGS_RELATED },
{ "inv", TCA_FLOWER_KEY_CT_FLAGS_INVALID },
{ "rpl", TCA_FLOWER_KEY_CT_FLAGS_REPLY },
};
--
2.31.1

View File

@ -1,20 +0,0 @@
From cb7ce51cc1abd7b98370b903ec96205ebfe48661 Mon Sep 17 00:00:00 2001
Message-Id: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 15 Oct 2020 15:18:35 -0700
Subject: [PATCH] v5.9.0
---
include/version.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/version.h b/include/version.h
index 0088493d..89d05974 100644
--- a/include/version.h
+++ b/include/version.h
@@ -1 +1 @@
-static const char version[] = "5.8.0";
+static const char version[] = "5.9.0";
--
2.29.2

View File

@ -1,308 +0,0 @@
From 1b8a3c04bf8d115e2d427d41a437be03ecf34ce8 Mon Sep 17 00:00:00 2001
Message-Id: <1b8a3c04bf8d115e2d427d41a437be03ecf34ce8.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:34:34 +0100
Subject: [PATCH] Update kernel headers
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit 34be2d26
Conflicts: on include/uapi/linux/bpf.h, due to missing commits:
- c8eb4b52c1b1 ("Update kernel headers")
- f481515c89fa ("Update kernel headers")
commit 34be2d2619e29836605a7d1669d642f892fc725e
Author: David Ahern <dsahern@gmail.com>
Date: Wed Oct 7 00:01:26 2020 -0600
Update kernel headers
Update kernel headers to commit:
9faebeb2d800 ("Merge branch 'ethtool-allow-dumping-policies-to-user-space'")
Signed-off-by: David Ahern <dsahern@gmail.com>
---
include/uapi/linux/bpf.h | 64 +++++++++++++++++++++++++----
include/uapi/linux/devlink.h | 5 +++
include/uapi/linux/genetlink.h | 11 +++++
include/uapi/linux/l2tp.h | 1 +
include/uapi/linux/netlink.h | 2 +
include/uapi/linux/tc_act/tc_mpls.h | 1 +
include/uapi/linux/tc_act/tc_vlan.h | 4 ++
7 files changed, 79 insertions(+), 9 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b21cc6af..36e5bc2d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -404,6 +404,9 @@ enum {
/* Enable memory-mapping BPF map */
BPF_F_MMAPABLE = (1U << 10),
+
+/* Share perf_event among processes */
+ BPF_F_PRESERVE_ELEMS = (1U << 11),
};
/* Flags for BPF_PROG_QUERY. */
@@ -414,6 +417,11 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+/* Flags for BPF_PROG_TEST_RUN */
+
+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
+#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
+
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
/* enabled run_time_ns and run_cnt */
@@ -556,6 +564,8 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
+ __u32 flags;
+ __u32 cpu;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
@@ -622,8 +632,13 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
- __aligned_u64 iter_info; /* extra bpf_iter_link_info */
- __u32 iter_info_len; /* iter_info length */
+ union {
+ __u32 target_btf_id; /* btf_id of target to attach to */
+ struct {
+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */
+ __u32 iter_info_len; /* iter_info length */
+ };
+ };
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
@@ -2496,7 +2511,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * long bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
@@ -2676,7 +2691,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
@@ -2842,6 +2857,7 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
+ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
* long bpf_send_signal(u32 sig)
* Description
@@ -2858,7 +2874,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
@@ -3087,7 +3103,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
@@ -3215,11 +3231,11 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * u64 bpf_sk_cgroup_id(void *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
- * *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ * *sk* must be a non-**NULL** pointer to a socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
@@ -3229,7 +3245,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
@@ -4447,4 +4463,34 @@ struct bpf_sk_lookup {
__u32 local_port; /* Host byte order */
};
+/*
+ * struct btf_ptr is used for typed pointer representation; the
+ * type id is used to render the pointer data as the appropriate type
+ * via the bpf_snprintf_btf() helper described above. A flags field -
+ * potentially to specify additional details about the BTF pointer
+ * (rather than its mode of display) - is included for future use.
+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
+ */
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags; /* BTF ptr flags; unused at present. */
+};
+
+/*
+ * Flags to control bpf_snprintf_btf() behaviour.
+ * - BTF_F_COMPACT: no formatting around type information
+ * - BTF_F_NONAME: no struct/union member names/types
+ * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
+ * equivalent to %px.
+ * - BTF_F_ZERO: show zero-valued struct/union members; they
+ * are not displayed by default
+ */
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
+
#endif /* __LINUX_BPF_H__ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b7f23faa..e5586fa0 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -13,6 +13,8 @@
#ifndef _LINUX_DEVLINK_H_
#define _LINUX_DEVLINK_H_
+#include <linux/const.h>
+
#define DEVLINK_GENL_NAME "devlink"
#define DEVLINK_GENL_VERSION 0x1
#define DEVLINK_GENL_MCGRP_CONFIG_NAME "config"
@@ -193,6 +195,9 @@ enum devlink_port_flavour {
* port that faces the PCI VF.
*/
DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */
+ DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but
+ * is not used in any way.
+ */
};
enum devlink_param_cmode {
diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 7c6c390c..9fa720ee 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -64,6 +64,8 @@ enum {
CTRL_ATTR_OPS,
CTRL_ATTR_MCAST_GROUPS,
CTRL_ATTR_POLICY,
+ CTRL_ATTR_OP_POLICY,
+ CTRL_ATTR_OP,
__CTRL_ATTR_MAX,
};
@@ -85,6 +87,15 @@ enum {
__CTRL_ATTR_MCAST_GRP_MAX,
};
+enum {
+ CTRL_ATTR_POLICY_UNSPEC,
+ CTRL_ATTR_POLICY_DO,
+ CTRL_ATTR_POLICY_DUMP,
+
+ __CTRL_ATTR_POLICY_DUMP_MAX,
+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 131c3a26..abc0fc81 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -144,6 +144,7 @@ enum {
L2TP_ATTR_RX_OOS_PACKETS, /* u64 */
L2TP_ATTR_RX_ERRORS, /* u64 */
L2TP_ATTR_STATS_PAD,
+ L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */
__L2TP_ATTR_STATS_MAX,
};
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 695c88e3..f7749205 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -327,6 +327,7 @@ enum netlink_attribute_type {
* the index, if limited inside the nesting (U32)
* @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
* bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
* @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
*/
enum netlink_policy_type_attr {
@@ -342,6 +343,7 @@ enum netlink_policy_type_attr {
NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
NL_POLICY_TYPE_ATTR_PAD,
+ NL_POLICY_TYPE_ATTR_MASK,
/* keep last */
__NL_POLICY_TYPE_ATTR_MAX,
diff --git a/include/uapi/linux/tc_act/tc_mpls.h b/include/uapi/linux/tc_act/tc_mpls.h
index 9360e952..9e4e8f52 100644
--- a/include/uapi/linux/tc_act/tc_mpls.h
+++ b/include/uapi/linux/tc_act/tc_mpls.h
@@ -10,6 +10,7 @@
#define TCA_MPLS_ACT_PUSH 2
#define TCA_MPLS_ACT_MODIFY 3
#define TCA_MPLS_ACT_DEC_TTL 4
+#define TCA_MPLS_ACT_MAC_PUSH 5
struct tc_mpls {
tc_gen; /* generic TC action fields. */
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 168995b5..5b306fe8 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,8 @@
#define TCA_VLAN_ACT_POP 1
#define TCA_VLAN_ACT_PUSH 2
#define TCA_VLAN_ACT_MODIFY 3
+#define TCA_VLAN_ACT_POP_ETH 4
+#define TCA_VLAN_ACT_PUSH_ETH 5
struct tc_vlan {
tc_gen;
@@ -30,6 +32,8 @@ enum {
TCA_VLAN_PUSH_VLAN_PROTOCOL,
TCA_VLAN_PAD,
TCA_VLAN_PUSH_VLAN_PRIORITY,
+ TCA_VLAN_PUSH_ETH_DST,
+ TCA_VLAN_PUSH_ETH_SRC,
__TCA_VLAN_MAX,
};
#define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
--
2.29.2

View File

@ -0,0 +1,43 @@
From 5f12d06dac98f9085273ce548d2ed13341c920fe Mon Sep 17 00:00:00 2001
Message-Id: <5f12d06dac98f9085273ce548d2ed13341c920fe.1624894546.git.aclaudi@redhat.com>
In-Reply-To: <d9bcc70051d23c62cc802a356dc7e4324398765e.1624894546.git.aclaudi@redhat.com>
References: <d9bcc70051d23c62cc802a356dc7e4324398765e.1624894546.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Mon, 28 Jun 2021 15:22:17 +0200
Subject: [PATCH] tc: f_flower: Add missing ct_state flags to usage description
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1957243
Upstream Status: unknown commit 825bd5da
commit 825bd5dacb98597a5595b470bd275bb103a7b9c2
Author: Ariel Levkovich <lariel@nvidia.com>
Date: Fri May 21 20:07:07 2021 +0300
tc: f_flower: Add missing ct_state flags to usage description
Add ct_state flags rpl and inv to the commands usage
description
Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
---
tc/f_flower.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 29db2e23..c5af0276 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -94,7 +94,7 @@ static void explain(void)
" LSE := lse depth DEPTH { label LABEL | tc TC | bos BOS | ttl TTL }\n"
" FILTERID := X:Y:Z\n"
" MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n"
- " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel\n"
+ " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel,rpl,inv\n"
" ACTION-SPEC := ... look at individual actions\n"
"\n"
"NOTE: CLASSID, IP-PROTO are parsed as hexadecimal input.\n"
--
2.31.1

View File

@ -1,343 +0,0 @@
From cac52dd831b6982f6b27b02c26243edbe0b7d747 Mon Sep 17 00:00:00 2001
Message-Id: <cac52dd831b6982f6b27b02c26243edbe0b7d747.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:03 +0100
Subject: [PATCH] m_vlan: add pop_eth and push_eth actions
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit d61167dd
commit d61167dd88b45832843b1458cd156f3b85c8ff16
Author: Guillaume Nault <gnault@redhat.com>
Date: Mon Oct 19 17:23:01 2020 +0200
m_vlan: add pop_eth and push_eth actions
Add support for the new TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH
actions (kernel commit 19fbcb36a39e ("net/sched: act_vlan:
Add {POP,PUSH}_ETH actions"). These action let TC remove or add the
Ethernet at the head of a frame.
Drop an Ethernet header:
# tc filter add dev ethX matchall action vlan pop_eth
Push an Ethernet header (the original frame must have no MAC header):
# tc filter add dev ethX matchall action vlan \
push_eth dst_mac 0a:00:00:00:00:02 src_mac 0a:00:00:00:00:01
Also add a test suite for m_vlan, which covers these new actions and
the pre-existing ones.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
man/man8/tc-vlan.8 | 39 +++++++++++++++++-
tc/m_vlan.c | 69 +++++++++++++++++++++++++++++++
testsuite/tests/tc/vlan.t | 86 +++++++++++++++++++++++++++++++++++++++
3 files changed, 192 insertions(+), 2 deletions(-)
create mode 100755 testsuite/tests/tc/vlan.t
diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8
index f5ffc25f..5c2808b1 100644
--- a/man/man8/tc-vlan.8
+++ b/man/man8/tc-vlan.8
@@ -5,8 +5,8 @@ vlan - vlan manipulation module
.SH SYNOPSIS
.in +8
.ti -8
-.BR tc " ... " "action vlan" " { " pop " |"
-.IR PUSH " | " MODIFY " } [ " CONTROL " ]"
+.BR tc " ... " "action vlan" " { " pop " | " pop_eth " |"
+.IR PUSH " | " MODIFY " | " PUSH_ETH " } [ " CONTROL " ]"
.ti -8
.IR PUSH " := "
@@ -24,6 +24,11 @@ vlan - vlan manipulation module
.IR VLANPRIO " ] "
.BI id " VLANID"
+.ti -8
+.IR PUSH_ETH " := "
+.B push_eth
+.BI dst_mac " LLADDR " src_mac " LLADDR "
+
.ti -8
.IR CONTROL " := { "
.BR reclassify " | " pipe " | " drop " | " continue " | " pass " | " goto " " chain " " CHAIN_INDEX " }"
@@ -43,6 +48,20 @@ modes require at least a
and allow to optionally choose the
.I VLANPROTO
to use.
+
+The
+.B vlan
+action can also be used to add or remove the base Ethernet header. The
+.B pop_eth
+mode, which takes no argument, is used to remove the base Ethernet header. All
+existing VLANs must have been previously dropped. The opposite operation,
+adding a base Ethernet header, is done with the
+.B push_eth
+mode. In that case, the packet must have no MAC header (stacking MAC headers is
+not permitted). This mode is mostly useful when a previous action has
+encapsulated the whole original frame behind a network header and one needs
+to prepend an Ethernet header before forwarding the resulting packet.
+
.SH OPTIONS
.TP
.B pop
@@ -58,6 +77,16 @@ Replace mode. Existing 802.1Q tag is replaced. Requires at least
.B id
option.
.TP
+.B pop_eth
+Ethernet header decapsulation mode. Only works on a plain Ethernet header:
+VLANs, if any, must be removed first.
+.TP
+.B push_eth
+Ethernet header encapsulation mode. The Ethertype is automatically set
+using the network header type. Chaining Ethernet headers is not allowed: the
+packet must have no MAC header when using this mode. Requires the
+.BR "dst_mac " and " src_mac " options.
+.TP
.BI id " VLANID"
Specify the VLAN ID to encapsulate into.
.I VLANID
@@ -73,6 +102,12 @@ Choose the VLAN protocol to use. At the time of writing, the kernel accepts only
.BI priority " VLANPRIO"
Choose the VLAN priority to use. Decimal number in range of 0-7.
.TP
+.BI dst_mac " LLADDR"
+Choose the destination MAC address to use.
+.TP
+.BI src_mac " LLADDR"
+Choose the source MAC address to use.
+.TP
.I CONTROL
How to continue after executing this action.
.RS
diff --git a/tc/m_vlan.c b/tc/m_vlan.c
index 1096ba0f..e6b21330 100644
--- a/tc/m_vlan.c
+++ b/tc/m_vlan.c
@@ -23,6 +23,8 @@ static const char * const action_names[] = {
[TCA_VLAN_ACT_POP] = "pop",
[TCA_VLAN_ACT_PUSH] = "push",
[TCA_VLAN_ACT_MODIFY] = "modify",
+ [TCA_VLAN_ACT_POP_ETH] = "pop_eth",
+ [TCA_VLAN_ACT_PUSH_ETH] = "push_eth",
};
static void explain(void)
@@ -31,6 +33,8 @@ static void explain(void)
"Usage: vlan pop\n"
" vlan push [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n"
" vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n"
+ " vlan pop_eth [CONTROL]\n"
+ " vlan push_eth dst_mac LLADDR src_mac LLADDR [CONTROL]\n"
" VLANPROTO is one of 802.1Q or 802.1AD\n"
" with default: 802.1Q\n"
" CONTROL := reclassify | pipe | drop | continue | pass |\n"
@@ -63,6 +67,10 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p,
char **argv = *argv_p;
struct rtattr *tail;
int action = 0;
+ char dst_mac[ETH_ALEN] = {};
+ int dst_mac_set = 0;
+ char src_mac[ETH_ALEN] = {};
+ int src_mac_set = 0;
__u16 id;
int id_set = 0;
__u16 proto;
@@ -95,6 +103,18 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p,
return -1;
}
action = TCA_VLAN_ACT_MODIFY;
+ } else if (matches(*argv, "pop_eth") == 0) {
+ if (action) {
+ unexpected(*argv);
+ return -1;
+ }
+ action = TCA_VLAN_ACT_POP_ETH;
+ } else if (matches(*argv, "push_eth") == 0) {
+ if (action) {
+ unexpected(*argv);
+ return -1;
+ }
+ action = TCA_VLAN_ACT_PUSH_ETH;
} else if (matches(*argv, "id") == 0) {
if (!has_push_attribs(action))
invarg("only valid for push/modify", *argv);
@@ -119,6 +139,22 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p,
if (get_u8(&prio, *argv, 0) || (prio & ~0x7))
invarg("prio is invalid", *argv);
prio_set = 1;
+ } else if (matches(*argv, "dst_mac") == 0) {
+ if (action != TCA_VLAN_ACT_PUSH_ETH)
+ invarg("only valid for push_eth", *argv);
+
+ NEXT_ARG();
+ if (ll_addr_a2n(dst_mac, sizeof(dst_mac), *argv) < 0)
+ invarg("dst_mac is invalid", *argv);
+ dst_mac_set = 1;
+ } else if (matches(*argv, "src_mac") == 0) {
+ if (action != TCA_VLAN_ACT_PUSH_ETH)
+ invarg("only valid for push_eth", *argv);
+
+ NEXT_ARG();
+ if (ll_addr_a2n(src_mac, sizeof(src_mac), *argv) < 0)
+ invarg("src_mac is invalid", *argv);
+ src_mac_set = 1;
} else if (matches(*argv, "help") == 0) {
usage();
} else {
@@ -150,6 +186,20 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p,
return -1;
}
+ if (action == TCA_VLAN_ACT_PUSH_ETH) {
+ if (!dst_mac_set) {
+ fprintf(stderr, "dst_mac needs to be set for %s\n",
+ action_names[action]);
+ explain();
+ return -1;
+ } else if (!src_mac_set) {
+ fprintf(stderr, "src_mac needs to be set for %s\n",
+ action_names[action]);
+ explain();
+ return -1;
+ }
+ }
+
parm.v_action = action;
tail = addattr_nest(n, MAX_MSG, tca_id);
addattr_l(n, MAX_MSG, TCA_VLAN_PARMS, &parm, sizeof(parm));
@@ -167,6 +217,12 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p,
}
if (prio_set)
addattr8(n, MAX_MSG, TCA_VLAN_PUSH_VLAN_PRIORITY, prio);
+ if (dst_mac_set)
+ addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_DST, dst_mac,
+ sizeof(dst_mac));
+ if (src_mac_set)
+ addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_SRC, src_mac,
+ sizeof(src_mac));
addattr_nest_end(n, tail);
@@ -216,6 +272,19 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg)
print_uint(PRINT_ANY, "priority", " priority %u", val);
}
break;
+ case TCA_VLAN_ACT_PUSH_ETH:
+ if (tb[TCA_VLAN_PUSH_ETH_DST] &&
+ RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_DST]) == ETH_ALEN) {
+ ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_DST]),
+ ETH_ALEN, 0, b1, sizeof(b1));
+ print_string(PRINT_ANY, "dst_mac", " dst_mac %s", b1);
+ }
+ if (tb[TCA_VLAN_PUSH_ETH_SRC &&
+ RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_SRC]) == ETH_ALEN]) {
+ ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_SRC]),
+ ETH_ALEN, 0, b1, sizeof(b1));
+ print_string(PRINT_ANY, "src_mac", " src_mac %s", b1);
+ }
}
print_action_control(f, " ", parm->action, "");
diff --git a/testsuite/tests/tc/vlan.t b/testsuite/tests/tc/vlan.t
new file mode 100755
index 00000000..b86dc364
--- /dev/null
+++ b/testsuite/tests/tc/vlan.t
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+. lib/generic.sh
+
+DEV="$(rand_dev)"
+ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy
+ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress
+
+reset_qdisc()
+{
+ ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress
+ ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress
+}
+
+ts_tc "$0" "Add vlan action pop" \
+ filter add dev $DEV ingress matchall action vlan pop
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "pop"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action push (default parameters)" \
+ filter add dev $DEV ingress matchall action vlan push id 5
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "push"
+test_on "id 5"
+test_on "protocol 802.1Q"
+test_on "priority 0"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action push (explicit parameters)" \
+ filter add dev $DEV ingress matchall \
+ action vlan push id 5 protocol 802.1ad priority 2
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "push"
+test_on "id 5"
+test_on "protocol 802.1ad"
+test_on "priority 2"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action modify (default parameters)" \
+ filter add dev $DEV ingress matchall action vlan modify id 5
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "modify"
+test_on "id 5"
+test_on "protocol 802.1Q"
+test_on "priority 0"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action modify (explicit parameters)" \
+ filter add dev $DEV ingress matchall \
+ action vlan modify id 5 protocol 802.1ad priority 2
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "modify"
+test_on "id 5"
+test_on "protocol 802.1ad"
+test_on "priority 2"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action pop_eth" \
+ filter add dev $DEV ingress matchall action vlan pop_eth
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "pop_eth"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add vlan action push_eth" \
+ filter add dev $DEV ingress matchall \
+ action vlan push_eth dst_mac 02:00:00:00:00:02 \
+ src_mac 02:00:00:00:00:01
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "vlan"
+test_on "push_eth"
+test_on "dst_mac 02:00:00:00:00:02"
+test_on "src_mac 02:00:00:00:00:01"
+test_on "pipe"
--
2.29.2

View File

@ -0,0 +1,123 @@
From 0ccd2dbb3eca44a892a183db8c2e4221488ecf51 Mon Sep 17 00:00:00 2001
Message-Id: <0ccd2dbb3eca44a892a183db8c2e4221488ecf51.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Mon, 9 Aug 2021 15:18:11 +0200
Subject: [PATCH] mptcp: add support for port based endpoint
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1984733
Upstream Status: iproute2.git commit 42fbca91
commit 42fbca91cd616ae714c3f6aa2d4e2c3399498e38
Author: Paolo Abeni <pabeni@redhat.com>
Date: Fri Feb 19 21:42:55 2021 +0100
mptcp: add support for port based endpoint
The feature is supported by the kernel since 5.11-net-next,
let's allow user-space to use it.
Just parse and dump an additional, per endpoint, u16 attribute
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
ip/ipmptcp.c | 16 ++++++++++++++--
man/man8/ip-mptcp.8 | 8 ++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/ip/ipmptcp.c b/ip/ipmptcp.c
index e1ffafb3..5f659b59 100644
--- a/ip/ipmptcp.c
+++ b/ip/ipmptcp.c
@@ -17,7 +17,7 @@ static void usage(void)
{
fprintf(stderr,
"Usage: ip mptcp endpoint add ADDRESS [ dev NAME ] [ id ID ]\n"
- " [ FLAG-LIST ]\n"
+ " [ port NR ] [ FLAG-LIST ]\n"
" ip mptcp endpoint delete id ID\n"
" ip mptcp endpoint show [ id ID ]\n"
" ip mptcp endpoint flush\n"
@@ -97,6 +97,7 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
bool id_set = false;
__u32 index = 0;
__u32 flags = 0;
+ __u16 port = 0;
__u8 id = 0;
ll_init_map(&rth);
@@ -123,6 +124,10 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
if (!index)
invarg("device does not exist\n", ifname);
+ } else if (matches(*argv, "port") == 0) {
+ NEXT_ARG();
+ if (get_u16(&port, *argv, 0))
+ invarg("expected port", *argv);
} else if (get_addr(&address, *argv, AF_UNSPEC) == 0) {
addr_set = true;
} else {
@@ -145,6 +150,8 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n,
addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_FLAGS, flags);
if (index)
addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_IF_IDX, index);
+ if (port)
+ addattr16(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_PORT, port);
if (addr_set) {
int type;
@@ -181,8 +188,8 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo)
__u8 family = AF_UNSPEC, addr_attr_type;
const char *ifname;
unsigned int flags;
+ __u16 id, port;
int index;
- __u16 id;
parse_rtattr_nested(tb, MPTCP_PM_ADDR_ATTR_MAX, addrinfo);
@@ -196,6 +203,11 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo)
print_string(PRINT_ANY, "address", "%s ",
format_host_rta(family, tb[addr_attr_type]));
}
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
+ port = rta_getattr_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]);
+ if (port)
+ print_uint(PRINT_ANY, "port", "port %u ", port);
+ }
if (tb[MPTCP_PM_ADDR_ATTR_ID]) {
id = rta_getattr_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
print_uint(PRINT_ANY, "id", "id %u ", id);
diff --git a/man/man8/ip-mptcp.8 b/man/man8/ip-mptcp.8
index ef8409ea..98cb93b9 100644
--- a/man/man8/ip-mptcp.8
+++ b/man/man8/ip-mptcp.8
@@ -20,6 +20,8 @@ ip-mptcp \- MPTCP path manager configuration
.ti -8
.BR "ip mptcp endpoint add "
.IR IFADDR
+.RB "[ " port
+.IR PORT " ]"
.RB "[ " dev
.IR IFNAME " ]"
.RB "[ " id
@@ -87,6 +89,12 @@ ip mptcp endpoint flush flush all existing MPTCP endpoints
.TE
.TP
+.IR PORT
+When a port number is specified, incoming MPTCP subflows for already
+established MPTCP sockets will be accepted on the specified port, regardless
+the original listener port accepting the first MPTCP subflow and/or
+this peer being actually on the client side.
+
.IR ID
is a unique numeric identifier for the given endpoint
--
2.31.1

View File

@ -0,0 +1,986 @@
From 2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1 Mon Sep 17 00:00:00 2001
Message-Id: <2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Wed, 11 Aug 2021 12:55:14 +0200
Subject: [PATCH] Update kernel headers
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
Upstream Status: iproute2.git commit a5b355c0
commit a5b355c08c62fb5b3a42d0e27ef05571c7b30e2e
Author: David Ahern <dsahern@kernel.org>
Date: Fri Mar 19 14:59:17 2021 +0000
Update kernel headers
Update kernel headers to commit:
38cb57602369 ("selftests: net: forwarding: Fix a typo")
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
include/uapi/linux/bpf.h | 764 ++++++++++++++++++++++++++++++++-
include/uapi/linux/btf.h | 5 +-
include/uapi/linux/nexthop.h | 47 +-
include/uapi/linux/pkt_cls.h | 2 +
include/uapi/linux/rtnetlink.h | 7 +
5 files changed, 818 insertions(+), 7 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b1aba6af..502934f7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
} map;
};
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ * Description
+ * Create a map and return a file descriptor that refers to the
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ * is automatically enabled for the new file descriptor.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ * Description
+ * Look up an element with a given *key* in the map referred to
+ * by the file descriptor *map_fd*.
+ *
+ * The *flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ * Description
+ * Create or update an element (key/value pair) in a specified map.
+ *
+ * The *flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create a new element or update an existing element.
+ * **BPF_NOEXIST**
+ * Create a new element only if it did not exist.
+ * **BPF_EXIST**
+ * Update an existing element.
+ * **BPF_F_LOCK**
+ * Update a spin_lock-ed map element.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ * **E2BIG**
+ * The number of elements in the map reached the
+ * *max_entries* limit specified at map creation time.
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ * Description
+ * Look up and delete an element by key in a specified map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ * Description
+ * Look up an element by key in a specified map and return the key
+ * of the next element. Can be used to iterate over all elements
+ * in the map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * The following cases can be used to iterate over all elements of
+ * the map:
+ *
+ * * If *key* is not found, the operation returns zero and sets
+ * the *next_key* pointer to the key of the first element.
+ * * If *key* is found, the operation returns zero and sets the
+ * *next_key* pointer to the key of the next element.
+ * * If *key* is the last element, returns -1 and *errno* is set
+ * to **ENOENT**.
+ *
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ * **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ * Description
+ * Verify and load an eBPF program, returning a new file
+ * descriptor associated with the program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ * automatically enabled for the new file descriptor.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ * Description
+ * Pin an eBPF program or map referred by the specified *bpf_fd*
+ * to the provided *pathname* on the filesystem.
+ *
+ * The *pathname* argument must not contain a dot (".").
+ *
+ * On success, *pathname* retains a reference to the eBPF object,
+ * preventing deallocation of the object when the original
+ * *bpf_fd* is closed. This allow the eBPF object to live beyond
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ * process.
+ *
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
+ * unpins the object from the filesystem, removing the reference.
+ * If no other file descriptors or filesystem nodes refer to the
+ * same object, it will be deallocated (see NOTES).
+ *
+ * The filesystem type for the parent directory of *pathname* must
+ * be **BPF_FS_MAGIC**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_OBJ_GET
+ * Description
+ * Open a file descriptor for the eBPF object pinned to the
+ * specified *pathname*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook.
+ *
+ * The *attach_type* specifies the eBPF attachment point to
+ * attach the program to, and must be one of *bpf_attach_type*
+ * (see below).
+ *
+ * The *attach_bpf_fd* must be a valid file descriptor for a
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ * or sock_ops type corresponding to the specified *attach_type*.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_TYPE_SK_SKB**,
+ * **BPF_PROG_TYPE_SK_MSG**
+ *
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ * Description
+ * Detach the eBPF program associated with the *target_fd* at the
+ * hook specified by *attach_type*. The program must have been
+ * previously attached using **BPF_PROG_ATTACH**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ * Description
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
+ * number of times against a provided program context *ctx_in* and
+ * data *data_in*, and return the modified program context
+ * *ctx_out*, *data_out* (for example, packet data), result of the
+ * execution *retval*, and *duration* of the test run.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * **ENOSPC**
+ * Either *data_size_out* or *ctx_size_out* is too small.
+ * **ENOTSUPP**
+ * This command is not supported by the program type of
+ * the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF program currently loaded into the kernel.
+ *
+ * Looks for the eBPF program with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF programs
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF map currently loaded into the kernel.
+ *
+ * Looks for the eBPF map with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF maps
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF program corresponding to
+ * *prog_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF map corresponding to
+ * *map_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ * Description
+ * Obtain information about the eBPF object corresponding to
+ * *bpf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info*, which will be in
+ * one of the following formats depending on the eBPF object type
+ * of *bpf_fd*:
+ *
+ * * **struct bpf_prog_info**
+ * * **struct bpf_map_info**
+ * * **struct bpf_btf_info**
+ * * **struct bpf_link_info**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * specified *attach_type* hook.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_QUERY** always fetches the number of programs
+ * attached and the *attach_flags* which were used to attach those
+ * programs. Additionally, if *prog_ids* is nonzero and the number
+ * of attached programs is less than *prog_cnt*, populates
+ * *prog_ids* with the eBPF program ids of the programs attached
+ * at *target_fd*.
+ *
+ * The following flags may alter the result:
+ *
+ * **BPF_F_QUERY_EFFECTIVE**
+ * Only return information regarding programs which are
+ * currently effective at the specified *target_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ * Description
+ * Attach an eBPF program to a tracepoint *name* to access kernel
+ * internal arguments of the tracepoint in their raw form.
+ *
+ * The *prog_fd* must be a valid file descriptor associated with
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ * No ABI guarantees are made about the content of tracepoint
+ * arguments exposed to the corresponding eBPF program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ * Description
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
+ * returning a new file descriptor associated with the metadata.
+ * BTF is described in more detail at
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ * The *btf* parameter must point to valid memory providing
+ * *btf_size* bytes of BTF binary metadata.
+ *
+ * The returned file descriptor can be passed to other **bpf**\ ()
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ * associate the BTF with those objects.
+ *
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
+ * *btf_log_level* which allow the kernel to return freeform log
+ * output regarding the BTF verification process.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the BPF Type Format (BTF)
+ * corresponding to *btf_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * target process identified by *pid* and *fd*.
+ *
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
+ * be populated with the eBPF program id and file descriptor type
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
+ * uprobe, the *probe_offset* and *probe_addr* will also be
+ * populated. Optionally, if *buf* is provided, then up to
+ * *buf_len* bytes of *buf* will be populated with the name of
+ * the tracepoint, kprobe or uprobe.
+ *
+ * The resulting *prog_id* may be introspected in deeper detail
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ * Description
+ * Look up an element with the given *key* in the map referred to
+ * by the file descriptor *fd*, and if found, delete the element.
+ *
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ * implement this command as a "pop" operation, deleting the top
+ * element rather than one corresponding to *key*.
+ * The *key* and *key_len* parameters should be zeroed when
+ * issuing this operation for these map types.
+ *
+ * This command is only valid for the following map types:
+ * * **BPF_MAP_TYPE_QUEUE**
+ * * **BPF_MAP_TYPE_STACK**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ * Description
+ * Freeze the permissions of the specified map.
+ *
+ * Write permissions may be frozen by passing zero *flags*.
+ * Upon success, no future syscall invocations may alter the
+ * map state of *map_fd*. Write operations from eBPF programs
+ * are still possible for a frozen map.
+ *
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ * Description
+ * Fetch the next BPF Type Format (BTF) object currently loaded
+ * into the kernel.
+ *
+ * Looks for the BTF object with an id greater than *start_id*
+ * and updates *next_id* on success. If no other BTF objects
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ * Description
+ * Iterate and fetch multiple elements in a map.
+ *
+ * Two opaque values are used to manage batch operations,
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ * to NULL to begin the batched operation. After each subsequent
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ * *out_batch* as the *in_batch* for the next operation to
+ * continue iteration from the current point.
+ *
+ * The *keys* and *values* are output parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are copied into the
+ * user buffer, with the keys copied into *keys* and the values
+ * copied into the corresponding indices in *values*.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
+ * *values* is too small to dump an entire bucket during
+ * iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ * Description
+ * Iterate and delete all elements in a map.
+ *
+ * This operation has the same behavior as
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ * * Every element that is successfully returned is also deleted
+ * from the map. This is at least *count* elements. Note that
+ * *count* is both an input and an output parameter.
+ * * Upon returning with *errno* set to **EFAULT**, up to
+ * *count* elements may be deleted without returning the keys
+ * and values of the deleted elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ * Description
+ * Update multiple elements in a map by *key*.
+ *
+ * The *keys* and *values* are input parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially updated to the
+ * value in the corresponding index in *values*. The *in_batch*
+ * and *out_batch* parameters are ignored and should be zeroed.
+ *
+ * The *elem_flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create new elements or update a existing elements.
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ * **BPF_EXIST**
+ * Update existing elements.
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
+ * the map reached the *max_entries* limit specified at map
+ * creation time.
+ *
+ * May set *errno* to one of the following error codes under
+ * specific circumstances:
+ *
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ * Description
+ * Delete multiple elements in a map by *key*.
+ *
+ * The *keys* parameter is an input parameter which must point
+ * to memory large enough to hold *count* items based on the key
+ * size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially deleted. The
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
+ * and should be zeroed.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements. If
+ * *errno* is **EFAULT**, up to *count* elements may be been
+ * deleted.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook and return a file descriptor handle for
+ * managing the link.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ * Description
+ * Update the eBPF program in the specified *link_fd* to
+ * *new_prog_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF Link corresponding to
+ * *link_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF link currently loaded into the kernel.
+ *
+ * Looks for the eBPF link with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF links
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ * Description
+ * Enable eBPF runtime statistics gathering.
+ *
+ * Runtime statistics gathering for the eBPF runtime is disabled
+ * by default to minimize the corresponding performance overhead.
+ * This command enables statistics globally.
+ *
+ * Multiple programs may independently enable statistics.
+ * After gathering the desired statistics, eBPF runtime statistics
+ * may be disabled again by calling **close**\ (2) for the file
+ * descriptor returned by this function. Statistics will only be
+ * disabled system-wide when all outstanding file descriptors
+ * returned by prior calls for this subcommand are closed.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ * Description
+ * Create an iterator on top of the specified *link_fd* (as
+ * previously created using **BPF_LINK_CREATE**) and return a
+ * file descriptor that can be used to trigger the iteration.
+ *
+ * If the resulting file descriptor is pinned to the filesystem
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ * for that path will trigger the iterator to read kernel state
+ * using the eBPF program attached to *link_fd*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ * Description
+ * Forcefully detach the specified *link_fd* from its
+ * corresponding attachment point.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ * Description
+ * Bind a map to the lifetime of an eBPF program.
+ *
+ * The map identified by *map_fd* is bound to the program
+ * identified by *prog_fd* and only released when *prog_fd* is
+ * released. This may be used in cases where metadata should be
+ * associated with a program which otherwise does not contain any
+ * references to the map (for example, embedded in the eBPF
+ * program instructions).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * NOTES
+ * eBPF objects (maps and programs) can be shared between processes.
+ *
+ * * After **fork**\ (2), the child inherits file descriptors
+ * referring to the same eBPF objects.
+ * * File descriptors referring to eBPF objects can be transferred over
+ * **unix**\ (7) domain sockets.
+ * * File descriptors referring to eBPF objects can be duplicated in the
+ * usual way, using **dup**\ (2) and similar calls.
+ * * File descriptors referring to eBPF objects can be pinned to the
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ * An eBPF object is deallocated only after all file descriptors referring
+ * to the object have been closed and no references remain pinned to the
+ * filesystem or attached (for example, bound to a program or device).
+ */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@@ -393,6 +1103,15 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
+ * insn[0].imm: insn offset to the func
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the function
+ * verifier type: PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
@@ -720,7 +1439,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
- * $ ./scripts/bpf_helpers_doc.py \
+ * $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@@ -1765,6 +2484,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ * L2 type as Ethernet.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -3850,7 +4573,7 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
- * Check packet size against exceeding MTU of net device (based
+ * Check ctx packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
*
@@ -3915,6 +4638,34 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For each element in **map**, call **callback_fn** function with
+ * **map**, **callback_ctx** and other map-specific parameters.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0.
+ *
+ * The following are a list of supported map types and their
+ * respective expected callback signatures:
+ *
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ * For per_cpu maps, the map_value is the value on the cpu where the
+ * bpf_prog is running.
+ *
+ * If **callback_fn** return 0, the helper will continue to the next
+ * element. If return value is 1, the helper will skip the rest of
+ * elements and return. Other return values are not used now.
+ *
+ * Return
+ * The number of traversed map elements for success, **-EINVAL** for
+ * invalid **flags**.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4081,6 +4832,7 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
+ FN(for_each_map_elem), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4174,6 +4926,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@@ -5211,7 +5964,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ union {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+ };
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 4a42eb48..2c42dcac 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
-#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index b0a56139..37b14b4e 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -21,7 +21,10 @@ struct nexthop_grp {
};
enum {
- NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
+ NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group
+ * default type if not specified
+ */
+ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */
__NEXTHOP_GRP_TYPE_MAX,
};
@@ -52,8 +55,50 @@ enum {
NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+ /* nested; resilient nexthop group attributes */
+ NHA_RES_GROUP,
+ /* nested; nexthop bucket attributes */
+ NHA_RES_BUCKET,
+
__NHA_MAX,
};
#define NHA_MAX (__NHA_MAX - 1)
+
+enum {
+ NHA_RES_GROUP_UNSPEC,
+ /* Pad attribute for 64-bit alignment. */
+ NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+ /* u16; number of nexthop buckets in a resilient nexthop group */
+ NHA_RES_GROUP_BUCKETS,
+ /* clock_t as u32; nexthop bucket idle timer (per-group) */
+ NHA_RES_GROUP_IDLE_TIMER,
+ /* clock_t as u32; nexthop unbalanced timer */
+ NHA_RES_GROUP_UNBALANCED_TIMER,
+ /* clock_t as u64; nexthop unbalanced time */
+ NHA_RES_GROUP_UNBALANCED_TIME,
+
+ __NHA_RES_GROUP_MAX,
+};
+
+#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1)
+
+enum {
+ NHA_RES_BUCKET_UNSPEC,
+ /* Pad attribute for 64-bit alignment. */
+ NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
+
+ /* u16; nexthop bucket index */
+ NHA_RES_BUCKET_INDEX,
+ /* clock_t as u64; nexthop bucket idle time */
+ NHA_RES_BUCKET_IDLE_TIME,
+ /* u32; nexthop id assigned to the nexthop bucket */
+ NHA_RES_BUCKET_NH_ID,
+
+ __NHA_RES_BUCKET_MAX,
+};
+
+#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1)
+
#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7ea59cfe..025c40fe 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -190,6 +190,8 @@ enum {
TCA_POLICE_PAD,
TCA_POLICE_RATE64,
TCA_POLICE_PEAKRATE64,
+ TCA_POLICE_PKTRATE64,
+ TCA_POLICE_PKTBURST64,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index b34b9add..f62cccc1 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -178,6 +178,13 @@ enum {
RTM_GETVLAN,
#define RTM_GETVLAN RTM_GETVLAN
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
--
2.31.1

View File

@ -1,342 +0,0 @@
From 0afe12a4a9471ed1343693338ec6350dc66ba295 Mon Sep 17 00:00:00 2001
Message-Id: <0afe12a4a9471ed1343693338ec6350dc66ba295.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:03 +0100
Subject: [PATCH] m_mpls: add mac_push action
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit 02a261b5
commit 02a261b5ba1c8580ac2a35bc6c87faa2ec9f5c96
Author: Guillaume Nault <gnault@redhat.com>
Date: Mon Oct 19 17:23:08 2020 +0200
m_mpls: add mac_push action
Add support for the new TCA_MPLS_ACT_MAC_PUSH action (kernel commit
a45294af9e96 ("net/sched: act_mpls: Add action to push MPLS LSE before
Ethernet header")). This action let TC push an MPLS header before the
MAC header of a frame.
Example (encapsulate all outgoing frames with label 20, then add an
outer Ethernet header):
# tc filter add dev ethX matchall \
action mpls mac_push label 20 ttl 64 \
action vlan push_eth dst_mac 0a:00:00:00:00:02 \
src_mac 0a:00:00:00:00:01
This patch also adds an alias for ETH_P_TEB, since it is useful when
decapsulating MPLS packets that contain an Ethernet frame.
With MAC_PUSH, there's no previous Ethertype to modify. However, the
"protocol" option is still needed, because the kernel uses it to set
skb->protocol. So rename can_modify_ethtype() to can_set_ethtype().
Also add a test suite for m_mpls, which covers the new action and the
pre-existing ones.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
lib/ll_proto.c | 1 +
man/man8/tc-mpls.8 | 44 +++++++++++++++++++++++--
man/man8/tc-vlan.8 | 5 ++-
tc/m_mpls.c | 43 ++++++++++++++++--------
testsuite/tests/tc/mpls.t | 69 +++++++++++++++++++++++++++++++++++++++
5 files changed, 145 insertions(+), 17 deletions(-)
create mode 100755 testsuite/tests/tc/mpls.t
diff --git a/lib/ll_proto.c b/lib/ll_proto.c
index 2a0c1cb3..78179311 100644
--- a/lib/ll_proto.c
+++ b/lib/ll_proto.c
@@ -80,6 +80,7 @@ __PF(8021Q,802.1Q)
__PF(8021AD,802.1ad)
__PF(MPLS_UC,mpls_uc)
__PF(MPLS_MC,mpls_mc)
+__PF(TEB,teb)
{ 0x8100, "802.1Q" },
{ 0x88cc, "LLDP" },
diff --git a/man/man8/tc-mpls.8 b/man/man8/tc-mpls.8
index 84ef2ef1..9e563e98 100644
--- a/man/man8/tc-mpls.8
+++ b/man/man8/tc-mpls.8
@@ -17,7 +17,7 @@ mpls - mpls manipulation module
.ti -8
.IR PUSH " := "
-.BR push " [ " protocol
+.RB "{ " push " | " mac_push " } [ " protocol
.IR MPLS_PROTO " ]"
.RB " [ " tc
.IR MPLS_TC " ] "
@@ -64,7 +64,14 @@ requires no arguments and simply subtracts 1 from the MPLS header TTL field.
Decapsulation mode. Requires the protocol of the next header.
.TP
.B push
-Encapsulation mode. Requires at least the
+Encapsulation mode. Adds the MPLS header between the MAC and the network
+headers. Requires at least the
+.B label
+option.
+.TP
+.B mac_push
+Encapsulation mode. Adds the MPLS header before the MAC header. Requires at
+least the
.B label
option.
.TP
@@ -152,5 +159,36 @@ ip packets and output to eth1:
.EE
.RE
+Here is another example, where incoming Ethernet frames are encapsulated into
+MPLS with label 123 and TTL 64. Then, an outer Ethernet header is added and the
+resulting frame is finally sent on eth1:
+
+.RS
+.EX
+#tc qdisc add dev eth0 ingress
+#tc filter add dev eth0 ingress matchall \\
+ action mpls mac_push label 123 ttl 64 \\
+ action vlan push_eth \\
+ dst_mac 02:00:00:00:00:02 \\
+ src_mac 02:00:00:00:00:01 \\
+ action mirred egress redirect dev eth1
+.EE
+.RE
+
+The following example assumes that incoming MPLS packets with label 123
+transport Ethernet frames. The outer Ethernet and the MPLS headers are
+stripped, then the inner Ethernet frame is sent on eth1:
+
+.RS
+.EX
+#tc qdisc add dev eth0 ingress
+#tc filter add dev eth0 ingress protocol mpls_uc \\
+ flower mpls_label 123 mpls_bos 1 \\
+ action vlan pop_eth \\
+ action mpls pop protocol teb \\
+ action mirred egress redirect dev eth1
+.EE
+.RE
+
.SH SEE ALSO
-.BR tc (8)
+.BR tc "(8), " tc-mirred "(8), " tc-vlan (8)
diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8
index 5c2808b1..264053d3 100644
--- a/man/man8/tc-vlan.8
+++ b/man/man8/tc-vlan.8
@@ -157,5 +157,8 @@ process then restarted for the plain packet:
.EE
.RE
+For an example of the
+.BR pop_eth " and " push_eth " modes, see " tc-mpls (8).
+
.SH SEE ALSO
-.BR tc (8)
+.BR tc "(8), " tc-mpls (8)
diff --git a/tc/m_mpls.c b/tc/m_mpls.c
index 3d5d9b25..cb8019b1 100644
--- a/tc/m_mpls.c
+++ b/tc/m_mpls.c
@@ -17,6 +17,7 @@ static const char * const action_names[] = {
[TCA_MPLS_ACT_PUSH] = "push",
[TCA_MPLS_ACT_MODIFY] = "modify",
[TCA_MPLS_ACT_DEC_TTL] = "dec_ttl",
+ [TCA_MPLS_ACT_MAC_PUSH] = "mac_push",
};
static void explain(void)
@@ -25,9 +26,11 @@ static void explain(void)
"Usage: mpls pop [ protocol MPLS_PROTO ]\n"
" mpls push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n"
" [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n"
+ " mpls mac_push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n"
+ " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n"
" mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ] [CONTROL]\n"
- " for pop MPLS_PROTO is next header of packet - e.g. ip or mpls_uc\n"
- " for push MPLS_PROTO is one of mpls_uc or mpls_mc\n"
+ " for pop, MPLS_PROTO is next header of packet - e.g. ip or mpls_uc\n"
+ " for push and mac_push, MPLS_PROTO is one of mpls_uc or mpls_mc\n"
" with default: mpls_uc\n"
" CONTROL := reclassify | pipe | drop | continue | pass |\n"
" goto chain <CHAIN_INDEX>\n");
@@ -41,12 +44,14 @@ static void usage(void)
static bool can_modify_mpls_fields(unsigned int action)
{
- return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MODIFY;
+ return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH ||
+ action == TCA_MPLS_ACT_MODIFY;
}
-static bool can_modify_ethtype(unsigned int action)
+static bool can_set_ethtype(unsigned int action)
{
- return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_POP;
+ return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH ||
+ action == TCA_MPLS_ACT_POP;
}
static bool is_valid_label(__u32 label)
@@ -94,6 +99,10 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
if (check_double_action(action, *argv))
return -1;
action = TCA_MPLS_ACT_PUSH;
+ } else if (matches(*argv, "mac_push") == 0) {
+ if (check_double_action(action, *argv))
+ return -1;
+ action = TCA_MPLS_ACT_MAC_PUSH;
} else if (matches(*argv, "modify") == 0) {
if (check_double_action(action, *argv))
return -1;
@@ -104,31 +113,36 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
action = TCA_MPLS_ACT_DEC_TTL;
} else if (matches(*argv, "label") == 0) {
if (!can_modify_mpls_fields(action))
- invarg("only valid for push/modify", *argv);
+ invarg("only valid for push, mac_push and modify",
+ *argv);
NEXT_ARG();
if (get_u32(&label, *argv, 0) || !is_valid_label(label))
invarg("label must be <=0xFFFFF", *argv);
} else if (matches(*argv, "tc") == 0) {
if (!can_modify_mpls_fields(action))
- invarg("only valid for push/modify", *argv);
+ invarg("only valid for push, mac_push and modify",
+ *argv);
NEXT_ARG();
if (get_u8(&tc, *argv, 0) || (tc & ~0x7))
invarg("tc field is 3 bits max", *argv);
} else if (matches(*argv, "ttl") == 0) {
if (!can_modify_mpls_fields(action))
- invarg("only valid for push/modify", *argv);
+ invarg("only valid for push, mac_push and modify",
+ *argv);
NEXT_ARG();
if (get_u8(&ttl, *argv, 0) || !ttl)
invarg("ttl must be >0 and <=255", *argv);
} else if (matches(*argv, "bos") == 0) {
if (!can_modify_mpls_fields(action))
- invarg("only valid for push/modify", *argv);
+ invarg("only valid for push, mac_push and modify",
+ *argv);
NEXT_ARG();
if (get_u8(&bos, *argv, 0) || (bos & ~0x1))
invarg("bos must be 0 or 1", *argv);
} else if (matches(*argv, "protocol") == 0) {
- if (!can_modify_ethtype(action))
- invarg("only valid for push/pop", *argv);
+ if (!can_set_ethtype(action))
+ invarg("only valid for push, mac_push and pop",
+ *argv);
NEXT_ARG();
if (ll_proto_a2n(&proto, *argv))
invarg("protocol is invalid", *argv);
@@ -159,10 +173,12 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
if (action == TCA_MPLS_ACT_PUSH && label == 0xffffffff)
missarg("label");
- if (action == TCA_MPLS_ACT_PUSH && proto &&
+ if ((action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH) &&
+ proto &&
proto != htons(ETH_P_MPLS_UC) && proto != htons(ETH_P_MPLS_MC)) {
fprintf(stderr,
- "invalid push protocol \"0x%04x\" - use mpls_(uc|mc)\n",
+ "invalid %spush protocol \"0x%04x\" - use mpls_(uc|mc)\n",
+ action == TCA_MPLS_ACT_MAC_PUSH ? "mac_" : "",
ntohs(proto));
return -1;
}
@@ -223,6 +239,7 @@ static int print_mpls(struct action_util *au, FILE *f, struct rtattr *arg)
}
break;
case TCA_MPLS_ACT_PUSH:
+ case TCA_MPLS_ACT_MAC_PUSH:
if (tb[TCA_MPLS_PROTO]) {
__u16 proto;
diff --git a/testsuite/tests/tc/mpls.t b/testsuite/tests/tc/mpls.t
new file mode 100755
index 00000000..cb25f361
--- /dev/null
+++ b/testsuite/tests/tc/mpls.t
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+. lib/generic.sh
+
+DEV="$(rand_dev)"
+ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy
+ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress
+
+reset_qdisc()
+{
+ ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress
+ ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress
+}
+
+ts_tc "$0" "Add mpls action pop" \
+ filter add dev $DEV ingress protocol mpls_uc matchall \
+ action mpls pop protocol ip
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "mpls"
+test_on "pop protocol ip pipe"
+
+reset_qdisc
+ts_tc "$0" "Add mpls action push" \
+ filter add dev $DEV ingress protocol ip matchall \
+ action mpls push protocol mpls_uc label 20 tc 3 bos 1 ttl 64
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "mpls"
+test_on "push"
+test_on "protocol mpls_uc"
+test_on "label 20"
+test_on "tc 3"
+test_on "bos 1"
+test_on "ttl 64"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add mpls action mac_push" \
+ filter add dev $DEV ingress matchall \
+ action mpls mac_push protocol mpls_uc label 20 tc 3 bos 1 ttl 64
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "mpls"
+test_on "mac_push"
+test_on "protocol mpls_uc"
+test_on "label 20"
+test_on "tc 3"
+test_on "bos 1"
+test_on "ttl 64"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add mpls action modify" \
+ filter add dev $DEV ingress protocol mpls_uc matchall \
+ action mpls modify label 20 tc 3 ttl 64
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "mpls"
+test_on "modify"
+test_on "label 20"
+test_on "tc 3"
+test_on "ttl 64"
+test_on "pipe"
+
+reset_qdisc
+ts_tc "$0" "Add mpls action dec_ttl" \
+ filter add dev $DEV ingress protocol mpls_uc matchall \
+ action mpls dec_ttl
+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress
+test_on "mpls"
+test_on "dec_ttl"
+test_on "pipe"
--
2.29.2

View File

@ -1,58 +0,0 @@
From 8c66f562e88887d2bf1c1064117496c4cb862b11 Mon Sep 17 00:00:00 2001
Message-Id: <8c66f562e88887d2bf1c1064117496c4cb862b11.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:03 +0100
Subject: [PATCH] m_mpls: test the 'mac_push' action after 'modify'
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit f1298d76
commit f1298d76606a581cf3ab9ec45a92b41e72a6b4f0
Author: Guillaume Nault <gnault@redhat.com>
Date: Thu Oct 22 11:11:44 2020 +0200
m_mpls: test the 'mac_push' action after 'modify'
Commit 02a261b5ba1c ("m_mpls: add mac_push action") added a matches()
test for the "mac_push" string before the test for "modify".
This changes the previous behaviour as 'action m' used to match
"modify" while it now matches "mac_push".
Revert to the original behaviour by moving the "mac_push" test after
"modify".
Fixes: 02a261b5ba1c ("m_mpls: add mac_push action")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
---
tc/m_mpls.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tc/m_mpls.c b/tc/m_mpls.c
index cb8019b1..2c3752ba 100644
--- a/tc/m_mpls.c
+++ b/tc/m_mpls.c
@@ -99,14 +99,14 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p,
if (check_double_action(action, *argv))
return -1;
action = TCA_MPLS_ACT_PUSH;
- } else if (matches(*argv, "mac_push") == 0) {
- if (check_double_action(action, *argv))
- return -1;
- action = TCA_MPLS_ACT_MAC_PUSH;
} else if (matches(*argv, "modify") == 0) {
if (check_double_action(action, *argv))
return -1;
action = TCA_MPLS_ACT_MODIFY;
+ } else if (matches(*argv, "mac_push") == 0) {
+ if (check_double_action(action, *argv))
+ return -1;
+ action = TCA_MPLS_ACT_MAC_PUSH;
} else if (matches(*argv, "dec_ttl") == 0) {
if (check_double_action(action, *argv))
return -1;
--
2.29.2

View File

@ -0,0 +1,221 @@
From b061aeba93b1c730b7dafeece6b90aad2e7afce8 Mon Sep 17 00:00:00 2001
Message-Id: <b061aeba93b1c730b7dafeece6b90aad2e7afce8.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Wed, 11 Aug 2021 12:55:14 +0200
Subject: [PATCH] police: add support for packet-per-second rate limiting
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
Upstream Status: iproute2.git commit cf9ae1bd
commit cf9ae1bd31187d8ae62bc1bb408e443dbc8bd6a0
Author: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri Mar 26 13:50:18 2021 +0100
police: add support for packet-per-second rate limiting
Allow a policer action to enforce a rate-limit based on packets-per-second,
configurable using a packet-per-second rate and burst parameters.
e.g.
# $TC actions add action police pkts_rate 1000 pkts_burst 200 index 1
# $TC actions ls action police
total acts 1
action order 0: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200
ref 1 bind 0
Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
man/man8/tc-police.8 | 35 ++++++++++++++++++++++++-------
tc/m_police.c | 50 +++++++++++++++++++++++++++++++++++++++++---
2 files changed, 75 insertions(+), 10 deletions(-)
diff --git a/man/man8/tc-police.8 b/man/man8/tc-police.8
index 52279755..86e263bb 100644
--- a/man/man8/tc-police.8
+++ b/man/man8/tc-police.8
@@ -5,9 +5,11 @@ police - policing action
.SH SYNOPSIS
.in +8
.ti -8
-.BR tc " ... " "action police"
+.BR tc " ... " "action police ["
.BI rate " RATE " burst
-.IR BYTES [\fB/ BYTES "] ["
+.IR BYTES [\fB/ BYTES "] ] ["
+.BI pkts_rate " RATE " pkts_burst
+.IR PACKETS "] ["
.B mtu
.IR BYTES [\fB/ BYTES "] ] ["
.BI peakrate " RATE"
@@ -34,19 +36,29 @@ police - policing action
.SH DESCRIPTION
The
.B police
-action allows to limit bandwidth of traffic matched by the filter it is
-attached to. Basically there are two different algorithms available to measure
-the packet rate: The first one uses an internal dual token bucket and is
-configured using the
+action allows limiting of the byte or packet rate of traffic matched by the
+filter it is attached to.
+.P
+There are two different algorithms available to measure the byte rate: The
+first one uses an internal dual token bucket and is configured using the
.BR rate ", " burst ", " mtu ", " peakrate ", " overhead " and " linklayer
parameters. The second one uses an in-kernel sampling mechanism. It can be
fine-tuned using the
.B estimator
filter parameter.
+.P
+There is one algorithm available to measure packet rate and it is similar to
+the first algorithm described for byte rate. It is configured using the
+.BR pkt_rate " and " pkt_burst
+parameters.
+.P
+At least one of the
+.BR rate " and " pkt_rate "
+parameters must be configured.
.SH OPTIONS
.TP
.BI rate " RATE"
-The maximum traffic rate of packets passing this action. Those exceeding it will
+The maximum byte rate of packets passing this action. Those exceeding it will
be treated as defined by the
.B conform-exceed
option.
@@ -55,6 +67,15 @@ option.
Set the maximum allowed burst in bytes, optionally followed by a slash ('/')
sign and cell size which must be a power of 2.
.TP
+.BI pkt_rate " RATE"
+The maximum packet rate or packets passing this action. Those exceeding it will
+be treated as defined by the
+.B conform-exceed
+option.
+.TP
+.BI pkt_burst " PACKETS"
+Set the maximum allowed burst in packets.
+.TP
.BI mtu " BYTES\fR[\fB/\fIBYTES\fR]"
This is the maximum packet size handled by the policer (larger ones will be
handled like they exceeded the configured rate). Setting this value correctly
diff --git a/tc/m_police.c b/tc/m_police.c
index bb51df68..9ef0e40b 100644
--- a/tc/m_police.c
+++ b/tc/m_police.c
@@ -38,7 +38,8 @@ struct action_util police_action_util = {
static void usage(void)
{
fprintf(stderr,
- "Usage: ... police rate BPS burst BYTES[/BYTES] [ mtu BYTES[/BYTES] ]\n"
+ "Usage: ... police [ rate BPS burst BYTES[/BYTES] ] \n"
+ " [ pkts_rate RATE pkts_burst PACKETS ] [ mtu BYTES[/BYTES] ]\n"
" [ peakrate BPS ] [ avrate BPS ] [ overhead BYTES ]\n"
" [ linklayer TYPE ] [ CONTROL ]\n"
"Where: CONTROL := conform-exceed <EXCEEDACT>[/NOTEXCEEDACT]\n"
@@ -67,6 +68,7 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p,
int Rcell_log = -1, Pcell_log = -1;
struct rtattr *tail;
__u64 rate64 = 0, prate64 = 0;
+ __u64 pps64 = 0, ppsburst64 = 0;
if (a) /* new way of doing things */
NEXT_ARG();
@@ -144,6 +146,18 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p,
NEXT_ARG();
if (get_linklayer(&linklayer, *argv))
invarg("linklayer", *argv);
+ } else if (matches(*argv, "pkts_rate") == 0) {
+ NEXT_ARG();
+ if (pps64)
+ duparg("pkts_rate", *argv);
+ if (get_u64(&pps64, *argv, 10))
+ invarg("pkts_rate", *argv);
+ } else if (matches(*argv, "pkts_burst") == 0) {
+ NEXT_ARG();
+ if (ppsburst64)
+ duparg("pkts_burst", *argv);
+ if (get_u64(&ppsburst64, *argv, 10))
+ invarg("pkts_burst", *argv);
} else if (strcmp(*argv, "help") == 0) {
usage();
} else {
@@ -161,8 +175,8 @@ action_ctrl_ok:
return -1;
/* Must at least do late binding, use TB or ewma policing */
- if (!rate64 && !avrate && !p.index && !mtu) {
- fprintf(stderr, "'rate' or 'avrate' or 'mtu' MUST be specified.\n");
+ if (!rate64 && !avrate && !p.index && !mtu && !pps64) {
+ fprintf(stderr, "'rate' or 'avrate' or 'mtu' or 'pkts_rate' MUST be specified.\n");
return -1;
}
@@ -172,6 +186,18 @@ action_ctrl_ok:
return -1;
}
+ /* When the packets TB policer is used, pkts_burst is required */
+ if (pps64 && !ppsburst64) {
+ fprintf(stderr, "'pkts_burst' requires 'pkts_rate'.\n");
+ return -1;
+ }
+
+ /* forbid rate and pkts_rate in same action */
+ if (pps64 && rate64) {
+ fprintf(stderr, "'rate' and 'pkts_rate' are not allowed in same action.\n");
+ return -1;
+ }
+
if (prate64) {
if (!rate64) {
fprintf(stderr, "'peakrate' requires 'rate'.\n");
@@ -223,6 +249,12 @@ action_ctrl_ok:
if (presult)
addattr32(n, MAX_MSG, TCA_POLICE_RESULT, presult);
+ if (pps64) {
+ addattr64(n, MAX_MSG, TCA_POLICE_PKTRATE64, pps64);
+ ppsburst64 = tc_calc_xmittime(pps64, ppsburst64);
+ addattr64(n, MAX_MSG, TCA_POLICE_PKTBURST64, ppsburst64);
+ }
+
addattr_nest_end(n, tail);
res = 0;
@@ -244,6 +276,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
unsigned int buffer;
unsigned int linklayer;
__u64 rate64, prate64;
+ __u64 pps64, ppsburst64;
if (arg == NULL)
return 0;
@@ -287,6 +320,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
tc_print_rate(PRINT_FP, NULL, "avrate %s ",
rta_getattr_u32(tb[TCA_POLICE_AVRATE]));
+ if ((tb[TCA_POLICE_PKTRATE64] &&
+ RTA_PAYLOAD(tb[TCA_POLICE_PKTRATE64]) >= sizeof(pps64)) &&
+ (tb[TCA_POLICE_PKTBURST64] &&
+ RTA_PAYLOAD(tb[TCA_POLICE_PKTBURST64]) >= sizeof(ppsburst64))) {
+ pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]);
+ ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]);
+ ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64);
+ fprintf(f, "pkts_rate %llu ", pps64);
+ fprintf(f, "pkts_burst %llu ", ppsburst64);
+ }
+
print_action_control(f, "action ", p->action, "");
if (tb[TCA_POLICE_RESULT]) {
--
2.31.1

View File

@ -0,0 +1,159 @@
From 04b921c03a4680931df6660b88444f2478fb585c Mon Sep 17 00:00:00 2001
Message-Id: <04b921c03a4680931df6660b88444f2478fb585c.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Wed, 11 Aug 2021 12:55:14 +0200
Subject: [PATCH] police: Add support for json output
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
Upstream Status: iproute2.git commit 0d5cf51e
commit 0d5cf51e0d6c7bfdc51754381b85367b5f8e254a
Author: Roi Dayan <roid@nvidia.com>
Date: Mon Jun 7 09:44:08 2021 +0300
police: Add support for json output
Change to use the print wrappers instead of fprintf().
This is example output of the options part before this commit:
"options": {
"handle": 1,
"in_hw": true,
"actions": [ {
"order": 1 police 0x2 ,
"control_action": {
"type": "drop"
},
"control_action": {
"type": "continue"
}overhead 0b linklayer unspec
ref 1 bind 1
,
"used_hw_stats": [ "delayed" ]
} ]
}
This is the output of the same dump with this commit:
"options": {
"handle": 1,
"in_hw": true,
"actions": [ {
"order": 1,
"kind": "police",
"index": 2,
"control_action": {
"type": "drop"
},
"control_action": {
"type": "continue"
},
"overhead": 0,
"linklayer": "unspec",
"ref": 1,
"bind": 1,
"used_hw_stats": [ "delayed" ]
} ]
}
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
tc/m_police.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/tc/m_police.c b/tc/m_police.c
index 9ef0e40b..2594c089 100644
--- a/tc/m_police.c
+++ b/tc/m_police.c
@@ -278,18 +278,19 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
__u64 rate64, prate64;
__u64 pps64, ppsburst64;
+ print_string(PRINT_ANY, "kind", "%s", "police");
if (arg == NULL)
return 0;
parse_rtattr_nested(tb, TCA_POLICE_MAX, arg);
if (tb[TCA_POLICE_TBF] == NULL) {
- fprintf(f, "[NULL police tbf]");
- return 0;
+ fprintf(stderr, "[NULL police tbf]");
+ return -1;
}
#ifndef STOOPID_8BYTE
if (RTA_PAYLOAD(tb[TCA_POLICE_TBF]) < sizeof(*p)) {
- fprintf(f, "[truncated police tbf]");
+ fprintf(stderr, "[truncated police tbf]");
return -1;
}
#endif
@@ -300,13 +301,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64))
rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]);
- fprintf(f, " police 0x%x ", p->index);
+ print_uint(PRINT_ANY, "index", "\t index %u ", p->index);
tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64);
buffer = tc_calc_xmitsize(rate64, p->burst);
print_size(PRINT_FP, NULL, "burst %s ", buffer);
print_size(PRINT_FP, NULL, "mtu %s ", p->mtu);
if (show_raw)
- fprintf(f, "[%08x] ", p->burst);
+ print_hex(PRINT_FP, NULL, "[%08x] ", p->burst);
prate64 = p->peakrate.rate;
if (tb[TCA_POLICE_PEAKRATE64] &&
@@ -327,8 +328,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]);
ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]);
ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64);
- fprintf(f, "pkts_rate %llu ", pps64);
- fprintf(f, "pkts_burst %llu ", ppsburst64);
+ print_u64(PRINT_ANY, "pkts_rate", "pkts_rate %llu ", pps64);
+ print_u64(PRINT_ANY, "pkts_burst", "pkts_burst %llu ", ppsburst64);
}
print_action_control(f, "action ", p->action, "");
@@ -337,14 +338,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
__u32 action = rta_getattr_u32(tb[TCA_POLICE_RESULT]);
print_action_control(f, "/", action, " ");
- } else
- fprintf(f, " ");
+ } else {
+ print_string(PRINT_FP, NULL, " ", NULL);
+ }
- fprintf(f, "overhead %ub ", p->rate.overhead);
+ print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead);
linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK);
if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
- fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2));
- fprintf(f, "\n\tref %d bind %d", p->refcnt, p->bindcnt);
+ print_string(PRINT_ANY, "linklayer", "linklayer %s ",
+ sprint_linklayer(linklayer, b2));
+ print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt);
+ print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt);
if (show_stats) {
if (tb[TCA_POLICE_TM]) {
struct tcf_t *tm = RTA_DATA(tb[TCA_POLICE_TM]);
@@ -352,7 +356,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
print_tm(f, tm);
}
}
- fprintf(f, "\n");
+ print_nl();
return 0;
--
2.31.1

View File

@ -1,52 +0,0 @@
From cdb8197d0e7380b3679ded6bab398883aead92dc Mon Sep 17 00:00:00 2001
Message-Id: <cdb8197d0e7380b3679ded6bab398883aead92dc.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:03 +0100
Subject: [PATCH] tc-vlan: fix help and error message strings
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit 7c7a0fe0
commit 7c7a0fe0c81cdff258c4314c629d7a52ae331dc4
Author: Guillaume Nault <gnault@redhat.com>
Date: Mon Nov 2 11:59:46 2020 +0100
tc-vlan: fix help and error message strings
* "vlan pop" can be followed by a CONTROL keyword.
* Add missing space in error message.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
tc/m_vlan.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tc/m_vlan.c b/tc/m_vlan.c
index e6b21330..57722b73 100644
--- a/tc/m_vlan.c
+++ b/tc/m_vlan.c
@@ -30,7 +30,7 @@ static const char * const action_names[] = {
static void explain(void)
{
fprintf(stderr,
- "Usage: vlan pop\n"
+ "Usage: vlan pop [CONTROL]\n"
" vlan push [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n"
" vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n"
" vlan pop_eth [CONTROL]\n"
@@ -244,7 +244,7 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg)
parse_rtattr_nested(tb, TCA_VLAN_MAX, arg);
if (!tb[TCA_VLAN_PARMS]) {
- fprintf(stderr, "Missing vlanparameters\n");
+ fprintf(stderr, "Missing vlan parameters\n");
return -1;
}
parm = RTA_DATA(tb[TCA_VLAN_PARMS]);
--
2.29.2

View File

@ -0,0 +1,73 @@
From 148b286b52aa8f38d8d7587b598522310067de7b Mon Sep 17 00:00:00 2001
Message-Id: <148b286b52aa8f38d8d7587b598522310067de7b.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Wed, 11 Aug 2021 12:55:14 +0200
Subject: [PATCH] police: Fix normal output back to what it was
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393
Upstream Status: iproute2.git commit 71d36000
commit 71d36000dc9ce8397fc45b680e0c0340df5a28e5
Author: Roi Dayan <roid@nvidia.com>
Date: Mon Jul 12 15:26:53 2021 +0300
police: Fix normal output back to what it was
With the json support fix the normal output was
changed. set it back to what it was.
Print overhead with print_size().
Print newline before ref.
Fixes: 0d5cf51e0d6c ("police: Add support for json output")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
tc/m_police.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tc/m_police.c b/tc/m_police.c
index 2594c089..f38ab90a 100644
--- a/tc/m_police.c
+++ b/tc/m_police.c
@@ -278,7 +278,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
__u64 rate64, prate64;
__u64 pps64, ppsburst64;
- print_string(PRINT_ANY, "kind", "%s", "police");
+ print_string(PRINT_JSON, "kind", "%s", "police");
if (arg == NULL)
return 0;
@@ -301,7 +301,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64))
rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]);
- print_uint(PRINT_ANY, "index", "\t index %u ", p->index);
+ print_hex(PRINT_FP, NULL, " police 0x%x ", p->index);
+ print_uint(PRINT_JSON, "index", NULL, p->index);
tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64);
buffer = tc_calc_xmitsize(rate64, p->burst);
print_size(PRINT_FP, NULL, "burst %s ", buffer);
@@ -342,12 +343,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg)
print_string(PRINT_FP, NULL, " ", NULL);
}
- print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead);
+ print_size(PRINT_ANY, "overhead", "overhead %s ", p->rate.overhead);
linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK);
if (linklayer > TC_LINKLAYER_ETHERNET || show_details)
print_string(PRINT_ANY, "linklayer", "linklayer %s ",
sprint_linklayer(linklayer, b2));
- print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt);
+ print_nl();
+ print_int(PRINT_ANY, "ref", "\tref %d ", p->refcnt);
print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt);
if (show_stats) {
if (tb[TCA_POLICE_TM]) {
--
2.31.1

View File

@ -1,82 +0,0 @@
From 8953735b551d5f3c18c9523ea24055f4a7f9b927 Mon Sep 17 00:00:00 2001
Message-Id: <8953735b551d5f3c18c9523ea24055f4a7f9b927.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:03 +0100
Subject: [PATCH] tc-mpls: fix manpage example and help message string
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit 8682f588
commit 8682f588bfed7862233a22626562696d662ca60c
Author: Guillaume Nault <gnault@redhat.com>
Date: Mon Nov 2 12:24:25 2020 +0100
tc-mpls: fix manpage example and help message string
Manpage:
* Remove the extra "and to ip packets" part from command description
to make it more understandable.
* Redirect packets to eth1, instead of eth0, as told in the
description.
Help string:
* "mpls pop" can be followed by a CONTROL keyword.
* "mpls modify" can also set the MPLS_BOS field.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
man/man8/tc-mpls.8 | 6 +++---
tc/m_mpls.c | 5 +++--
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/man/man8/tc-mpls.8 b/man/man8/tc-mpls.8
index 9e563e98..7f8be221 100644
--- a/man/man8/tc-mpls.8
+++ b/man/man8/tc-mpls.8
@@ -147,15 +147,15 @@ a label 123 and sends them out eth1:
.EE
.RE
-In this example, incoming MPLS unicast packets on eth0 are decapsulated and to
-ip packets and output to eth1:
+In this example, incoming MPLS unicast packets on eth0 are decapsulated
+and redirected to eth1:
.RS
.EX
#tc qdisc add dev eth0 handle ffff: ingress
#tc filter add dev eth0 protocol mpls_uc parent ffff: flower \\
action mpls pop protocol ipv4 \\
- action mirred egress redirect dev eth0
+ action mirred egress redirect dev eth1
.EE
.RE
diff --git a/tc/m_mpls.c b/tc/m_mpls.c
index 2c3752ba..9fee22e3 100644
--- a/tc/m_mpls.c
+++ b/tc/m_mpls.c
@@ -23,12 +23,13 @@ static const char * const action_names[] = {
static void explain(void)
{
fprintf(stderr,
- "Usage: mpls pop [ protocol MPLS_PROTO ]\n"
+ "Usage: mpls pop [ protocol MPLS_PROTO ] [CONTROL]\n"
" mpls push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n"
" [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n"
" mpls mac_push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n"
" [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n"
- " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ] [CONTROL]\n"
+ " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ]\n"
+ " [ bos MPLS_BOS ] [CONTROL]\n"
" for pop, MPLS_PROTO is next header of packet - e.g. ip or mpls_uc\n"
" for push and mac_push, MPLS_PROTO is one of mpls_uc or mpls_mc\n"
" with default: mpls_uc\n"
--
2.29.2

View File

@ -1,120 +0,0 @@
From 52754e4b7d4b52e9852869d7e2f6af1b890677f1 Mon Sep 17 00:00:00 2001
Message-Id: <52754e4b7d4b52e9852869d7e2f6af1b890677f1.1611877215.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1611877215.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Fri, 29 Jan 2021 00:35:04 +0100
Subject: [PATCH] tc: flower: fix json output with mpls lse
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770
Upstream Status: unknown commit 676a1a70
commit 676a1a708f8e99d6a4faa3de8a093f8f8c14b9da
Author: Guillaume Nault <gnault@redhat.com>
Date: Tue Jan 12 11:30:53 2021 +0100
tc: flower: fix json output with mpls lse
The json output of the TCA_FLOWER_KEY_MPLS_OPTS attribute was invalid.
Example:
$ tc filter add dev eth0 ingress protocol mpls_uc flower mpls \
lse depth 1 label 100 \
lse depth 2 label 200
$ tc -json filter show dev eth0 ingress
...{"eth_type":"8847",
" mpls":[" lse":["depth":1,"label":100],
" lse":["depth":2,"label":200]]}...
This is invalid as the arrays, introduced by "[", can't contain raw
string:value pairs. Those must be enclosed into "{}" to form valid json
ojects. Also, there are spurious whitespaces before the mpls and lse
strings because of the indentation used for normal output.
Fix this by putting all LSE parameters (depth, label, tc, bos and ttl)
into the same json object. The "mpls" key now directly contains a list
of such objects.
Also, handle strings differently for normal and json output, so that
json strings don't get spurious indentation whitespaces.
Normal output isn't modified.
The json output now looks like:
$ tc -json filter show dev eth0 ingress
...{"eth_type":"8847",
"mpls":[{"depth":1,"label":100},
{"depth":2,"label":200}]}...
Fixes: eb09a15c12fb ("tc: flower: support multiple MPLS LSE match")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
tc/f_flower.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 00c919fd..27731078 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -2476,7 +2476,7 @@ static void flower_print_u32(const char *name, struct rtattr *attr)
print_uint(PRINT_ANY, name, namefrm, rta_getattr_u32(attr));
}
-static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse)
+static void flower_print_mpls_opt_lse(struct rtattr *lse)
{
struct rtattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
struct rtattr *attr;
@@ -2493,7 +2493,8 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse)
RTA_PAYLOAD(lse));
print_nl();
- open_json_array(PRINT_ANY, name);
+ print_string(PRINT_FP, NULL, " lse", NULL);
+ open_json_object(NULL);
attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH];
if (attr)
print_hhu(PRINT_ANY, "depth", " depth %u",
@@ -2511,10 +2512,10 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse)
attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL];
if (attr)
print_hhu(PRINT_ANY, "ttl", " ttl %u", rta_getattr_u8(attr));
- close_json_array(PRINT_JSON, NULL);
+ close_json_object();
}
-static void flower_print_mpls_opts(const char *name, struct rtattr *attr)
+static void flower_print_mpls_opts(struct rtattr *attr)
{
struct rtattr *lse;
int rem;
@@ -2523,11 +2524,12 @@ static void flower_print_mpls_opts(const char *name, struct rtattr *attr)
return;
print_nl();
- open_json_array(PRINT_ANY, name);
+ print_string(PRINT_FP, NULL, " mpls", NULL);
+ open_json_array(PRINT_JSON, "mpls");
rem = RTA_PAYLOAD(attr);
lse = RTA_DATA(attr);
while (RTA_OK(lse, rem)) {
- flower_print_mpls_opt_lse(" lse", lse);
+ flower_print_mpls_opt_lse(lse);
lse = RTA_NEXT(lse, rem);
};
if (rem)
@@ -2650,7 +2652,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
flower_print_ip_attr("ip_ttl", tb[TCA_FLOWER_KEY_IP_TTL],
tb[TCA_FLOWER_KEY_IP_TTL_MASK]);
- flower_print_mpls_opts(" mpls", tb[TCA_FLOWER_KEY_MPLS_OPTS]);
+ flower_print_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS]);
flower_print_u32("mpls_label", tb[TCA_FLOWER_KEY_MPLS_LABEL]);
flower_print_u8("mpls_tc", tb[TCA_FLOWER_KEY_MPLS_TC]);
flower_print_u8("mpls_bos", tb[TCA_FLOWER_KEY_MPLS_BOS]);
--
2.29.2

View File

@ -0,0 +1,68 @@
From 7fcfc0e4d6949ff32df3ed749bad8eb419cebbda Mon Sep 17 00:00:00 2001
Message-Id: <7fcfc0e4d6949ff32df3ed749bad8eb419cebbda.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Wed, 11 Aug 2021 14:49:33 +0200
Subject: [PATCH] tc: u32: Fix key folding in sample option
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979425
Upstream Status: iproute2.git commit 9b7ea92b
commit 9b7ea92b9e3feff2876f772ace01148b7406839c
Author: Phil Sutter <phil@nwl.cc>
Date: Wed Aug 4 11:18:28 2021 +0200
tc: u32: Fix key folding in sample option
In between Linux kernel 2.4 and 2.6, key folding for hash tables changed
in kernel space. When iproute2 dropped support for the older algorithm,
the wrong code was removed and kernel 2.4 folding method remained in
place. To get things functional for recent kernels again, restoring the
old code alone was not sufficient - additional byteorder fixes were
needed.
While being at it, make use of ffs() and thereby align the code with how
kernel determines the shift width.
Fixes: 267480f55383c ("Backout the 2.4 utsname hash patch.")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
tc/f_u32.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/tc/f_u32.c b/tc/f_u32.c
index 2ed5254a..a5747f67 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -978,6 +978,13 @@ show_k:
goto show_k;
}
+static __u32 u32_hash_fold(struct tc_u32_key *key)
+{
+ __u8 fshift = key->mask ? ffs(ntohl(key->mask)) - 1 : 0;
+
+ return ntohl(key->val & key->mask) >> fshift;
+}
+
static int u32_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
@@ -1110,9 +1117,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle,
}
NEXT_ARG();
}
- hash = sel2.keys[0].val & sel2.keys[0].mask;
- hash ^= hash >> 16;
- hash ^= hash >> 8;
+ hash = u32_hash_fold(&sel2.keys[0]);
htid = ((hash % divisor) << 12) | (htid & 0xFFF00000);
sample_ok = 1;
continue;
--
2.31.1

View File

@ -1,67 +0,0 @@
From f2cb0f1570ca603c5d92d6a7d87596d07fdb01cd Mon Sep 17 00:00:00 2001
Message-Id: <f2cb0f1570ca603c5d92d6a7d87596d07fdb01cd.1612868485.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1612868485.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1612868485.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Tue, 9 Feb 2021 12:00:58 +0100
Subject: [PATCH] iproute: force rtm_dst_len to 32/128
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1852038
Upstream Status: unknown commit 5a37254b
commit 5a37254b71249bfb73d44d6278d767a6b127a2f9
Author: Luca Boccassi <bluca@debian.org>
Date: Sun Jan 24 17:36:58 2021 +0000
iproute: force rtm_dst_len to 32/128
Since NETLINK_GET_STRICT_CHK was enabled, the kernel rejects commands
that pass a prefix length, eg:
ip route get `1.0.0.0/1
Error: ipv4: Invalid values in header for route get request.
ip route get 0.0.0.0/0
Error: ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4
Since there's no point in setting a rtm_dst_len that we know is going
to be rejected, just force it to the right value if it's passed on
the command line. Print a warning to stderr to notify users.
Bug-Debian: https://bugs.debian.org/944730
Reported-By: Clément 'wxcafé' Hertling <wxcafe@wxcafe.net>
Signed-off-by: Luca Boccassi <bluca@debian.org>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
ip/iproute.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/ip/iproute.c b/ip/iproute.c
index 05ec2c29..1f3c347e 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -2067,7 +2067,18 @@ static int iproute_get(int argc, char **argv)
if (addr.bytelen)
addattr_l(&req.n, sizeof(req),
RTA_DST, &addr.data, addr.bytelen);
- req.r.rtm_dst_len = addr.bitlen;
+ if (req.r.rtm_family == AF_INET && addr.bitlen != 32) {
+ fprintf(stderr,
+ "Warning: /%u as prefix is invalid, only /32 (or none) is supported.\n",
+ addr.bitlen);
+ req.r.rtm_dst_len = 32;
+ } else if (req.r.rtm_family == AF_INET6 && addr.bitlen != 128) {
+ fprintf(stderr,
+ "Warning: /%u as prefix is invalid, only /128 (or none) is supported.\n",
+ addr.bitlen);
+ req.r.rtm_dst_len = 128;
+ } else
+ req.r.rtm_dst_len = addr.bitlen;
address_found = true;
}
argc--; argv++;
--
2.29.2

View File

@ -0,0 +1,84 @@
From 0b66dc13c157f4d34518c06dd774ef39be0df271 Mon Sep 17 00:00:00 2001
Message-Id: <0b66dc13c157f4d34518c06dd774ef39be0df271.1628790091.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Thu, 12 Aug 2021 18:26:39 +0200
Subject: [PATCH] tc: htb: improve burst error messages
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1910745
Upstream Status: iproute2.git commit e44786b2
commit e44786b26934e4fbf337b0af73a9e6f53d458a25
Author: Andrea Claudi <aclaudi@redhat.com>
Date: Thu May 6 12:42:06 2021 +0200
tc: htb: improve burst error messages
When a wrong value is provided for "burst" or "cburst" parameters, the
resulting error message is unclear and can be misleading:
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger
Illegal "buffer"
The message claims an illegal "buffer" is provided, but neither the
inline help nor the man page list "buffer" among the htb parameters, and
the only way to know that "burst", "maxburst" and "buffer" are synonyms
is to look into tc/q_htb.c.
This commit tries to improve this simply changing the error string to
the parameter name provided in the user-given command, clearly pointing
out where the wrong value is.
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger
Illegal "burst"
$ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100Kbps maxburst errtrigger
Illegal "maxburst"
Reported-by: Sebastian Mitterle <smitterl@redhat.com>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
---
tc/q_htb.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/tc/q_htb.c b/tc/q_htb.c
index 42566355..b5f95f67 100644
--- a/tc/q_htb.c
+++ b/tc/q_htb.c
@@ -125,6 +125,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */
struct rtattr *tail;
__u64 ceil64 = 0, rate64 = 0;
+ char *param;
while (argc > 0) {
if (matches(*argv, "prio") == 0) {
@@ -160,17 +161,19 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
} else if (matches(*argv, "burst") == 0 ||
strcmp(*argv, "buffer") == 0 ||
strcmp(*argv, "maxburst") == 0) {
+ param = *argv;
NEXT_ARG();
if (get_size_and_cell(&buffer, &cell_log, *argv) < 0) {
- explain1("buffer");
+ explain1(param);
return -1;
}
} else if (matches(*argv, "cburst") == 0 ||
strcmp(*argv, "cbuffer") == 0 ||
strcmp(*argv, "cmaxburst") == 0) {
+ param = *argv;
NEXT_ARG();
if (get_size_and_cell(&cbuffer, &ccell_log, *argv) < 0) {
- explain1("cbuffer");
+ explain1(param);
return -1;
}
} else if (strcmp(*argv, "ceil") == 0) {
--
2.31.1

View File

@ -1,104 +0,0 @@
From e5143d1e2787fca4ea365c4010e0da5bcbbbba36 Mon Sep 17 00:00:00 2001
Message-Id: <e5143d1e2787fca4ea365c4010e0da5bcbbbba36.1615575079.git.aclaudi@redhat.com>
In-Reply-To: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1615575079.git.aclaudi@redhat.com>
References: <cb7ce51cc1abd7b98370b903ec96205ebfe48661.1615575079.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Mon, 8 Mar 2021 12:52:23 +0100
Subject: [PATCH] iplink_bareudp: cleanup help message and man page
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1912412
Upstream Status: unknown commit 86d9660d
commit 86d9660dc1805be4435497ff194f618535e8fc97
Author: Guillaume Nault <gnault@redhat.com>
Date: Mon Feb 1 18:44:07 2021 +0100
iplink_bareudp: cleanup help message and man page
* Fix PROTO description in help message (mpls isn't a valid argument).
* Remove SRCPORTMIN description from help message since it doesn't
appear in the syntax string.
* Use same keywords in help message and in man page.
* Use the "ethertype" option name (.B ethertype) rather than the
option value (.I ETHERTYPE) in the man page description of
[no]multiproto.
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
ip/iplink_bareudp.c | 8 +++++---
man/man8/ip-link.8.in | 15 +++++++++------
2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/ip/iplink_bareudp.c b/ip/iplink_bareudp.c
index 860ec699..aa311106 100644
--- a/ip/iplink_bareudp.c
+++ b/ip/iplink_bareudp.c
@@ -22,9 +22,11 @@ static void print_explain(FILE *f)
" [ srcportmin PORT ]\n"
" [ [no]multiproto ]\n"
"\n"
- "Where: PORT := 0-65535\n"
- " PROTO := NUMBER | ip | mpls\n"
- " SRCPORTMIN := 0-65535\n"
+ "Where: PORT := UDP_PORT\n"
+ " PROTO := ETHERTYPE\n"
+ "\n"
+ "Note: ETHERTYPE can be given as number or as protocol name (\"ipv4\", \"ipv6\",\n"
+ " \"mpls_uc\", etc.).\n"
);
}
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index f451ecf3..ce3c8636 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -1304,9 +1304,9 @@ For a link of type
the following additional arguments are supported:
.BI "ip link add " DEVICE
-.BI type " bareudp " dstport " PORT " ethertype " ETHERTYPE"
+.BI type " bareudp " dstport " PORT " ethertype " PROTO"
[
-.BI srcportmin " SRCPORTMIN "
+.BI srcportmin " PORT "
] [
.RB [ no ] multiproto
]
@@ -1317,11 +1317,14 @@ the following additional arguments are supported:
- specifies the destination port for the UDP tunnel.
.sp
-.BI ethertype " ETHERTYPE"
+.BI ethertype " PROTO"
- specifies the ethertype of the L3 protocol being tunnelled.
+.B ethertype
+can be given as plain Ethernet protocol number or using the protocol name
+("ipv4", "ipv6", "mpls_uc", etc.).
.sp
-.BI srcportmin " SRCPORTMIN"
+.BI srcportmin " PORT"
- selects the lowest value of the UDP tunnel source port range.
.sp
@@ -1329,11 +1332,11 @@ the following additional arguments are supported:
- activates support for protocols similar to the one
.RB "specified by " ethertype .
When
-.I ETHERTYPE
+.B ethertype
is "mpls_uc" (that is, unicast MPLS), this allows the tunnel to also handle
multicast MPLS.
When
-.I ETHERTYPE
+.B ethertype
is "ipv4", this allows the tunnel to also handle IPv6. This option is disabled
by default.
--
2.30.2

View File

@ -0,0 +1,67 @@
From d1f0f7f4e3e3a372a51e64bdd88f8ddecde1fbbf Mon Sep 17 00:00:00 2001
Message-Id: <d1f0f7f4e3e3a372a51e64bdd88f8ddecde1fbbf.1633614399.git.aclaudi@redhat.com>
In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1633614399.git.aclaudi@redhat.com>
References: <650694eb0120722499207078f965442ef7343bb1.1633614399.git.aclaudi@redhat.com>
From: Andrea Claudi <aclaudi@redhat.com>
Date: Tue, 28 Sep 2021 11:46:43 +0200
Subject: [PATCH] lib: bpf_legacy: fix bpffs mount when /sys/fs/bpf exists
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1995082
Upstream Status: iproute2.git commit 2f5825cb
commit 2f5825cb38028a14961a79844a069be4e3057eca
Author: Andrea Claudi <aclaudi@redhat.com>
Date: Tue Sep 21 11:33:24 2021 +0200
lib: bpf_legacy: fix bpffs mount when /sys/fs/bpf exists
bpf selftests using iproute2 fails with:
$ ip link set dev veth0 xdp object ../bpf/xdp_dummy.o section xdp_dummy
Continuing without mounted eBPF fs. Too old kernel?
mkdir (null)/globals failed: No such file or directory
Unable to load program
This happens when the /sys/fs/bpf directory exists. In this case, mkdir
in bpf_mnt_check_target() fails with errno == EEXIST, and the function
returns -1. Thus bpf_get_work_dir() does not call bpf_mnt_fs() and the
bpffs is not mounted.
Fix this in bpf_mnt_check_target(), returning 0 when the mountpoint
exists.
Fixes: d4fcdbbec9df ("lib/bpf: Fix and simplify bpf_mnt_check_target()")
Reported-by: Mingyu Shi <mshi@redhat.com>
Reported-by: Jiri Benc <jbenc@redhat.com>
Suggested-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
lib/bpf_legacy.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c
index 7ec9ce9d..f9dfad6e 100644
--- a/lib/bpf_legacy.c
+++ b/lib/bpf_legacy.c
@@ -513,9 +513,12 @@ static int bpf_mnt_check_target(const char *target)
int ret;
ret = mkdir(target, S_IRWXU);
- if (ret && errno != EEXIST)
+ if (ret) {
+ if (errno == EEXIST)
+ return 0;
fprintf(stderr, "mkdir %s failed: %s\n", target,
strerror(errno));
+ }
return ret;
}
--
2.31.1

5
SOURCES/iproute2.sh Normal file
View File

@ -0,0 +1,5 @@
# tc initialization script (sh)
if [ -z "$TC_LIB_DIR" ]; then
export TC_LIB_DIR=/usr/lib64/tc
fi

View File

@ -1,30 +1,28 @@
%define rpmversion 5.9.0
%define specrelease 4%{?dist}
%define pkg_release %{specrelease}%{?buildid}
Summary: Advanced IP routing and network device configuration tools Summary: Advanced IP routing and network device configuration tools
Name: iproute Name: iproute
Version: %{rpmversion} Version: 5.12.0
Release: %{pkg_release} Release: 4%{?dist}%{?buildid}
Group: Applications/System Group: Applications/System
URL: http://kernel.org/pub/linux/utils/net/%{name}2/ URL: http://kernel.org/pub/linux/utils/net/%{name}2/
Source0: http://kernel.org/pub/linux/utils/net/%{name}2/%{name}2-%{version}.tar.xz Source0: http://kernel.org/pub/linux/utils/net/%{name}2/%{name}2-%{version}.tar.xz
Source1: rt_dsfield.deprecated Source1: rt_dsfield.deprecated
Patch0: 0001-v5.9.0.patch Source2: iproute2.sh
Patch1: 0002-Update-kernel-headers.patch Patch0: 0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch
Patch2: 0003-m_vlan-add-pop_eth-and-push_eth-actions.patch Patch1: 0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch
Patch3: 0004-m_mpls-add-mac_push-action.patch Patch2: 0003-mptcp-add-support-for-port-based-endpoint.patch
Patch4: 0005-m_mpls-test-the-mac_push-action-after-modify.patch Patch3: 0004-Update-kernel-headers.patch
Patch5: 0006-tc-vlan-fix-help-and-error-message-strings.patch Patch4: 0005-police-add-support-for-packet-per-second-rate-limiti.patch
Patch6: 0007-tc-mpls-fix-manpage-example-and-help-message-string.patch Patch5: 0006-police-Add-support-for-json-output.patch
Patch7: 0008-tc-flower-fix-json-output-with-mpls-lse.patch Patch6: 0007-police-Fix-normal-output-back-to-what-it-was.patch
Patch8: 0009-iproute-force-rtm_dst_len-to-32-128.patch Patch7: 0008-tc-u32-Fix-key-folding-in-sample-option.patch
Patch9: 0010-iplink_bareudp-cleanup-help-message-and-man-page.patch Patch8: 0009-tc-htb-improve-burst-error-messages.patch
Patch9: 0010-lib-bpf_legacy-fix-bpffs-mount-when-sys-fs-bpf-exist.patch
License: GPLv2+ and Public Domain License: GPLv2+ and Public Domain
BuildRequires: bison BuildRequires: bison
BuildRequires: elfutils-libelf-devel BuildRequires: elfutils-libelf-devel
BuildRequires: flex BuildRequires: flex
BuildRequires: iptables-devel >= 1.4.5 BuildRequires: iptables-devel >= 1.4.5
BuildRequires: libbpf-devel
BuildRequires: libdb-devel BuildRequires: libdb-devel
BuildRequires: libmnl-devel BuildRequires: libmnl-devel
BuildRequires: libselinux-devel BuildRequires: libselinux-devel
@ -36,6 +34,7 @@ BuildRequires: linux-atm-libs-devel
%endif %endif
# For the UsrMove transition period # For the UsrMove transition period
Conflicts: filesystem < 3 Conflicts: filesystem < 3
Requires: libbpf
Provides: /sbin/ip Provides: /sbin/ip
Obsoletes: %{name} < 4.5.0-3 Obsoletes: %{name} < 4.5.0-3
@ -50,7 +49,7 @@ Group: Applications/System
License: GPLv2+ License: GPLv2+
Obsoletes: %{name} < 4.5.0-3 Obsoletes: %{name} < 4.5.0-3
Requires: %{name}%{?_isa} = %{version}-%{release} Requires: %{name}%{?_isa} = %{version}-%{release}
Provides: tc Provides: /sbin/tc
%description tc %description tc
The Traffic Control utility manages queueing disciplines, their classes and The Traffic Control utility manages queueing disciplines, their classes and
@ -82,20 +81,12 @@ The libnetlink static library.
%autosetup -p1 -n %{name}2-%{version} %autosetup -p1 -n %{name}2-%{version}
%build %build
export CFLAGS="%{optflags} -fno-exceptions" %configure
export LDFLAGS="%{build_ldflags}" %make_build
export LIBDIR=/%{_libdir}
export IPT_LIB_DIR=/%{_lib}/xtables
./configure
make %{?_smp_mflags}
%install %install
export DESTDIR='%{buildroot}'
export SBINDIR='%{_sbindir}' export SBINDIR='%{_sbindir}'
export MANDIR='%{_mandir}'
export LIBDIR='%{_libdir}' export LIBDIR='%{_libdir}'
export CONFDIR='%{_sysconfdir}/iproute2'
export DOCDIR='%{_docdir}'
%make_install %make_install
echo '.so man8/tc-cbq.8' > %{buildroot}%{_mandir}/man8/cbq.8 echo '.so man8/tc-cbq.8' > %{buildroot}%{_mandir}/man8/cbq.8
@ -110,6 +101,9 @@ rm -rf '%{buildroot}%{_docdir}'
# Append deprecated values to rt_dsfield for compatibility reasons # Append deprecated values to rt_dsfield for compatibility reasons
cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
# use TC_LIB_DIR environment variable
install -D -m644 %{SOURCE2} %{buildroot}%{_sysconfdir}/profile.d/iproute2.sh
%files %files
%dir %{_sysconfdir}/iproute2 %dir %{_sysconfdir}/iproute2
%{!?_licensedir:%global license %%doc} %{!?_licensedir:%global license %%doc}
@ -128,6 +122,7 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
%files tc %files tc
%{!?_licensedir:%global license %%doc} %{!?_licensedir:%global license %%doc}
%license COPYING %license COPYING
%{_sysconfdir}/profile.d/iproute2.sh
%{_mandir}/man7/tc-* %{_mandir}/man7/tc-*
%{_mandir}/man8/tc* %{_mandir}/man8/tc*
%{_mandir}/man8/cbq* %{_mandir}/man8/cbq*
@ -152,6 +147,28 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield
%{_includedir}/iproute2/bpf_elf.h %{_includedir}/iproute2/bpf_elf.h
%changelog %changelog
* Thu Oct 07 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-4.el8]
- lib: bpf_legacy: fix bpffs mount when /sys/fs/bpf exists (Andrea Claudi) [1995082]
* Thu Aug 12 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-3.el8]
- tc: htb: improve burst error messages (Andrea Claudi) [1910745]
- tc: u32: Fix key folding in sample option (Andrea Claudi) [1979425]
- police: Fix normal output back to what it was (Andrea Claudi) [1981393]
- police: Add support for json output (Andrea Claudi) [1981393]
- police: add support for packet-per-second rate limiting (Andrea Claudi) [1981393]
- Update kernel headers (Andrea Claudi) [1981393]
- mptcp: add support for port based endpoint (Andrea Claudi) [1984733]
* Fri Aug 08 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-2.el8]
- add build and run-time dependencies on libbpf (Andrea Claudi) [1990402]
* Mon Jun 28 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-1.el8]
- tc: f_flower: Add missing ct_state flags to usage description (Andrea Claudi) [1957243]
- tc: f_flower: Add option to match on related ct state (Andrea Claudi) [1957243]
* Thu Apr 29 2021 Andrea Claudi <aclaudi@redhat.com> [5.12.0-0.el8]
- New version 5.12.0 [1939382]
* Fri Mar 12 2021 Andrea Claudi <aclaudi@redhat.com> [5.9.0-4.el8] * Fri Mar 12 2021 Andrea Claudi <aclaudi@redhat.com> [5.9.0-4.el8]
- iplink_bareudp: cleanup help message and man page (Andrea Claudi) [1912412] - iplink_bareudp: cleanup help message and man page (Andrea Claudi) [1912412]