diff --git a/.gitignore b/.gitignore index 581167a..8da3976 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/iproute2-5.9.0.tar.xz +SOURCES/iproute2-5.12.0.tar.xz diff --git a/.iproute.metadata b/.iproute.metadata index 3b76ec4..7ed2e23 100644 --- a/.iproute.metadata +++ b/.iproute.metadata @@ -1 +1 @@ -c9e0ca453307ce7c221ccffc10939f4136b4ad5d SOURCES/iproute2-5.9.0.tar.xz +4e18c1d72a29f41a5968ac8a9b266470f6ad89a7 SOURCES/iproute2-5.12.0.tar.xz diff --git a/SOURCES/0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch b/SOURCES/0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch new file mode 100644 index 0000000..d68d739 --- /dev/null +++ b/SOURCES/0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch @@ -0,0 +1,73 @@ +From d9bcc70051d23c62cc802a356dc7e4324398765e Mon Sep 17 00:00:00 2001 +Message-Id: +From: Andrea Claudi +Date: Mon, 28 Jun 2021 15:22:17 +0200 +Subject: [PATCH] tc: f_flower: Add option to match on related ct state + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1957243 +Upstream Status: unknown commit 7fda6c58 + +commit 7fda6c588a295ad381fdf0b9b9971169b2f9d9dc +Author: Ariel Levkovich +Date: Fri May 21 20:07:06 2021 +0300 + + tc: f_flower: Add option to match on related ct state + + Add support for matching on ct_state flag related. + The related state indicates a packet is associated with an existing + connection. 
+ + Example: + $ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \ + ct_state -est-rel+trk \ + action mirred egress redirect dev ens1f0_1 + + $ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \ + ct_state +rel+trk \ + action mirred egress redirect dev ens1f0_1 + + Signed-off-by: Ariel Levkovich + Reviewed-by: Jiri Pirko + Signed-off-by: David Ahern +--- + man/man8/tc-flower.8 | 2 ++ + tc/f_flower.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8 +index f7336b62..4541d937 100644 +--- a/man/man8/tc-flower.8 ++++ b/man/man8/tc-flower.8 +@@ -391,6 +391,8 @@ rpl - The packet is in the reply direction, meaning that it is in the opposite d + .TP + inv - The state is invalid. The packet couldn't be associated to a connection. + .TP ++rel - The packet is related to an existing connection. ++.TP + Example: +trk+est + .RE + .TP +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 53822a95..29db2e23 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -94,7 +94,7 @@ static void explain(void) + " LSE := lse depth DEPTH { label LABEL | tc TC | bos BOS | ttl TTL }\n" + " FILTERID := X:Y:Z\n" + " MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n" +- " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new\n" ++ " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel\n" + " ACTION-SPEC := ... 
look at individual actions\n" + "\n" + "NOTE: CLASSID, IP-PROTO are parsed as hexadecimal input.\n" +@@ -345,6 +345,7 @@ static struct flower_ct_states { + { "trk", TCA_FLOWER_KEY_CT_FLAGS_TRACKED }, + { "new", TCA_FLOWER_KEY_CT_FLAGS_NEW }, + { "est", TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED }, ++ { "rel", TCA_FLOWER_KEY_CT_FLAGS_RELATED }, + { "inv", TCA_FLOWER_KEY_CT_FLAGS_INVALID }, + { "rpl", TCA_FLOWER_KEY_CT_FLAGS_REPLY }, + }; +-- +2.31.1 + diff --git a/SOURCES/0001-v5.9.0.patch b/SOURCES/0001-v5.9.0.patch deleted file mode 100644 index f0868bb..0000000 --- a/SOURCES/0001-v5.9.0.patch +++ /dev/null @@ -1,20 +0,0 @@ -From cb7ce51cc1abd7b98370b903ec96205ebfe48661 Mon Sep 17 00:00:00 2001 -Message-Id: -From: Stephen Hemminger -Date: Thu, 15 Oct 2020 15:18:35 -0700 -Subject: [PATCH] v5.9.0 - ---- - include/version.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/version.h b/include/version.h -index 0088493d..89d05974 100644 ---- a/include/version.h -+++ b/include/version.h -@@ -1 +1 @@ --static const char version[] = "5.8.0"; -+static const char version[] = "5.9.0"; --- -2.29.2 - diff --git a/SOURCES/0002-Update-kernel-headers.patch b/SOURCES/0002-Update-kernel-headers.patch deleted file mode 100644 index 60a94f4..0000000 --- a/SOURCES/0002-Update-kernel-headers.patch +++ /dev/null @@ -1,308 +0,0 @@ -From 1b8a3c04bf8d115e2d427d41a437be03ecf34ce8 Mon Sep 17 00:00:00 2001 -Message-Id: <1b8a3c04bf8d115e2d427d41a437be03ecf34ce8.1611877215.git.aclaudi@redhat.com> -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:34:34 +0100 -Subject: [PATCH] Update kernel headers - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit 34be2d26 -Conflicts: on include/uapi/linux/bpf.h, due to missing commits: - - c8eb4b52c1b1 ("Update kernel headers") - - f481515c89fa ("Update kernel headers") - -commit 34be2d2619e29836605a7d1669d642f892fc725e -Author: David Ahern -Date: Wed Oct 7 00:01:26 
2020 -0600 - - Update kernel headers - - Update kernel headers to commit: - 9faebeb2d800 ("Merge branch 'ethtool-allow-dumping-policies-to-user-space'") - - Signed-off-by: David Ahern ---- - include/uapi/linux/bpf.h | 64 +++++++++++++++++++++++++---- - include/uapi/linux/devlink.h | 5 +++ - include/uapi/linux/genetlink.h | 11 +++++ - include/uapi/linux/l2tp.h | 1 + - include/uapi/linux/netlink.h | 2 + - include/uapi/linux/tc_act/tc_mpls.h | 1 + - include/uapi/linux/tc_act/tc_vlan.h | 4 ++ - 7 files changed, 79 insertions(+), 9 deletions(-) - -diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h -index b21cc6af..36e5bc2d 100644 ---- a/include/uapi/linux/bpf.h -+++ b/include/uapi/linux/bpf.h -@@ -404,6 +404,9 @@ enum { - - /* Enable memory-mapping BPF map */ - BPF_F_MMAPABLE = (1U << 10), -+ -+/* Share perf_event among processes */ -+ BPF_F_PRESERVE_ELEMS = (1U << 11), - }; - - /* Flags for BPF_PROG_QUERY. */ -@@ -414,6 +417,11 @@ enum { - */ - #define BPF_F_QUERY_EFFECTIVE (1U << 0) - -+/* Flags for BPF_PROG_TEST_RUN */ -+ -+/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ -+#define BPF_F_TEST_RUN_ON_CPU (1U << 0) -+ - /* type for BPF_ENABLE_STATS */ - enum bpf_stats_type { - /* enabled run_time_ns and run_cnt */ -@@ -556,6 +564,8 @@ union bpf_attr { - */ - __aligned_u64 ctx_in; - __aligned_u64 ctx_out; -+ __u32 flags; -+ __u32 cpu; - } test; - - struct { /* anonymous struct used by BPF_*_GET_*_ID */ -@@ -622,8 +632,13 @@ union bpf_attr { - }; - __u32 attach_type; /* attach type */ - __u32 flags; /* extra flags */ -- __aligned_u64 iter_info; /* extra bpf_iter_link_info */ -- __u32 iter_info_len; /* iter_info length */ -+ union { -+ __u32 target_btf_id; /* btf_id of target to attach to */ -+ struct { -+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */ -+ __u32 iter_info_len; /* iter_info length */ -+ }; -+ }; - } link_create; - - struct { /* struct used by BPF_LINK_UPDATE command */ -@@ -2496,7 +2511,7 @@ union bpf_attr { - * 
result is from *reuse*\ **->socks**\ [] using the hash of the - * tuple. - * -- * long bpf_sk_release(struct bpf_sock *sock) -+ * long bpf_sk_release(void *sock) - * Description - * Release the reference held by *sock*. *sock* must be a - * non-**NULL** pointer that was returned from -@@ -2676,7 +2691,7 @@ union bpf_attr { - * result is from *reuse*\ **->socks**\ [] using the hash of the - * tuple. - * -- * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) -+ * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) - * Description - * Check whether *iph* and *th* contain a valid SYN cookie ACK for - * the listening socket in *sk*. -@@ -2842,6 +2857,7 @@ union bpf_attr { - * 0 on success. - * - * **-ENOENT** if the bpf-local-storage cannot be found. -+ * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). - * - * long bpf_send_signal(u32 sig) - * Description -@@ -2858,7 +2874,7 @@ union bpf_attr { - * - * **-EAGAIN** if bpf program can try again. - * -- * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) -+ * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) - * Description - * Try to issue a SYN cookie for the packet with corresponding - * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. -@@ -3087,7 +3103,7 @@ union bpf_attr { - * Return - * The id is returned or 0 in case the id could not be retrieved. - * -- * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) -+ * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) - * Description - * Helper is overloaded depending on BPF program type. This - * description applies to **BPF_PROG_TYPE_SCHED_CLS** and -@@ -3215,11 +3231,11 @@ union bpf_attr { - * - * **-EOVERFLOW** if an overflow happened: The same object will be tried again. 
- * -- * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) -+ * u64 bpf_sk_cgroup_id(void *sk) - * Description - * Return the cgroup v2 id of the socket *sk*. - * -- * *sk* must be a non-**NULL** pointer to a full socket, e.g. one -+ * *sk* must be a non-**NULL** pointer to a socket, e.g. one - * returned from **bpf_sk_lookup_xxx**\ (), - * **bpf_sk_fullsock**\ (), etc. The format of returned id is - * same as in **bpf_skb_cgroup_id**\ (). -@@ -3229,7 +3245,7 @@ union bpf_attr { - * Return - * The id is returned or 0 in case the id could not be retrieved. - * -- * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) -+ * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *sk* at the *ancestor_level*. The root cgroup is at -@@ -4447,4 +4463,34 @@ struct bpf_sk_lookup { - __u32 local_port; /* Host byte order */ - }; - -+/* -+ * struct btf_ptr is used for typed pointer representation; the -+ * type id is used to render the pointer data as the appropriate type -+ * via the bpf_snprintf_btf() helper described above. A flags field - -+ * potentially to specify additional details about the BTF pointer -+ * (rather than its mode of display) - is included for future use. -+ * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. -+ */ -+struct btf_ptr { -+ void *ptr; -+ __u32 type_id; -+ __u32 flags; /* BTF ptr flags; unused at present. */ -+}; -+ -+/* -+ * Flags to control bpf_snprintf_btf() behaviour. -+ * - BTF_F_COMPACT: no formatting around type information -+ * - BTF_F_NONAME: no struct/union member names/types -+ * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; -+ * equivalent to %px. 
-+ * - BTF_F_ZERO: show zero-valued struct/union members; they -+ * are not displayed by default -+ */ -+enum { -+ BTF_F_COMPACT = (1ULL << 0), -+ BTF_F_NONAME = (1ULL << 1), -+ BTF_F_PTR_RAW = (1ULL << 2), -+ BTF_F_ZERO = (1ULL << 3), -+}; -+ - #endif /* __LINUX_BPF_H__ */ -diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h -index b7f23faa..e5586fa0 100644 ---- a/include/uapi/linux/devlink.h -+++ b/include/uapi/linux/devlink.h -@@ -13,6 +13,8 @@ - #ifndef _LINUX_DEVLINK_H_ - #define _LINUX_DEVLINK_H_ - -+#include -+ - #define DEVLINK_GENL_NAME "devlink" - #define DEVLINK_GENL_VERSION 0x1 - #define DEVLINK_GENL_MCGRP_CONFIG_NAME "config" -@@ -193,6 +195,9 @@ enum devlink_port_flavour { - * port that faces the PCI VF. - */ - DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */ -+ DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but -+ * is not used in any way. -+ */ - }; - - enum devlink_param_cmode { -diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h -index 7c6c390c..9fa720ee 100644 ---- a/include/uapi/linux/genetlink.h -+++ b/include/uapi/linux/genetlink.h -@@ -64,6 +64,8 @@ enum { - CTRL_ATTR_OPS, - CTRL_ATTR_MCAST_GROUPS, - CTRL_ATTR_POLICY, -+ CTRL_ATTR_OP_POLICY, -+ CTRL_ATTR_OP, - __CTRL_ATTR_MAX, - }; - -@@ -85,6 +87,15 @@ enum { - __CTRL_ATTR_MCAST_GRP_MAX, - }; - -+enum { -+ CTRL_ATTR_POLICY_UNSPEC, -+ CTRL_ATTR_POLICY_DO, -+ CTRL_ATTR_POLICY_DUMP, -+ -+ __CTRL_ATTR_POLICY_DUMP_MAX, -+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 -+}; -+ - #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) - - -diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h -index 131c3a26..abc0fc81 100644 ---- a/include/uapi/linux/l2tp.h -+++ b/include/uapi/linux/l2tp.h -@@ -144,6 +144,7 @@ enum { - L2TP_ATTR_RX_OOS_PACKETS, /* u64 */ - L2TP_ATTR_RX_ERRORS, /* u64 */ - L2TP_ATTR_STATS_PAD, -+ L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ - __L2TP_ATTR_STATS_MAX, - }; 
- -diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h -index 695c88e3..f7749205 100644 ---- a/include/uapi/linux/netlink.h -+++ b/include/uapi/linux/netlink.h -@@ -327,6 +327,7 @@ enum netlink_attribute_type { - * the index, if limited inside the nesting (U32) - * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the - * bitfield32 type (U32) -+ * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) - * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment - */ - enum netlink_policy_type_attr { -@@ -342,6 +343,7 @@ enum netlink_policy_type_attr { - NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, - NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, - NL_POLICY_TYPE_ATTR_PAD, -+ NL_POLICY_TYPE_ATTR_MASK, - - /* keep last */ - __NL_POLICY_TYPE_ATTR_MAX, -diff --git a/include/uapi/linux/tc_act/tc_mpls.h b/include/uapi/linux/tc_act/tc_mpls.h -index 9360e952..9e4e8f52 100644 ---- a/include/uapi/linux/tc_act/tc_mpls.h -+++ b/include/uapi/linux/tc_act/tc_mpls.h -@@ -10,6 +10,7 @@ - #define TCA_MPLS_ACT_PUSH 2 - #define TCA_MPLS_ACT_MODIFY 3 - #define TCA_MPLS_ACT_DEC_TTL 4 -+#define TCA_MPLS_ACT_MAC_PUSH 5 - - struct tc_mpls { - tc_gen; /* generic TC action fields. 
*/ -diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h -index 168995b5..5b306fe8 100644 ---- a/include/uapi/linux/tc_act/tc_vlan.h -+++ b/include/uapi/linux/tc_act/tc_vlan.h -@@ -16,6 +16,8 @@ - #define TCA_VLAN_ACT_POP 1 - #define TCA_VLAN_ACT_PUSH 2 - #define TCA_VLAN_ACT_MODIFY 3 -+#define TCA_VLAN_ACT_POP_ETH 4 -+#define TCA_VLAN_ACT_PUSH_ETH 5 - - struct tc_vlan { - tc_gen; -@@ -30,6 +32,8 @@ enum { - TCA_VLAN_PUSH_VLAN_PROTOCOL, - TCA_VLAN_PAD, - TCA_VLAN_PUSH_VLAN_PRIORITY, -+ TCA_VLAN_PUSH_ETH_DST, -+ TCA_VLAN_PUSH_ETH_SRC, - __TCA_VLAN_MAX, - }; - #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) --- -2.29.2 - diff --git a/SOURCES/0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch b/SOURCES/0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch new file mode 100644 index 0000000..023a925 --- /dev/null +++ b/SOURCES/0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch @@ -0,0 +1,43 @@ +From 5f12d06dac98f9085273ce548d2ed13341c920fe Mon Sep 17 00:00:00 2001 +Message-Id: <5f12d06dac98f9085273ce548d2ed13341c920fe.1624894546.git.aclaudi@redhat.com> +In-Reply-To: +References: +From: Andrea Claudi +Date: Mon, 28 Jun 2021 15:22:17 +0200 +Subject: [PATCH] tc: f_flower: Add missing ct_state flags to usage description + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1957243 +Upstream Status: unknown commit 825bd5da + +commit 825bd5dacb98597a5595b470bd275bb103a7b9c2 +Author: Ariel Levkovich +Date: Fri May 21 20:07:07 2021 +0300 + + tc: f_flower: Add missing ct_state flags to usage description + + Add ct_state flags rpl and inv to the commands usage + description + + Signed-off-by: Ariel Levkovich + Reviewed-by: Jiri Pirko + Signed-off-by: David Ahern +--- + tc/f_flower.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tc/f_flower.c b/tc/f_flower.c +index 29db2e23..c5af0276 100644 +--- a/tc/f_flower.c ++++ b/tc/f_flower.c +@@ -94,7 +94,7 @@ static void explain(void) + " LSE := 
lse depth DEPTH { label LABEL | tc TC | bos BOS | ttl TTL }\n" + " FILTERID := X:Y:Z\n" + " MASKED_LLADDR := { LLADDR | LLADDR/MASK | LLADDR/BITS }\n" +- " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel\n" ++ " MASKED_CT_STATE := combination of {+|-} and flags trk,est,new,rel,rpl,inv\n" + " ACTION-SPEC := ... look at individual actions\n" + "\n" + "NOTE: CLASSID, IP-PROTO are parsed as hexadecimal input.\n" +-- +2.31.1 + diff --git a/SOURCES/0003-m_vlan-add-pop_eth-and-push_eth-actions.patch b/SOURCES/0003-m_vlan-add-pop_eth-and-push_eth-actions.patch deleted file mode 100644 index fa1b401..0000000 --- a/SOURCES/0003-m_vlan-add-pop_eth-and-push_eth-actions.patch +++ /dev/null @@ -1,343 +0,0 @@ -From cac52dd831b6982f6b27b02c26243edbe0b7d747 Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:03 +0100 -Subject: [PATCH] m_vlan: add pop_eth and push_eth actions - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit d61167dd - -commit d61167dd88b45832843b1458cd156f3b85c8ff16 -Author: Guillaume Nault -Date: Mon Oct 19 17:23:01 2020 +0200 - - m_vlan: add pop_eth and push_eth actions - - Add support for the new TCA_VLAN_ACT_POP_ETH and TCA_VLAN_ACT_PUSH_ETH - actions (kernel commit 19fbcb36a39e ("net/sched: act_vlan: - Add {POP,PUSH}_ETH actions"). These action let TC remove or add the - Ethernet at the head of a frame. - - Drop an Ethernet header: - # tc filter add dev ethX matchall action vlan pop_eth - - Push an Ethernet header (the original frame must have no MAC header): - # tc filter add dev ethX matchall action vlan \ - push_eth dst_mac 0a:00:00:00:00:02 src_mac 0a:00:00:00:00:01 - - Also add a test suite for m_vlan, which covers these new actions and - the pre-existing ones. 
- - Signed-off-by: Guillaume Nault - Signed-off-by: David Ahern ---- - man/man8/tc-vlan.8 | 39 +++++++++++++++++- - tc/m_vlan.c | 69 +++++++++++++++++++++++++++++++ - testsuite/tests/tc/vlan.t | 86 +++++++++++++++++++++++++++++++++++++++ - 3 files changed, 192 insertions(+), 2 deletions(-) - create mode 100755 testsuite/tests/tc/vlan.t - -diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 -index f5ffc25f..5c2808b1 100644 ---- a/man/man8/tc-vlan.8 -+++ b/man/man8/tc-vlan.8 -@@ -5,8 +5,8 @@ vlan - vlan manipulation module - .SH SYNOPSIS - .in +8 - .ti -8 --.BR tc " ... " "action vlan" " { " pop " |" --.IR PUSH " | " MODIFY " } [ " CONTROL " ]" -+.BR tc " ... " "action vlan" " { " pop " | " pop_eth " |" -+.IR PUSH " | " MODIFY " | " PUSH_ETH " } [ " CONTROL " ]" - - .ti -8 - .IR PUSH " := " -@@ -24,6 +24,11 @@ vlan - vlan manipulation module - .IR VLANPRIO " ] " - .BI id " VLANID" - -+.ti -8 -+.IR PUSH_ETH " := " -+.B push_eth -+.BI dst_mac " LLADDR " src_mac " LLADDR " -+ - .ti -8 - .IR CONTROL " := { " - .BR reclassify " | " pipe " | " drop " | " continue " | " pass " | " goto " " chain " " CHAIN_INDEX " }" -@@ -43,6 +48,20 @@ modes require at least a - and allow to optionally choose the - .I VLANPROTO - to use. -+ -+The -+.B vlan -+action can also be used to add or remove the base Ethernet header. The -+.B pop_eth -+mode, which takes no argument, is used to remove the base Ethernet header. All -+existing VLANs must have been previously dropped. The opposite operation, -+adding a base Ethernet header, is done with the -+.B push_eth -+mode. In that case, the packet must have no MAC header (stacking MAC headers is -+not permitted). This mode is mostly useful when a previous action has -+encapsulated the whole original frame behind a network header and one needs -+to prepend an Ethernet header before forwarding the resulting packet. -+ - .SH OPTIONS - .TP - .B pop -@@ -58,6 +77,16 @@ Replace mode. Existing 802.1Q tag is replaced. Requires at least - .B id - option. 
- .TP -+.B pop_eth -+Ethernet header decapsulation mode. Only works on a plain Ethernet header: -+VLANs, if any, must be removed first. -+.TP -+.B push_eth -+Ethernet header encapsulation mode. The Ethertype is automatically set -+using the network header type. Chaining Ethernet headers is not allowed: the -+packet must have no MAC header when using this mode. Requires the -+.BR "dst_mac " and " src_mac " options. -+.TP - .BI id " VLANID" - Specify the VLAN ID to encapsulate into. - .I VLANID -@@ -73,6 +102,12 @@ Choose the VLAN protocol to use. At the time of writing, the kernel accepts only - .BI priority " VLANPRIO" - Choose the VLAN priority to use. Decimal number in range of 0-7. - .TP -+.BI dst_mac " LLADDR" -+Choose the destination MAC address to use. -+.TP -+.BI src_mac " LLADDR" -+Choose the source MAC address to use. -+.TP - .I CONTROL - How to continue after executing this action. - .RS -diff --git a/tc/m_vlan.c b/tc/m_vlan.c -index 1096ba0f..e6b21330 100644 ---- a/tc/m_vlan.c -+++ b/tc/m_vlan.c -@@ -23,6 +23,8 @@ static const char * const action_names[] = { - [TCA_VLAN_ACT_POP] = "pop", - [TCA_VLAN_ACT_PUSH] = "push", - [TCA_VLAN_ACT_MODIFY] = "modify", -+ [TCA_VLAN_ACT_POP_ETH] = "pop_eth", -+ [TCA_VLAN_ACT_PUSH_ETH] = "push_eth", - }; - - static void explain(void) -@@ -31,6 +33,8 @@ static void explain(void) - "Usage: vlan pop\n" - " vlan push [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" - " vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" -+ " vlan pop_eth [CONTROL]\n" -+ " vlan push_eth dst_mac LLADDR src_mac LLADDR [CONTROL]\n" - " VLANPROTO is one of 802.1Q or 802.1AD\n" - " with default: 802.1Q\n" - " CONTROL := reclassify | pipe | drop | continue | pass |\n" -@@ -63,6 +67,10 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, - char **argv = *argv_p; - struct rtattr *tail; - int action = 0; -+ char dst_mac[ETH_ALEN] = {}; -+ int dst_mac_set = 0; -+ char 
src_mac[ETH_ALEN] = {}; -+ int src_mac_set = 0; - __u16 id; - int id_set = 0; - __u16 proto; -@@ -95,6 +103,18 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, - return -1; - } - action = TCA_VLAN_ACT_MODIFY; -+ } else if (matches(*argv, "pop_eth") == 0) { -+ if (action) { -+ unexpected(*argv); -+ return -1; -+ } -+ action = TCA_VLAN_ACT_POP_ETH; -+ } else if (matches(*argv, "push_eth") == 0) { -+ if (action) { -+ unexpected(*argv); -+ return -1; -+ } -+ action = TCA_VLAN_ACT_PUSH_ETH; - } else if (matches(*argv, "id") == 0) { - if (!has_push_attribs(action)) - invarg("only valid for push/modify", *argv); -@@ -119,6 +139,22 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, - if (get_u8(&prio, *argv, 0) || (prio & ~0x7)) - invarg("prio is invalid", *argv); - prio_set = 1; -+ } else if (matches(*argv, "dst_mac") == 0) { -+ if (action != TCA_VLAN_ACT_PUSH_ETH) -+ invarg("only valid for push_eth", *argv); -+ -+ NEXT_ARG(); -+ if (ll_addr_a2n(dst_mac, sizeof(dst_mac), *argv) < 0) -+ invarg("dst_mac is invalid", *argv); -+ dst_mac_set = 1; -+ } else if (matches(*argv, "src_mac") == 0) { -+ if (action != TCA_VLAN_ACT_PUSH_ETH) -+ invarg("only valid for push_eth", *argv); -+ -+ NEXT_ARG(); -+ if (ll_addr_a2n(src_mac, sizeof(src_mac), *argv) < 0) -+ invarg("src_mac is invalid", *argv); -+ src_mac_set = 1; - } else if (matches(*argv, "help") == 0) { - usage(); - } else { -@@ -150,6 +186,20 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, - return -1; - } - -+ if (action == TCA_VLAN_ACT_PUSH_ETH) { -+ if (!dst_mac_set) { -+ fprintf(stderr, "dst_mac needs to be set for %s\n", -+ action_names[action]); -+ explain(); -+ return -1; -+ } else if (!src_mac_set) { -+ fprintf(stderr, "src_mac needs to be set for %s\n", -+ action_names[action]); -+ explain(); -+ return -1; -+ } -+ } -+ - parm.v_action = action; - tail = addattr_nest(n, MAX_MSG, tca_id); - addattr_l(n, MAX_MSG, TCA_VLAN_PARMS, &parm, 
sizeof(parm)); -@@ -167,6 +217,12 @@ static int parse_vlan(struct action_util *a, int *argc_p, char ***argv_p, - } - if (prio_set) - addattr8(n, MAX_MSG, TCA_VLAN_PUSH_VLAN_PRIORITY, prio); -+ if (dst_mac_set) -+ addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_DST, dst_mac, -+ sizeof(dst_mac)); -+ if (src_mac_set) -+ addattr_l(n, MAX_MSG, TCA_VLAN_PUSH_ETH_SRC, src_mac, -+ sizeof(src_mac)); - - addattr_nest_end(n, tail); - -@@ -216,6 +272,19 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) - print_uint(PRINT_ANY, "priority", " priority %u", val); - } - break; -+ case TCA_VLAN_ACT_PUSH_ETH: -+ if (tb[TCA_VLAN_PUSH_ETH_DST] && -+ RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_DST]) == ETH_ALEN) { -+ ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_DST]), -+ ETH_ALEN, 0, b1, sizeof(b1)); -+ print_string(PRINT_ANY, "dst_mac", " dst_mac %s", b1); -+ } -+ if (tb[TCA_VLAN_PUSH_ETH_SRC && -+ RTA_PAYLOAD(tb[TCA_VLAN_PUSH_ETH_SRC]) == ETH_ALEN]) { -+ ll_addr_n2a(RTA_DATA(tb[TCA_VLAN_PUSH_ETH_SRC]), -+ ETH_ALEN, 0, b1, sizeof(b1)); -+ print_string(PRINT_ANY, "src_mac", " src_mac %s", b1); -+ } - } - print_action_control(f, " ", parm->action, ""); - -diff --git a/testsuite/tests/tc/vlan.t b/testsuite/tests/tc/vlan.t -new file mode 100755 -index 00000000..b86dc364 ---- /dev/null -+++ b/testsuite/tests/tc/vlan.t -@@ -0,0 +1,86 @@ -+#!/bin/sh -+ -+. 
lib/generic.sh -+ -+DEV="$(rand_dev)" -+ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy -+ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress -+ -+reset_qdisc() -+{ -+ ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress -+ ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress -+} -+ -+ts_tc "$0" "Add vlan action pop" \ -+ filter add dev $DEV ingress matchall action vlan pop -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "pop" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action push (default parameters)" \ -+ filter add dev $DEV ingress matchall action vlan push id 5 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "push" -+test_on "id 5" -+test_on "protocol 802.1Q" -+test_on "priority 0" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action push (explicit parameters)" \ -+ filter add dev $DEV ingress matchall \ -+ action vlan push id 5 protocol 802.1ad priority 2 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "push" -+test_on "id 5" -+test_on "protocol 802.1ad" -+test_on "priority 2" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action modify (default parameters)" \ -+ filter add dev $DEV ingress matchall action vlan modify id 5 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "modify" -+test_on "id 5" -+test_on "protocol 802.1Q" -+test_on "priority 0" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action modify (explicit parameters)" \ -+ filter add dev $DEV ingress matchall \ -+ action vlan modify id 5 protocol 802.1ad priority 2 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "modify" -+test_on "id 5" -+test_on "protocol 802.1ad" -+test_on "priority 2" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action pop_eth" \ -+ filter add dev $DEV ingress 
matchall action vlan pop_eth -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "pop_eth" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add vlan action push_eth" \ -+ filter add dev $DEV ingress matchall \ -+ action vlan push_eth dst_mac 02:00:00:00:00:02 \ -+ src_mac 02:00:00:00:00:01 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "vlan" -+test_on "push_eth" -+test_on "dst_mac 02:00:00:00:00:02" -+test_on "src_mac 02:00:00:00:00:01" -+test_on "pipe" --- -2.29.2 - diff --git a/SOURCES/0003-mptcp-add-support-for-port-based-endpoint.patch b/SOURCES/0003-mptcp-add-support-for-port-based-endpoint.patch new file mode 100644 index 0000000..1633cd0 --- /dev/null +++ b/SOURCES/0003-mptcp-add-support-for-port-based-endpoint.patch @@ -0,0 +1,123 @@ +From 0ccd2dbb3eca44a892a183db8c2e4221488ecf51 Mon Sep 17 00:00:00 2001 +Message-Id: <0ccd2dbb3eca44a892a183db8c2e4221488ecf51.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Mon, 9 Aug 2021 15:18:11 +0200 +Subject: [PATCH] mptcp: add support for port based endpoint + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1984733 +Upstream Status: iproute2.git commit 42fbca91 + +commit 42fbca91cd616ae714c3f6aa2d4e2c3399498e38 +Author: Paolo Abeni +Date: Fri Feb 19 21:42:55 2021 +0100 + + mptcp: add support for port based endpoint + + The feature is supported by the kernel since 5.11-net-next, + let's allow user-space to use it. 
+ + Just parse and dump an additional, per endpoint, u16 attribute + + Signed-off-by: Paolo Abeni + Signed-off-by: David Ahern + +Signed-off-by: Andrea Claudi +--- + ip/ipmptcp.c | 16 ++++++++++++++-- + man/man8/ip-mptcp.8 | 8 ++++++++ + 2 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/ip/ipmptcp.c b/ip/ipmptcp.c +index e1ffafb3..5f659b59 100644 +--- a/ip/ipmptcp.c ++++ b/ip/ipmptcp.c +@@ -17,7 +17,7 @@ static void usage(void) + { + fprintf(stderr, + "Usage: ip mptcp endpoint add ADDRESS [ dev NAME ] [ id ID ]\n" +- " [ FLAG-LIST ]\n" ++ " [ port NR ] [ FLAG-LIST ]\n" + " ip mptcp endpoint delete id ID\n" + " ip mptcp endpoint show [ id ID ]\n" + " ip mptcp endpoint flush\n" +@@ -97,6 +97,7 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n, + bool id_set = false; + __u32 index = 0; + __u32 flags = 0; ++ __u16 port = 0; + __u8 id = 0; + + ll_init_map(&rth); +@@ -123,6 +124,10 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n, + if (!index) + invarg("device does not exist\n", ifname); + ++ } else if (matches(*argv, "port") == 0) { ++ NEXT_ARG(); ++ if (get_u16(&port, *argv, 0)) ++ invarg("expected port", *argv); + } else if (get_addr(&address, *argv, AF_UNSPEC) == 0) { + addr_set = true; + } else { +@@ -145,6 +150,8 @@ static int mptcp_parse_opt(int argc, char **argv, struct nlmsghdr *n, + addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_FLAGS, flags); + if (index) + addattr32(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_IF_IDX, index); ++ if (port) ++ addattr16(n, MPTCP_BUFLEN, MPTCP_PM_ADDR_ATTR_PORT, port); + if (addr_set) { + int type; + +@@ -181,8 +188,8 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo) + __u8 family = AF_UNSPEC, addr_attr_type; + const char *ifname; + unsigned int flags; ++ __u16 id, port; + int index; +- __u16 id; + + parse_rtattr_nested(tb, MPTCP_PM_ADDR_ATTR_MAX, addrinfo); + +@@ -196,6 +203,11 @@ static int print_mptcp_addrinfo(struct rtattr *addrinfo) + print_string(PRINT_ANY, 
"address", "%s ", + format_host_rta(family, tb[addr_attr_type])); + } ++ if (tb[MPTCP_PM_ADDR_ATTR_PORT]) { ++ port = rta_getattr_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]); ++ if (port) ++ print_uint(PRINT_ANY, "port", "port %u ", port); ++ } + if (tb[MPTCP_PM_ADDR_ATTR_ID]) { + id = rta_getattr_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); + print_uint(PRINT_ANY, "id", "id %u ", id); +diff --git a/man/man8/ip-mptcp.8 b/man/man8/ip-mptcp.8 +index ef8409ea..98cb93b9 100644 +--- a/man/man8/ip-mptcp.8 ++++ b/man/man8/ip-mptcp.8 +@@ -20,6 +20,8 @@ ip-mptcp \- MPTCP path manager configuration + .ti -8 + .BR "ip mptcp endpoint add " + .IR IFADDR ++.RB "[ " port ++.IR PORT " ]" + .RB "[ " dev + .IR IFNAME " ]" + .RB "[ " id +@@ -87,6 +89,12 @@ ip mptcp endpoint flush flush all existing MPTCP endpoints + .TE + + .TP ++.IR PORT ++When a port number is specified, incoming MPTCP subflows for already ++established MPTCP sockets will be accepted on the specified port, regardless ++the original listener port accepting the first MPTCP subflow and/or ++this peer being actually on the client side. 
++ + .IR ID + is a unique numeric identifier for the given endpoint + +-- +2.31.1 + diff --git a/SOURCES/0004-Update-kernel-headers.patch b/SOURCES/0004-Update-kernel-headers.patch new file mode 100644 index 0000000..7f5a9c6 --- /dev/null +++ b/SOURCES/0004-Update-kernel-headers.patch @@ -0,0 +1,986 @@ +From 2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1 Mon Sep 17 00:00:00 2001 +Message-Id: <2e5b8fd1e0e8fc4135bd6a162f32df5e624262b1.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Wed, 11 Aug 2021 12:55:14 +0200 +Subject: [PATCH] Update kernel headers + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393 +Upstream Status: iproute2.git commit a5b355c0 + +commit a5b355c08c62fb5b3a42d0e27ef05571c7b30e2e +Author: David Ahern +Date: Fri Mar 19 14:59:17 2021 +0000 + + Update kernel headers + + Update kernel headers to commit: + 38cb57602369 ("selftests: net: forwarding: Fix a typo") + + Signed-off-by: David Ahern + +Signed-off-by: Andrea Claudi +--- + include/uapi/linux/bpf.h | 764 ++++++++++++++++++++++++++++++++- + include/uapi/linux/btf.h | 5 +- + include/uapi/linux/nexthop.h | 47 +- + include/uapi/linux/pkt_cls.h | 2 + + include/uapi/linux/rtnetlink.h | 7 + + 5 files changed, 818 insertions(+), 7 deletions(-) + +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index b1aba6af..502934f7 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -93,7 +93,717 @@ union bpf_iter_link_info { + } map; + }; + +-/* BPF syscall commands, see bpf(2) man-page for details. */ ++/* BPF syscall commands, see bpf(2) man-page for more details. */ ++/** ++ * DOC: eBPF Syscall Preamble ++ * ++ * The operation to be performed by the **bpf**\ () system call is determined ++ * by the *cmd* argument. 
Each operation takes an accompanying argument, ++ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see ++ * below). The size argument is the size of the union pointed to by *attr*. ++ */ ++/** ++ * DOC: eBPF Syscall Commands ++ * ++ * BPF_MAP_CREATE ++ * Description ++ * Create a map and return a file descriptor that refers to the ++ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) ++ * is automatically enabled for the new file descriptor. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_MAP_CREATE** will delete the map (but see NOTES). ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_MAP_LOOKUP_ELEM ++ * Description ++ * Look up an element with a given *key* in the map referred to ++ * by the file descriptor *map_fd*. ++ * ++ * The *flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_UPDATE_ELEM ++ * Description ++ * Create or update an element (key/value pair) in a specified map. ++ * ++ * The *flags* argument should be specified as one of the ++ * following: ++ * ++ * **BPF_ANY** ++ * Create a new element or update an existing element. ++ * **BPF_NOEXIST** ++ * Create a new element only if it did not exist. ++ * **BPF_EXIST** ++ * Update an existing element. ++ * **BPF_F_LOCK** ++ * Update a spin_lock-ed map element. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, ++ * **E2BIG**, **EEXIST**, or **ENOENT**. 
++ * ++ * **E2BIG** ++ * The number of elements in the map reached the ++ * *max_entries* limit specified at map creation time. ++ * **EEXIST** ++ * If *flags* specifies **BPF_NOEXIST** and the element ++ * with *key* already exists in the map. ++ * **ENOENT** ++ * If *flags* specifies **BPF_EXIST** and the element with ++ * *key* does not exist in the map. ++ * ++ * BPF_MAP_DELETE_ELEM ++ * Description ++ * Look up and delete an element by key in a specified map. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_GET_NEXT_KEY ++ * Description ++ * Look up an element by key in a specified map and return the key ++ * of the next element. Can be used to iterate over all elements ++ * in the map. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * The following cases can be used to iterate over all elements of ++ * the map: ++ * ++ * * If *key* is not found, the operation returns zero and sets ++ * the *next_key* pointer to the key of the first element. ++ * * If *key* is found, the operation returns zero and sets the ++ * *next_key* pointer to the key of the next element. ++ * * If *key* is the last element, returns -1 and *errno* is set ++ * to **ENOENT**. ++ * ++ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or ++ * **EINVAL** on error. ++ * ++ * BPF_PROG_LOAD ++ * Description ++ * Verify and load an eBPF program, returning a new file ++ * descriptor associated with the program. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). ++ * ++ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is ++ * automatically enabled for the new file descriptor. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). 
++ * ++ * BPF_OBJ_PIN ++ * Description ++ * Pin an eBPF program or map referred to by the specified *bpf_fd* ++ * to the provided *pathname* on the filesystem. ++ * ++ * The *pathname* argument must not contain a dot ("."). ++ * ++ * On success, *pathname* retains a reference to the eBPF object, ++ * preventing deallocation of the object when the original ++ * *bpf_fd* is closed. This allows the eBPF object to live beyond ++ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent ++ * process. ++ * ++ * Applying **unlink**\ (2) or similar calls to the *pathname* ++ * unpins the object from the filesystem, removing the reference. ++ * If no other file descriptors or filesystem nodes refer to the ++ * same object, it will be deallocated (see NOTES). ++ * ++ * The filesystem type for the parent directory of *pathname* must ++ * be **BPF_FS_MAGIC**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_OBJ_GET ++ * Description ++ * Open a file descriptor for the eBPF object pinned to the ++ * specified *pathname*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_PROG_ATTACH ++ * Description ++ * Attach an eBPF program to a *target_fd* at the specified ++ * *attach_type* hook. ++ * ++ * The *attach_type* specifies the eBPF attachment point to ++ * attach the program to, and must be one of *bpf_attach_type* ++ * (see below). ++ * ++ * The *attach_bpf_fd* must be a valid file descriptor for a ++ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap ++ * or sock_ops type corresponding to the specified *attach_type*.
++ * ++ * The *target_fd* must be a valid file descriptor for a kernel ++ * object which depends on the attach type of *attach_bpf_fd*: ++ * ++ * **BPF_PROG_TYPE_CGROUP_DEVICE**, ++ * **BPF_PROG_TYPE_CGROUP_SKB**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, ++ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, ++ * **BPF_PROG_TYPE_CGROUP_SYSCTL**, ++ * **BPF_PROG_TYPE_SOCK_OPS** ++ * ++ * Control Group v2 hierarchy with the eBPF controller ++ * enabled. Requires the kernel to be compiled with ++ * **CONFIG_CGROUP_BPF**. ++ * ++ * **BPF_PROG_TYPE_FLOW_DISSECTOR** ++ * ++ * Network namespace (eg /proc/self/ns/net). ++ * ++ * **BPF_PROG_TYPE_LIRC_MODE2** ++ * ++ * LIRC device path (eg /dev/lircN). Requires the kernel ++ * to be compiled with **CONFIG_BPF_LIRC_MODE2**. ++ * ++ * **BPF_PROG_TYPE_SK_SKB**, ++ * **BPF_PROG_TYPE_SK_MSG** ++ * ++ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_DETACH ++ * Description ++ * Detach the eBPF program associated with the *target_fd* at the ++ * hook specified by *attach_type*. The program must have been ++ * previously attached using **BPF_PROG_ATTACH**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_TEST_RUN ++ * Description ++ * Run the eBPF program associated with the *prog_fd* a *repeat* ++ * number of times against a provided program context *ctx_in* and ++ * data *data_in*, and return the modified program context ++ * *ctx_out*, *data_out* (for example, packet data), result of the ++ * execution *retval*, and *duration* of the test run. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * **ENOSPC** ++ * Either *data_size_out* or *ctx_size_out* is too small. 
++ * **ENOTSUPP** ++ * This command is not supported by the program type of ++ * the program referred to by *prog_fd*. ++ * ++ * BPF_PROG_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF program currently loaded into the kernel. ++ * ++ * Looks for the eBPF program with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF programs ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_MAP_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF map currently loaded into the kernel. ++ * ++ * Looks for the eBPF map with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF maps ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_PROG_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF program corresponding to ++ * *prog_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_MAP_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF map corresponding to ++ * *map_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_OBJ_GET_INFO_BY_FD ++ * Description ++ * Obtain information about the eBPF object corresponding to ++ * *bpf_fd*. 
++ * ++ * Populates up to *info_len* bytes of *info*, which will be in ++ * one of the following formats depending on the eBPF object type ++ * of *bpf_fd*: ++ * ++ * * **struct bpf_prog_info** ++ * * **struct bpf_map_info** ++ * * **struct bpf_btf_info** ++ * * **struct bpf_link_info** ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_QUERY ++ * Description ++ * Obtain information about eBPF programs associated with the ++ * specified *attach_type* hook. ++ * ++ * The *target_fd* must be a valid file descriptor for a kernel ++ * object which depends on the attach type of *attach_bpf_fd*: ++ * ++ * **BPF_PROG_TYPE_CGROUP_DEVICE**, ++ * **BPF_PROG_TYPE_CGROUP_SKB**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK**, ++ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, ++ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, ++ * **BPF_PROG_TYPE_CGROUP_SYSCTL**, ++ * **BPF_PROG_TYPE_SOCK_OPS** ++ * ++ * Control Group v2 hierarchy with the eBPF controller ++ * enabled. Requires the kernel to be compiled with ++ * **CONFIG_CGROUP_BPF**. ++ * ++ * **BPF_PROG_TYPE_FLOW_DISSECTOR** ++ * ++ * Network namespace (eg /proc/self/ns/net). ++ * ++ * **BPF_PROG_TYPE_LIRC_MODE2** ++ * ++ * LIRC device path (eg /dev/lircN). Requires the kernel ++ * to be compiled with **CONFIG_BPF_LIRC_MODE2**. ++ * ++ * **BPF_PROG_QUERY** always fetches the number of programs ++ * attached and the *attach_flags* which were used to attach those ++ * programs. Additionally, if *prog_ids* is nonzero and the number ++ * of attached programs is less than *prog_cnt*, populates ++ * *prog_ids* with the eBPF program ids of the programs attached ++ * at *target_fd*. ++ * ++ * The following flags may alter the result: ++ * ++ * **BPF_F_QUERY_EFFECTIVE** ++ * Only return information regarding programs which are ++ * currently effective at the specified *target_fd*. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. 
++ * ++ * BPF_RAW_TRACEPOINT_OPEN ++ * Description ++ * Attach an eBPF program to a tracepoint *name* to access kernel ++ * internal arguments of the tracepoint in their raw form. ++ * ++ * The *prog_fd* must be a valid file descriptor associated with ++ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. ++ * ++ * No ABI guarantees are made about the content of tracepoint ++ * arguments exposed to the corresponding eBPF program. ++ * ++ * Applying **close**\ (2) to the file descriptor returned by ++ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_BTF_LOAD ++ * Description ++ * Verify and load BPF Type Format (BTF) metadata into the kernel, ++ * returning a new file descriptor associated with the metadata. ++ * BTF is described in more detail at ++ * https://www.kernel.org/doc/html/latest/bpf/btf.html. ++ * ++ * The *btf* parameter must point to valid memory providing ++ * *btf_size* bytes of BTF binary metadata. ++ * ++ * The returned file descriptor can be passed to other **bpf**\ () ++ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to ++ * associate the BTF with those objects. ++ * ++ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional ++ * parameters to specify a *btf_log_buf*, *btf_log_size* and ++ * *btf_log_level* which allow the kernel to return freeform log ++ * output regarding the BTF verification process. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_BTF_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the BPF Type Format (BTF) ++ * corresponding to *btf_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). 
++ * ++ * BPF_TASK_FD_QUERY ++ * Description ++ * Obtain information about eBPF programs associated with the ++ * target process identified by *pid* and *fd*. ++ * ++ * If the *pid* and *fd* are associated with a tracepoint, kprobe ++ * or uprobe perf event, then the *prog_id* and *fd_type* will ++ * be populated with the eBPF program id and file descriptor type ++ * of type **bpf_task_fd_type**. If associated with a kprobe or ++ * uprobe, the *probe_offset* and *probe_addr* will also be ++ * populated. Optionally, if *buf* is provided, then up to ++ * *buf_len* bytes of *buf* will be populated with the name of ++ * the tracepoint, kprobe or uprobe. ++ * ++ * The resulting *prog_id* may be introspected in deeper detail ++ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_LOOKUP_AND_DELETE_ELEM ++ * Description ++ * Look up an element with the given *key* in the map referred to ++ * by the file descriptor *fd*, and if found, delete the element. ++ * ++ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types ++ * implement this command as a "pop" operation, deleting the top ++ * element rather than one corresponding to *key*. ++ * The *key* and *key_len* parameters should be zeroed when ++ * issuing this operation for these map types. ++ * ++ * This command is only valid for the following map types: ++ * * **BPF_MAP_TYPE_QUEUE** ++ * * **BPF_MAP_TYPE_STACK** ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_FREEZE ++ * Description ++ * Freeze the permissions of the specified map. ++ * ++ * Write permissions may be frozen by passing zero *flags*. ++ * Upon success, no future syscall invocations may alter the ++ * map state of *map_fd*. Write operations from eBPF programs ++ * are still possible for a frozen map. 
++ * ++ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_BTF_GET_NEXT_ID ++ * Description ++ * Fetch the next BPF Type Format (BTF) object currently loaded ++ * into the kernel. ++ * ++ * Looks for the BTF object with an id greater than *start_id* ++ * and updates *next_id* on success. If no other BTF objects ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_MAP_LOOKUP_BATCH ++ * Description ++ * Iterate and fetch multiple elements in a map. ++ * ++ * Two opaque values are used to manage batch operations, ++ * *in_batch* and *out_batch*. Initially, *in_batch* must be set ++ * to NULL to begin the batched operation. After each subsequent ++ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant ++ * *out_batch* as the *in_batch* for the next operation to ++ * continue iteration from the current point. ++ * ++ * The *keys* and *values* are output parameters which must point ++ * to memory large enough to hold *count* items based on the key ++ * and value size of the map *map_fd*. The *keys* buffer must be ++ * of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. ++ * ++ * The *elem_flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * On success, *count* elements from the map are copied into the ++ * user buffer, with the keys copied into *keys* and the values ++ * copied into the corresponding indices in *values*. 
++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **ENOSPC** to indicate that *keys* or ++ * *values* is too small to dump an entire bucket during ++ * iteration of a hash-based map type. ++ * ++ * BPF_MAP_LOOKUP_AND_DELETE_BATCH ++ * Description ++ * Iterate and delete all elements in a map. ++ * ++ * This operation has the same behavior as ++ * **BPF_MAP_LOOKUP_BATCH** with two exceptions: ++ * ++ * * Every element that is successfully returned is also deleted ++ * from the map. This is at least *count* elements. Note that ++ * *count* is both an input and an output parameter. ++ * * Upon returning with *errno* set to **EFAULT**, up to ++ * *count* elements may be deleted without returning the keys ++ * and values of the deleted elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_MAP_UPDATE_BATCH ++ * Description ++ * Update multiple elements in a map by *key*. ++ * ++ * The *keys* and *values* are input parameters which must point ++ * to memory large enough to hold *count* items based on the key ++ * and value size of the map *map_fd*. The *keys* buffer must be ++ * of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. ++ * ++ * Each element specified in *keys* is sequentially updated to the ++ * value in the corresponding index in *values*. The *in_batch* ++ * and *out_batch* parameters are ignored and should be zeroed. ++ * ++ * The *elem_flags* argument should be specified as one of the ++ * following: ++ * ++ * **BPF_ANY** ++ * Create new elements or update existing elements. ++ * **BPF_NOEXIST** ++ * Create new elements only if they do not exist. ++ * **BPF_EXIST** ++ * Update existing elements.
++ * **BPF_F_LOCK** ++ * Update spin_lock-ed map elements. This must be ++ * specified if the map value contains a spinlock. ++ * ++ * On success, *count* elements from the map are updated. ++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or ++ * **E2BIG**. **E2BIG** indicates that the number of elements in ++ * the map reached the *max_entries* limit specified at map ++ * creation time. ++ * ++ * May set *errno* to one of the following error codes under ++ * specific circumstances: ++ * ++ * **EEXIST** ++ * If *flags* specifies **BPF_NOEXIST** and the element ++ * with *key* already exists in the map. ++ * **ENOENT** ++ * If *flags* specifies **BPF_EXIST** and the element with ++ * *key* does not exist in the map. ++ * ++ * BPF_MAP_DELETE_BATCH ++ * Description ++ * Delete multiple elements in a map by *key*. ++ * ++ * The *keys* parameter is an input parameter which must point ++ * to memory large enough to hold *count* items based on the key ++ * size of the map *map_fd*, that is, *key_size* * *count*. ++ * ++ * Each element specified in *keys* is sequentially deleted. The ++ * *in_batch*, *out_batch*, and *values* parameters are ignored ++ * and should be zeroed. ++ * ++ * The *elem_flags* argument may be specified as one of the ++ * following: ++ * ++ * **BPF_F_LOCK** ++ * Look up the value of a spin-locked map without ++ * returning the lock. This must be specified if the ++ * elements contain a spinlock. ++ * ++ * On success, *count* elements from the map are deleted. ++ * ++ * If an error is returned and *errno* is not **EFAULT**, *count* ++ * is set to the number of successfully processed elements. If ++ * *errno* is **EFAULT**, up to *count* elements may have been ++ * deleted.
++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_LINK_CREATE ++ * Description ++ * Attach an eBPF program to a *target_fd* at the specified ++ * *attach_type* hook and return a file descriptor handle for ++ * managing the link. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_UPDATE ++ * Description ++ * Update the eBPF program in the specified *link_fd* to ++ * *new_prog_fd*. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_LINK_GET_FD_BY_ID ++ * Description ++ * Open a file descriptor for the eBPF Link corresponding to ++ * *link_id*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_GET_NEXT_ID ++ * Description ++ * Fetch the next eBPF link currently loaded into the kernel. ++ * ++ * Looks for the eBPF link with an id greater than *start_id* ++ * and updates *next_id* on success. If no other eBPF links ++ * remain with ids higher than *start_id*, returns -1 and sets ++ * *errno* to **ENOENT**. ++ * ++ * Return ++ * Returns zero on success. On error, or when no id remains, -1 ++ * is returned and *errno* is set appropriately. ++ * ++ * BPF_ENABLE_STATS ++ * Description ++ * Enable eBPF runtime statistics gathering. ++ * ++ * Runtime statistics gathering for the eBPF runtime is disabled ++ * by default to minimize the corresponding performance overhead. ++ * This command enables statistics globally. ++ * ++ * Multiple programs may independently enable statistics. ++ * After gathering the desired statistics, eBPF runtime statistics ++ * may be disabled again by calling **close**\ (2) for the file ++ * descriptor returned by this function. 
Statistics will only be ++ * disabled system-wide when all outstanding file descriptors ++ * returned by prior calls for this subcommand are closed. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_ITER_CREATE ++ * Description ++ * Create an iterator on top of the specified *link_fd* (as ++ * previously created using **BPF_LINK_CREATE**) and return a ++ * file descriptor that can be used to trigger the iteration. ++ * ++ * If the resulting file descriptor is pinned to the filesystem ++ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls ++ * for that path will trigger the iterator to read kernel state ++ * using the eBPF program attached to *link_fd*. ++ * ++ * Return ++ * A new file descriptor (a nonnegative integer), or -1 if an ++ * error occurred (in which case, *errno* is set appropriately). ++ * ++ * BPF_LINK_DETACH ++ * Description ++ * Forcefully detach the specified *link_fd* from its ++ * corresponding attachment point. ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * BPF_PROG_BIND_MAP ++ * Description ++ * Bind a map to the lifetime of an eBPF program. ++ * ++ * The map identified by *map_fd* is bound to the program ++ * identified by *prog_fd* and only released when *prog_fd* is ++ * released. This may be used in cases where metadata should be ++ * associated with a program which otherwise does not contain any ++ * references to the map (for example, embedded in the eBPF ++ * program instructions). ++ * ++ * Return ++ * Returns zero on success. On error, -1 is returned and *errno* ++ * is set appropriately. ++ * ++ * NOTES ++ * eBPF objects (maps and programs) can be shared between processes. ++ * ++ * * After **fork**\ (2), the child inherits file descriptors ++ * referring to the same eBPF objects. 
++ * * File descriptors referring to eBPF objects can be transferred over ++ * **unix**\ (7) domain sockets. ++ * * File descriptors referring to eBPF objects can be duplicated in the ++ * usual way, using **dup**\ (2) and similar calls. ++ * * File descriptors referring to eBPF objects can be pinned to the ++ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). ++ * ++ * An eBPF object is deallocated only after all file descriptors referring ++ * to the object have been closed and no references remain pinned to the ++ * filesystem or attached (for example, bound to a program or device). ++ */ + enum bpf_cmd { + BPF_MAP_CREATE, + BPF_MAP_LOOKUP_ELEM, +@@ -393,6 +1103,15 @@ enum bpf_link_type { + * is struct/union. + */ + #define BPF_PSEUDO_BTF_ID 3 ++/* insn[0].src_reg: BPF_PSEUDO_FUNC ++ * insn[0].imm: insn offset to the func ++ * insn[1].imm: 0 ++ * insn[0].off: 0 ++ * insn[1].off: 0 ++ * ldimm64 rewrite: address of the function ++ * verifier type: PTR_TO_FUNC. ++ */ ++#define BPF_PSEUDO_FUNC 4 + + /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function +@@ -720,7 +1439,7 @@ union bpf_attr { + * parsed and used to produce a manual page. The workflow is the following, + * and requires the rst2man utility: + * +- * $ ./scripts/bpf_helpers_doc.py \ ++ * $ ./scripts/bpf_doc.py \ + * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst + * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 + * $ man /tmp/bpf-helpers.7 +@@ -1765,6 +2484,10 @@ union bpf_attr { + * Use with ENCAP_L3/L4 flags to further specify the tunnel + * type; *len* is the length of the inner MAC header. + * ++ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: ++ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the ++ * L2 type as Ethernet. ++ * + * A call to this helper is susceptible to change the underlying + * packet buffer. 
Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be +@@ -3850,7 +4573,7 @@ union bpf_attr { + * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description +- * Check packet size against exceeding MTU of net device (based ++ * Check ctx packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * +@@ -3915,6 +4638,34 @@ union bpf_attr { + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * ++ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * For each element in **map**, call **callback_fn** function with ++ * **map**, **callback_ctx** and other map-specific parameters. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. ++ * ++ * The following is a list of supported map types and their ++ * respective expected callback signatures: ++ * ++ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, ++ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, ++ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY ++ * ++ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); ++ * ++ * For per_cpu maps, the map_value is the value on the cpu where the ++ * bpf_prog is running. ++ * ++ * If **callback_fn** returns 0, the helper will continue to the next ++ * element. If return value is 1, the helper will skip the rest of ++ * elements and return. Other return values are not used now. ++ * ++ * Return ++ * The number of traversed map elements for success, **-EINVAL** for ++ * invalid **flags**.
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -4081,6 +4832,7 @@ union bpf_attr { + FN(ima_inode_hash), \ + FN(sock_from_file), \ + FN(check_mtu), \ ++ FN(for_each_map_elem), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +@@ -4174,6 +4926,7 @@ enum { + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), + BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), ++ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + }; + + enum { +@@ -5211,7 +5964,10 @@ struct bpf_pidns_info { + + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ + struct bpf_sk_lookup { +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ union { ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ ++ }; + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ +diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h +index 4a42eb48..2c42dcac 100644 +--- a/include/uapi/linux/btf.h ++++ b/include/uapi/linux/btf.h +@@ -52,7 +52,7 @@ struct btf_type { + }; + }; + +-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) ++#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) + #define BTF_INFO_VLEN(info) ((info) & 0xffff) + #define BTF_INFO_KFLAG(info) ((info) >> 31) + +@@ -72,7 +72,8 @@ struct btf_type { + #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ + #define BTF_KIND_VAR 14 /* Variable */ + #define BTF_KIND_DATASEC 15 /* Section */ +-#define BTF_KIND_MAX BTF_KIND_DATASEC ++#define BTF_KIND_FLOAT 16 /* Floating point */ ++#define BTF_KIND_MAX BTF_KIND_FLOAT + #define NR_BTF_KINDS (BTF_KIND_MAX + 1) + + /* For some specific BTF_KIND, "struct btf_type" is immediately +diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h +index b0a56139..37b14b4e 100644 +--- a/include/uapi/linux/nexthop.h ++++ 
b/include/uapi/linux/nexthop.h +@@ -21,7 +21,10 @@ struct nexthop_grp { + }; + + enum { +- NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */ ++ NEXTHOP_GRP_TYPE_MPATH, /* hash-threshold nexthop group ++ * default type if not specified ++ */ ++ NEXTHOP_GRP_TYPE_RES, /* resilient nexthop group */ + __NEXTHOP_GRP_TYPE_MAX, + }; + +@@ -52,8 +55,50 @@ enum { + NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ + /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + ++ /* nested; resilient nexthop group attributes */ ++ NHA_RES_GROUP, ++ /* nested; nexthop bucket attributes */ ++ NHA_RES_BUCKET, ++ + __NHA_MAX, + }; + + #define NHA_MAX (__NHA_MAX - 1) ++ ++enum { ++ NHA_RES_GROUP_UNSPEC, ++ /* Pad attribute for 64-bit alignment. */ ++ NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC, ++ ++ /* u16; number of nexthop buckets in a resilient nexthop group */ ++ NHA_RES_GROUP_BUCKETS, ++ /* clock_t as u32; nexthop bucket idle timer (per-group) */ ++ NHA_RES_GROUP_IDLE_TIMER, ++ /* clock_t as u32; nexthop unbalanced timer */ ++ NHA_RES_GROUP_UNBALANCED_TIMER, ++ /* clock_t as u64; nexthop unbalanced time */ ++ NHA_RES_GROUP_UNBALANCED_TIME, ++ ++ __NHA_RES_GROUP_MAX, ++}; ++ ++#define NHA_RES_GROUP_MAX (__NHA_RES_GROUP_MAX - 1) ++ ++enum { ++ NHA_RES_BUCKET_UNSPEC, ++ /* Pad attribute for 64-bit alignment. 
*/ ++ NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC, ++ ++ /* u16; nexthop bucket index */ ++ NHA_RES_BUCKET_INDEX, ++ /* clock_t as u64; nexthop bucket idle time */ ++ NHA_RES_BUCKET_IDLE_TIME, ++ /* u32; nexthop id assigned to the nexthop bucket */ ++ NHA_RES_BUCKET_NH_ID, ++ ++ __NHA_RES_BUCKET_MAX, ++}; ++ ++#define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) ++ + #endif +diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h +index 7ea59cfe..025c40fe 100644 +--- a/include/uapi/linux/pkt_cls.h ++++ b/include/uapi/linux/pkt_cls.h +@@ -190,6 +190,8 @@ enum { + TCA_POLICE_PAD, + TCA_POLICE_RATE64, + TCA_POLICE_PEAKRATE64, ++ TCA_POLICE_PKTRATE64, ++ TCA_POLICE_PKTBURST64, + __TCA_POLICE_MAX + #define TCA_POLICE_RESULT TCA_POLICE_RESULT + }; +diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h +index b34b9add..f62cccc1 100644 +--- a/include/uapi/linux/rtnetlink.h ++++ b/include/uapi/linux/rtnetlink.h +@@ -178,6 +178,13 @@ enum { + RTM_GETVLAN, + #define RTM_GETVLAN RTM_GETVLAN + ++ RTM_NEWNEXTHOPBUCKET = 116, ++#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET ++ RTM_DELNEXTHOPBUCKET, ++#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET ++ RTM_GETNEXTHOPBUCKET, ++#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET ++ + __RTM_MAX, + #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) + }; +-- +2.31.1 + diff --git a/SOURCES/0004-m_mpls-add-mac_push-action.patch b/SOURCES/0004-m_mpls-add-mac_push-action.patch deleted file mode 100644 index 54c1c3c..0000000 --- a/SOURCES/0004-m_mpls-add-mac_push-action.patch +++ /dev/null @@ -1,342 +0,0 @@ -From 0afe12a4a9471ed1343693338ec6350dc66ba295 Mon Sep 17 00:00:00 2001 -Message-Id: <0afe12a4a9471ed1343693338ec6350dc66ba295.1611877215.git.aclaudi@redhat.com> -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:03 +0100 -Subject: [PATCH] m_mpls: add mac_push action - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit 02a261b5 - 
-commit 02a261b5ba1c8580ac2a35bc6c87faa2ec9f5c96 -Author: Guillaume Nault -Date: Mon Oct 19 17:23:08 2020 +0200 - - m_mpls: add mac_push action - - Add support for the new TCA_MPLS_ACT_MAC_PUSH action (kernel commit - a45294af9e96 ("net/sched: act_mpls: Add action to push MPLS LSE before - Ethernet header")). This action let TC push an MPLS header before the - MAC header of a frame. - - Example (encapsulate all outgoing frames with label 20, then add an - outer Ethernet header): - # tc filter add dev ethX matchall \ - action mpls mac_push label 20 ttl 64 \ - action vlan push_eth dst_mac 0a:00:00:00:00:02 \ - src_mac 0a:00:00:00:00:01 - - This patch also adds an alias for ETH_P_TEB, since it is useful when - decapsulating MPLS packets that contain an Ethernet frame. - - With MAC_PUSH, there's no previous Ethertype to modify. However, the - "protocol" option is still needed, because the kernel uses it to set - skb->protocol. So rename can_modify_ethtype() to can_set_ethtype(). - - Also add a test suite for m_mpls, which covers the new action and the - pre-existing ones. 
- - Signed-off-by: Guillaume Nault - Signed-off-by: David Ahern ---- - lib/ll_proto.c | 1 + - man/man8/tc-mpls.8 | 44 +++++++++++++++++++++++-- - man/man8/tc-vlan.8 | 5 ++- - tc/m_mpls.c | 43 ++++++++++++++++-------- - testsuite/tests/tc/mpls.t | 69 +++++++++++++++++++++++++++++++++++++++ - 5 files changed, 145 insertions(+), 17 deletions(-) - create mode 100755 testsuite/tests/tc/mpls.t - -diff --git a/lib/ll_proto.c b/lib/ll_proto.c -index 2a0c1cb3..78179311 100644 ---- a/lib/ll_proto.c -+++ b/lib/ll_proto.c -@@ -80,6 +80,7 @@ __PF(8021Q,802.1Q) - __PF(8021AD,802.1ad) - __PF(MPLS_UC,mpls_uc) - __PF(MPLS_MC,mpls_mc) -+__PF(TEB,teb) - - { 0x8100, "802.1Q" }, - { 0x88cc, "LLDP" }, -diff --git a/man/man8/tc-mpls.8 b/man/man8/tc-mpls.8 -index 84ef2ef1..9e563e98 100644 ---- a/man/man8/tc-mpls.8 -+++ b/man/man8/tc-mpls.8 -@@ -17,7 +17,7 @@ mpls - mpls manipulation module - - .ti -8 - .IR PUSH " := " --.BR push " [ " protocol -+.RB "{ " push " | " mac_push " } [ " protocol - .IR MPLS_PROTO " ]" - .RB " [ " tc - .IR MPLS_TC " ] " -@@ -64,7 +64,14 @@ requires no arguments and simply subtracts 1 from the MPLS header TTL field. - Decapsulation mode. Requires the protocol of the next header. - .TP - .B push --Encapsulation mode. Requires at least the -+Encapsulation mode. Adds the MPLS header between the MAC and the network -+headers. Requires at least the -+.B label -+option. -+.TP -+.B mac_push -+Encapsulation mode. Adds the MPLS header before the MAC header. Requires at -+least the - .B label - option. - .TP -@@ -152,5 +159,36 @@ ip packets and output to eth1: - .EE - .RE - -+Here is another example, where incoming Ethernet frames are encapsulated into -+MPLS with label 123 and TTL 64. 
Then, an outer Ethernet header is added and the -+resulting frame is finally sent on eth1: -+ -+.RS -+.EX -+#tc qdisc add dev eth0 ingress -+#tc filter add dev eth0 ingress matchall \\ -+ action mpls mac_push label 123 ttl 64 \\ -+ action vlan push_eth \\ -+ dst_mac 02:00:00:00:00:02 \\ -+ src_mac 02:00:00:00:00:01 \\ -+ action mirred egress redirect dev eth1 -+.EE -+.RE -+ -+The following example assumes that incoming MPLS packets with label 123 -+transport Ethernet frames. The outer Ethernet and the MPLS headers are -+stripped, then the inner Ethernet frame is sent on eth1: -+ -+.RS -+.EX -+#tc qdisc add dev eth0 ingress -+#tc filter add dev eth0 ingress protocol mpls_uc \\ -+ flower mpls_label 123 mpls_bos 1 \\ -+ action vlan pop_eth \\ -+ action mpls pop protocol teb \\ -+ action mirred egress redirect dev eth1 -+.EE -+.RE -+ - .SH SEE ALSO --.BR tc (8) -+.BR tc "(8), " tc-mirred "(8), " tc-vlan (8) -diff --git a/man/man8/tc-vlan.8 b/man/man8/tc-vlan.8 -index 5c2808b1..264053d3 100644 ---- a/man/man8/tc-vlan.8 -+++ b/man/man8/tc-vlan.8 -@@ -157,5 +157,8 @@ process then restarted for the plain packet: - .EE - .RE - -+For an example of the -+.BR pop_eth " and " push_eth " modes, see " tc-mpls (8). 
-+ - .SH SEE ALSO --.BR tc (8) -+.BR tc "(8), " tc-mpls (8) -diff --git a/tc/m_mpls.c b/tc/m_mpls.c -index 3d5d9b25..cb8019b1 100644 ---- a/tc/m_mpls.c -+++ b/tc/m_mpls.c -@@ -17,6 +17,7 @@ static const char * const action_names[] = { - [TCA_MPLS_ACT_PUSH] = "push", - [TCA_MPLS_ACT_MODIFY] = "modify", - [TCA_MPLS_ACT_DEC_TTL] = "dec_ttl", -+ [TCA_MPLS_ACT_MAC_PUSH] = "mac_push", - }; - - static void explain(void) -@@ -25,9 +26,11 @@ static void explain(void) - "Usage: mpls pop [ protocol MPLS_PROTO ]\n" - " mpls push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" - " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" -+ " mpls mac_push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" -+ " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" - " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ] [CONTROL]\n" -- " for pop MPLS_PROTO is next header of packet - e.g. ip or mpls_uc\n" -- " for push MPLS_PROTO is one of mpls_uc or mpls_mc\n" -+ " for pop, MPLS_PROTO is next header of packet - e.g. 
ip or mpls_uc\n" -+ " for push and mac_push, MPLS_PROTO is one of mpls_uc or mpls_mc\n" - " with default: mpls_uc\n" - " CONTROL := reclassify | pipe | drop | continue | pass |\n" - " goto chain \n"); -@@ -41,12 +44,14 @@ static void usage(void) - - static bool can_modify_mpls_fields(unsigned int action) - { -- return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MODIFY; -+ return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH || -+ action == TCA_MPLS_ACT_MODIFY; - } - --static bool can_modify_ethtype(unsigned int action) -+static bool can_set_ethtype(unsigned int action) - { -- return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_POP; -+ return action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH || -+ action == TCA_MPLS_ACT_POP; - } - - static bool is_valid_label(__u32 label) -@@ -94,6 +99,10 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, - if (check_double_action(action, *argv)) - return -1; - action = TCA_MPLS_ACT_PUSH; -+ } else if (matches(*argv, "mac_push") == 0) { -+ if (check_double_action(action, *argv)) -+ return -1; -+ action = TCA_MPLS_ACT_MAC_PUSH; - } else if (matches(*argv, "modify") == 0) { - if (check_double_action(action, *argv)) - return -1; -@@ -104,31 +113,36 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, - action = TCA_MPLS_ACT_DEC_TTL; - } else if (matches(*argv, "label") == 0) { - if (!can_modify_mpls_fields(action)) -- invarg("only valid for push/modify", *argv); -+ invarg("only valid for push, mac_push and modify", -+ *argv); - NEXT_ARG(); - if (get_u32(&label, *argv, 0) || !is_valid_label(label)) - invarg("label must be <=0xFFFFF", *argv); - } else if (matches(*argv, "tc") == 0) { - if (!can_modify_mpls_fields(action)) -- invarg("only valid for push/modify", *argv); -+ invarg("only valid for push, mac_push and modify", -+ *argv); - NEXT_ARG(); - if (get_u8(&tc, *argv, 0) || (tc & ~0x7)) - invarg("tc field is 3 bits max", *argv); - } else 
if (matches(*argv, "ttl") == 0) { - if (!can_modify_mpls_fields(action)) -- invarg("only valid for push/modify", *argv); -+ invarg("only valid for push, mac_push and modify", -+ *argv); - NEXT_ARG(); - if (get_u8(&ttl, *argv, 0) || !ttl) - invarg("ttl must be >0 and <=255", *argv); - } else if (matches(*argv, "bos") == 0) { - if (!can_modify_mpls_fields(action)) -- invarg("only valid for push/modify", *argv); -+ invarg("only valid for push, mac_push and modify", -+ *argv); - NEXT_ARG(); - if (get_u8(&bos, *argv, 0) || (bos & ~0x1)) - invarg("bos must be 0 or 1", *argv); - } else if (matches(*argv, "protocol") == 0) { -- if (!can_modify_ethtype(action)) -- invarg("only valid for push/pop", *argv); -+ if (!can_set_ethtype(action)) -+ invarg("only valid for push, mac_push and pop", -+ *argv); - NEXT_ARG(); - if (ll_proto_a2n(&proto, *argv)) - invarg("protocol is invalid", *argv); -@@ -159,10 +173,12 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, - if (action == TCA_MPLS_ACT_PUSH && label == 0xffffffff) - missarg("label"); - -- if (action == TCA_MPLS_ACT_PUSH && proto && -+ if ((action == TCA_MPLS_ACT_PUSH || action == TCA_MPLS_ACT_MAC_PUSH) && -+ proto && - proto != htons(ETH_P_MPLS_UC) && proto != htons(ETH_P_MPLS_MC)) { - fprintf(stderr, -- "invalid push protocol \"0x%04x\" - use mpls_(uc|mc)\n", -+ "invalid %spush protocol \"0x%04x\" - use mpls_(uc|mc)\n", -+ action == TCA_MPLS_ACT_MAC_PUSH ? "mac_" : "", - ntohs(proto)); - return -1; - } -@@ -223,6 +239,7 @@ static int print_mpls(struct action_util *au, FILE *f, struct rtattr *arg) - } - break; - case TCA_MPLS_ACT_PUSH: -+ case TCA_MPLS_ACT_MAC_PUSH: - if (tb[TCA_MPLS_PROTO]) { - __u16 proto; - -diff --git a/testsuite/tests/tc/mpls.t b/testsuite/tests/tc/mpls.t -new file mode 100755 -index 00000000..cb25f361 ---- /dev/null -+++ b/testsuite/tests/tc/mpls.t -@@ -0,0 +1,69 @@ -+#!/bin/sh -+ -+. 
lib/generic.sh -+ -+DEV="$(rand_dev)" -+ts_ip "$0" "Add $DEV dummy interface" link add dev $DEV up type dummy -+ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress -+ -+reset_qdisc() -+{ -+ ts_tc "$0" "Remove ingress qdisc" qdisc del dev $DEV ingress -+ ts_tc "$0" "Add ingress qdisc" qdisc add dev $DEV ingress -+} -+ -+ts_tc "$0" "Add mpls action pop" \ -+ filter add dev $DEV ingress protocol mpls_uc matchall \ -+ action mpls pop protocol ip -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "mpls" -+test_on "pop protocol ip pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add mpls action push" \ -+ filter add dev $DEV ingress protocol ip matchall \ -+ action mpls push protocol mpls_uc label 20 tc 3 bos 1 ttl 64 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "mpls" -+test_on "push" -+test_on "protocol mpls_uc" -+test_on "label 20" -+test_on "tc 3" -+test_on "bos 1" -+test_on "ttl 64" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add mpls action mac_push" \ -+ filter add dev $DEV ingress matchall \ -+ action mpls mac_push protocol mpls_uc label 20 tc 3 bos 1 ttl 64 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "mpls" -+test_on "mac_push" -+test_on "protocol mpls_uc" -+test_on "label 20" -+test_on "tc 3" -+test_on "bos 1" -+test_on "ttl 64" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add mpls action modify" \ -+ filter add dev $DEV ingress protocol mpls_uc matchall \ -+ action mpls modify label 20 tc 3 ttl 64 -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "mpls" -+test_on "modify" -+test_on "label 20" -+test_on "tc 3" -+test_on "ttl 64" -+test_on "pipe" -+ -+reset_qdisc -+ts_tc "$0" "Add mpls action dec_ttl" \ -+ filter add dev $DEV ingress protocol mpls_uc matchall \ -+ action mpls dec_ttl -+ts_tc "$0" "Show ingress filters" filter show dev $DEV ingress -+test_on "mpls" -+test_on "dec_ttl" -+test_on "pipe" --- -2.29.2 - diff --git 
a/SOURCES/0005-m_mpls-test-the-mac_push-action-after-modify.patch b/SOURCES/0005-m_mpls-test-the-mac_push-action-after-modify.patch deleted file mode 100644 index 197b3d6..0000000 --- a/SOURCES/0005-m_mpls-test-the-mac_push-action-after-modify.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 8c66f562e88887d2bf1c1064117496c4cb862b11 Mon Sep 17 00:00:00 2001 -Message-Id: <8c66f562e88887d2bf1c1064117496c4cb862b11.1611877215.git.aclaudi@redhat.com> -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:03 +0100 -Subject: [PATCH] m_mpls: test the 'mac_push' action after 'modify' - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit f1298d76 - -commit f1298d76606a581cf3ab9ec45a92b41e72a6b4f0 -Author: Guillaume Nault -Date: Thu Oct 22 11:11:44 2020 +0200 - - m_mpls: test the 'mac_push' action after 'modify' - - Commit 02a261b5ba1c ("m_mpls: add mac_push action") added a matches() - test for the "mac_push" string before the test for "modify". - This changes the previous behaviour as 'action m' used to match - "modify" while it now matches "mac_push". - - Revert to the original behaviour by moving the "mac_push" test after - "modify". 
- - Fixes: 02a261b5ba1c ("m_mpls: add mac_push action") - Signed-off-by: Guillaume Nault - Signed-off-by: David Ahern ---- - tc/m_mpls.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tc/m_mpls.c b/tc/m_mpls.c -index cb8019b1..2c3752ba 100644 ---- a/tc/m_mpls.c -+++ b/tc/m_mpls.c -@@ -99,14 +99,14 @@ static int parse_mpls(struct action_util *a, int *argc_p, char ***argv_p, - if (check_double_action(action, *argv)) - return -1; - action = TCA_MPLS_ACT_PUSH; -- } else if (matches(*argv, "mac_push") == 0) { -- if (check_double_action(action, *argv)) -- return -1; -- action = TCA_MPLS_ACT_MAC_PUSH; - } else if (matches(*argv, "modify") == 0) { - if (check_double_action(action, *argv)) - return -1; - action = TCA_MPLS_ACT_MODIFY; -+ } else if (matches(*argv, "mac_push") == 0) { -+ if (check_double_action(action, *argv)) -+ return -1; -+ action = TCA_MPLS_ACT_MAC_PUSH; - } else if (matches(*argv, "dec_ttl") == 0) { - if (check_double_action(action, *argv)) - return -1; --- -2.29.2 - diff --git a/SOURCES/0005-police-add-support-for-packet-per-second-rate-limiti.patch b/SOURCES/0005-police-add-support-for-packet-per-second-rate-limiti.patch new file mode 100644 index 0000000..9d19b45 --- /dev/null +++ b/SOURCES/0005-police-add-support-for-packet-per-second-rate-limiti.patch @@ -0,0 +1,221 @@ +From b061aeba93b1c730b7dafeece6b90aad2e7afce8 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Wed, 11 Aug 2021 12:55:14 +0200 +Subject: [PATCH] police: add support for packet-per-second rate limiting + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393 +Upstream Status: iproute2.git commit cf9ae1bd + +commit cf9ae1bd31187d8ae62bc1bb408e443dbc8bd6a0 +Author: Baowen Zheng +Date: Fri Mar 26 13:50:18 2021 +0100 + + police: add support for 
packet-per-second rate limiting + + Allow a policer action to enforce a rate-limit based on packets-per-second, + configurable using a packet-per-second rate and burst parameters. + + e.g. + # $TC actions add action police pkts_rate 1000 pkts_burst 200 index 1 + # $TC actions ls action police + total acts 1 + + action order 0: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200 + ref 1 bind 0 + + Signed-off-by: Baowen Zheng + Signed-off-by: Simon Horman + Signed-off-by: Louis Peens + Signed-off-by: David Ahern + +Signed-off-by: Andrea Claudi +--- + man/man8/tc-police.8 | 35 ++++++++++++++++++++++++------- + tc/m_police.c | 50 +++++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 75 insertions(+), 10 deletions(-) + +diff --git a/man/man8/tc-police.8 b/man/man8/tc-police.8 +index 52279755..86e263bb 100644 +--- a/man/man8/tc-police.8 ++++ b/man/man8/tc-police.8 +@@ -5,9 +5,11 @@ police - policing action + .SH SYNOPSIS + .in +8 + .ti -8 +-.BR tc " ... " "action police" ++.BR tc " ... " "action police [" + .BI rate " RATE " burst +-.IR BYTES [\fB/ BYTES "] [" ++.IR BYTES [\fB/ BYTES "] ] [" ++.BI pkts_rate " RATE " pkts_burst ++.IR PACKETS "] [" + .B mtu + .IR BYTES [\fB/ BYTES "] ] [" + .BI peakrate " RATE" +@@ -34,19 +36,29 @@ police - policing action + .SH DESCRIPTION + The + .B police +-action allows to limit bandwidth of traffic matched by the filter it is +-attached to. Basically there are two different algorithms available to measure +-the packet rate: The first one uses an internal dual token bucket and is +-configured using the ++action allows limiting of the byte or packet rate of traffic matched by the ++filter it is attached to. ++.P ++There are two different algorithms available to measure the byte rate: The ++first one uses an internal dual token bucket and is configured using the + .BR rate ", " burst ", " mtu ", " peakrate ", " overhead " and " linklayer + parameters. The second one uses an in-kernel sampling mechanism. 
It can be + fine-tuned using the + .B estimator + filter parameter. ++.P ++There is one algorithm available to measure packet rate and it is similar to ++the first algorithm described for byte rate. It is configured using the ++.BR pkts_rate " and " pkts_burst ++parameters. ++.P ++At least one of the ++.BR rate " and " pkts_rate ++parameters must be configured. + .SH OPTIONS + .TP + .BI rate " RATE" +-The maximum traffic rate of packets passing this action. Those exceeding it will ++The maximum byte rate of packets passing this action. Those exceeding it will + be treated as defined by the + .B conform-exceed + option. +@@ -55,6 +67,15 @@ option. + Set the maximum allowed burst in bytes, optionally followed by a slash ('/') + sign and cell size which must be a power of 2. + .TP ++.BI pkts_rate " RATE" ++The maximum packet rate of packets passing this action. Those exceeding it will ++be treated as defined by the ++.B conform-exceed ++option. ++.TP ++.BI pkts_burst " PACKETS" ++Set the maximum allowed burst in packets. ++.TP + .BI mtu " BYTES\fR[\fB/\fIBYTES\fR]" + This is the maximum packet size handled by the policer (larger ones will be + handled like they exceeded the configured rate). Setting this value correctly +diff --git a/tc/m_police.c b/tc/m_police.c +index bb51df68..9ef0e40b 100644 +--- a/tc/m_police.c ++++ b/tc/m_police.c +@@ -38,7 +38,8 @@ struct action_util police_action_util = { + static void usage(void) + { + fprintf(stderr, +- "Usage: ...
police [ rate BPS burst BYTES[/BYTES] ] \n" ++ " [ pkts_rate RATE pkts_burst PACKETS ] [ mtu BYTES[/BYTES] ]\n" + " [ peakrate BPS ] [ avrate BPS ] [ overhead BYTES ]\n" + " [ linklayer TYPE ] [ CONTROL ]\n" + "Where: CONTROL := conform-exceed [/NOTEXCEEDACT]\n" +@@ -67,6 +68,7 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + int Rcell_log = -1, Pcell_log = -1; + struct rtattr *tail; + __u64 rate64 = 0, prate64 = 0; ++ __u64 pps64 = 0, ppsburst64 = 0; + + if (a) /* new way of doing things */ + NEXT_ARG(); +@@ -144,6 +146,18 @@ static int act_parse_police(struct action_util *a, int *argc_p, char ***argv_p, + NEXT_ARG(); + if (get_linklayer(&linklayer, *argv)) + invarg("linklayer", *argv); ++ } else if (matches(*argv, "pkts_rate") == 0) { ++ NEXT_ARG(); ++ if (pps64) ++ duparg("pkts_rate", *argv); ++ if (get_u64(&pps64, *argv, 10)) ++ invarg("pkts_rate", *argv); ++ } else if (matches(*argv, "pkts_burst") == 0) { ++ NEXT_ARG(); ++ if (ppsburst64) ++ duparg("pkts_burst", *argv); ++ if (get_u64(&ppsburst64, *argv, 10)) ++ invarg("pkts_burst", *argv); + } else if (strcmp(*argv, "help") == 0) { + usage(); + } else { +@@ -161,8 +175,8 @@ action_ctrl_ok: + return -1; + + /* Must at least do late binding, use TB or ewma policing */ +- if (!rate64 && !avrate && !p.index && !mtu) { +- fprintf(stderr, "'rate' or 'avrate' or 'mtu' MUST be specified.\n"); ++ if (!rate64 && !avrate && !p.index && !mtu && !pps64) { ++ fprintf(stderr, "'rate' or 'avrate' or 'mtu' or 'pkts_rate' MUST be specified.\n"); + return -1; + } + +@@ -172,6 +186,18 @@ action_ctrl_ok: + return -1; + } + ++ /* When the packets TB policer is used, pkts_burst is required */ ++ if (pps64 && !ppsburst64) { ++ fprintf(stderr, "'pkts_burst' requires 'pkts_rate'.\n"); ++ return -1; ++ } ++ ++ /* forbid rate and pkts_rate in same action */ ++ if (pps64 && rate64) { ++ fprintf(stderr, "'rate' and 'pkts_rate' are not allowed in same action.\n"); ++ return -1; ++ } ++ + if (prate64) { + 
if (!rate64) { + fprintf(stderr, "'peakrate' requires 'rate'.\n"); +@@ -223,6 +249,12 @@ action_ctrl_ok: + if (presult) + addattr32(n, MAX_MSG, TCA_POLICE_RESULT, presult); + ++ if (pps64) { ++ addattr64(n, MAX_MSG, TCA_POLICE_PKTRATE64, pps64); ++ ppsburst64 = tc_calc_xmittime(pps64, ppsburst64); ++ addattr64(n, MAX_MSG, TCA_POLICE_PKTBURST64, ppsburst64); ++ } ++ + addattr_nest_end(n, tail); + res = 0; + +@@ -244,6 +276,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + unsigned int buffer; + unsigned int linklayer; + __u64 rate64, prate64; ++ __u64 pps64, ppsburst64; + + if (arg == NULL) + return 0; +@@ -287,6 +320,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + tc_print_rate(PRINT_FP, NULL, "avrate %s ", + rta_getattr_u32(tb[TCA_POLICE_AVRATE])); + ++ if ((tb[TCA_POLICE_PKTRATE64] && ++ RTA_PAYLOAD(tb[TCA_POLICE_PKTRATE64]) >= sizeof(pps64)) && ++ (tb[TCA_POLICE_PKTBURST64] && ++ RTA_PAYLOAD(tb[TCA_POLICE_PKTBURST64]) >= sizeof(ppsburst64))) { ++ pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]); ++ ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]); ++ ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64); ++ fprintf(f, "pkts_rate %llu ", pps64); ++ fprintf(f, "pkts_burst %llu ", ppsburst64); ++ } ++ + print_action_control(f, "action ", p->action, ""); + + if (tb[TCA_POLICE_RESULT]) { +-- +2.31.1 + diff --git a/SOURCES/0006-police-Add-support-for-json-output.patch b/SOURCES/0006-police-Add-support-for-json-output.patch new file mode 100644 index 0000000..1f11b9b --- /dev/null +++ b/SOURCES/0006-police-Add-support-for-json-output.patch @@ -0,0 +1,159 @@ +From 04b921c03a4680931df6660b88444f2478fb585c Mon Sep 17 00:00:00 2001 +Message-Id: <04b921c03a4680931df6660b88444f2478fb585c.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> 
+From: Andrea Claudi +Date: Wed, 11 Aug 2021 12:55:14 +0200 +Subject: [PATCH] police: Add support for json output + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393 +Upstream Status: iproute2.git commit 0d5cf51e + +commit 0d5cf51e0d6c7bfdc51754381b85367b5f8e254a +Author: Roi Dayan +Date: Mon Jun 7 09:44:08 2021 +0300 + + police: Add support for json output + + Change to use the print wrappers instead of fprintf(). + + This is example output of the options part before this commit: + + "options": { + "handle": 1, + "in_hw": true, + "actions": [ { + "order": 1 police 0x2 , + "control_action": { + "type": "drop" + }, + "control_action": { + "type": "continue" + }overhead 0b linklayer unspec + ref 1 bind 1 + , + "used_hw_stats": [ "delayed" ] + } ] + } + + This is the output of the same dump with this commit: + + "options": { + "handle": 1, + "in_hw": true, + "actions": [ { + "order": 1, + "kind": "police", + "index": 2, + "control_action": { + "type": "drop" + }, + "control_action": { + "type": "continue" + }, + "overhead": 0, + "linklayer": "unspec", + "ref": 1, + "bind": 1, + "used_hw_stats": [ "delayed" ] + } ] + } + + Signed-off-by: Roi Dayan + Reviewed-by: Paul Blakey + Signed-off-by: David Ahern + +Signed-off-by: Andrea Claudi +--- + tc/m_police.c | 30 +++++++++++++++++------------- + 1 file changed, 17 insertions(+), 13 deletions(-) + +diff --git a/tc/m_police.c b/tc/m_police.c +index 9ef0e40b..2594c089 100644 +--- a/tc/m_police.c ++++ b/tc/m_police.c +@@ -278,18 +278,19 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + __u64 rate64, prate64; + __u64 pps64, ppsburst64; + ++ print_string(PRINT_ANY, "kind", "%s", "police"); + if (arg == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_POLICE_MAX, arg); + + if (tb[TCA_POLICE_TBF] == NULL) { +- fprintf(f, "[NULL police tbf]"); +- return 0; ++ fprintf(stderr, "[NULL police tbf]"); ++ return -1; + } + #ifndef STOOPID_8BYTE + if (RTA_PAYLOAD(tb[TCA_POLICE_TBF]) < 
sizeof(*p)) { +- fprintf(f, "[truncated police tbf]"); ++ fprintf(stderr, "[truncated police tbf]"); + return -1; + } + #endif +@@ -300,13 +301,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64)) + rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]); + +- fprintf(f, " police 0x%x ", p->index); ++ print_uint(PRINT_ANY, "index", "\t index %u ", p->index); + tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64); + buffer = tc_calc_xmitsize(rate64, p->burst); + print_size(PRINT_FP, NULL, "burst %s ", buffer); + print_size(PRINT_FP, NULL, "mtu %s ", p->mtu); + if (show_raw) +- fprintf(f, "[%08x] ", p->burst); ++ print_hex(PRINT_FP, NULL, "[%08x] ", p->burst); + + prate64 = p->peakrate.rate; + if (tb[TCA_POLICE_PEAKRATE64] && +@@ -327,8 +328,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + pps64 = rta_getattr_u64(tb[TCA_POLICE_PKTRATE64]); + ppsburst64 = rta_getattr_u64(tb[TCA_POLICE_PKTBURST64]); + ppsburst64 = tc_calc_xmitsize(pps64, ppsburst64); +- fprintf(f, "pkts_rate %llu ", pps64); +- fprintf(f, "pkts_burst %llu ", ppsburst64); ++ print_u64(PRINT_ANY, "pkts_rate", "pkts_rate %llu ", pps64); ++ print_u64(PRINT_ANY, "pkts_burst", "pkts_burst %llu ", ppsburst64); + } + + print_action_control(f, "action ", p->action, ""); +@@ -337,14 +338,17 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + __u32 action = rta_getattr_u32(tb[TCA_POLICE_RESULT]); + + print_action_control(f, "/", action, " "); +- } else +- fprintf(f, " "); ++ } else { ++ print_string(PRINT_FP, NULL, " ", NULL); ++ } + +- fprintf(f, "overhead %ub ", p->rate.overhead); ++ print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead); + linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK); + if (linklayer > TC_LINKLAYER_ETHERNET || show_details) +- fprintf(f, "linklayer %s ", sprint_linklayer(linklayer, b2)); +- fprintf(f, "\n\tref %d bind %d", p->refcnt, 
p->bindcnt); ++ print_string(PRINT_ANY, "linklayer", "linklayer %s ", ++ sprint_linklayer(linklayer, b2)); ++ print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt); ++ print_int(PRINT_ANY, "bind", "bind %d ", p->bindcnt); + if (show_stats) { + if (tb[TCA_POLICE_TM]) { + struct tcf_t *tm = RTA_DATA(tb[TCA_POLICE_TM]); +@@ -352,7 +356,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + print_tm(f, tm); + } + } +- fprintf(f, "\n"); ++ print_nl(); + + + return 0; +-- +2.31.1 + diff --git a/SOURCES/0006-tc-vlan-fix-help-and-error-message-strings.patch b/SOURCES/0006-tc-vlan-fix-help-and-error-message-strings.patch deleted file mode 100644 index a70ac17..0000000 --- a/SOURCES/0006-tc-vlan-fix-help-and-error-message-strings.patch +++ /dev/null @@ -1,52 +0,0 @@ -From cdb8197d0e7380b3679ded6bab398883aead92dc Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:03 +0100 -Subject: [PATCH] tc-vlan: fix help and error message strings - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit 7c7a0fe0 - -commit 7c7a0fe0c81cdff258c4314c629d7a52ae331dc4 -Author: Guillaume Nault -Date: Mon Nov 2 11:59:46 2020 +0100 - - tc-vlan: fix help and error message strings - - * "vlan pop" can be followed by a CONTROL keyword. - - * Add missing space in error message. 
- - Signed-off-by: Guillaume Nault - Signed-off-by: Stephen Hemminger ---- - tc/m_vlan.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tc/m_vlan.c b/tc/m_vlan.c -index e6b21330..57722b73 100644 ---- a/tc/m_vlan.c -+++ b/tc/m_vlan.c -@@ -30,7 +30,7 @@ static const char * const action_names[] = { - static void explain(void) - { - fprintf(stderr, -- "Usage: vlan pop\n" -+ "Usage: vlan pop [CONTROL]\n" - " vlan push [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" - " vlan modify [ protocol VLANPROTO ] id VLANID [ priority VLANPRIO ] [CONTROL]\n" - " vlan pop_eth [CONTROL]\n" -@@ -244,7 +244,7 @@ static int print_vlan(struct action_util *au, FILE *f, struct rtattr *arg) - parse_rtattr_nested(tb, TCA_VLAN_MAX, arg); - - if (!tb[TCA_VLAN_PARMS]) { -- fprintf(stderr, "Missing vlanparameters\n"); -+ fprintf(stderr, "Missing vlan parameters\n"); - return -1; - } - parm = RTA_DATA(tb[TCA_VLAN_PARMS]); --- -2.29.2 - diff --git a/SOURCES/0007-police-Fix-normal-output-back-to-what-it-was.patch b/SOURCES/0007-police-Fix-normal-output-back-to-what-it-was.patch new file mode 100644 index 0000000..792cdbf --- /dev/null +++ b/SOURCES/0007-police-Fix-normal-output-back-to-what-it-was.patch @@ -0,0 +1,73 @@ +From 148b286b52aa8f38d8d7587b598522310067de7b Mon Sep 17 00:00:00 2001 +Message-Id: <148b286b52aa8f38d8d7587b598522310067de7b.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Wed, 11 Aug 2021 12:55:14 +0200 +Subject: [PATCH] police: Fix normal output back to what it was + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1981393 +Upstream Status: iproute2.git commit 71d36000 + +commit 71d36000dc9ce8397fc45b680e0c0340df5a28e5 +Author: Roi Dayan +Date: Mon Jul 12 15:26:53 2021 +0300 + + police: Fix normal output back to what it was + + 
With the json support fix the normal output was + changed. set it back to what it was. + Print overhead with print_size(). + Print newline before ref. + + Fixes: 0d5cf51e0d6c ("police: Add support for json output") + Signed-off-by: Roi Dayan + Signed-off-by: Stephen Hemminger + +Signed-off-by: Andrea Claudi +--- + tc/m_police.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/tc/m_police.c b/tc/m_police.c +index 2594c089..f38ab90a 100644 +--- a/tc/m_police.c ++++ b/tc/m_police.c +@@ -278,7 +278,7 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + __u64 rate64, prate64; + __u64 pps64, ppsburst64; + +- print_string(PRINT_ANY, "kind", "%s", "police"); ++ print_string(PRINT_JSON, "kind", "%s", "police"); + if (arg == NULL) + return 0; + +@@ -301,7 +301,8 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + RTA_PAYLOAD(tb[TCA_POLICE_RATE64]) >= sizeof(rate64)) + rate64 = rta_getattr_u64(tb[TCA_POLICE_RATE64]); + +- print_uint(PRINT_ANY, "index", "\t index %u ", p->index); ++ print_hex(PRINT_FP, NULL, " police 0x%x ", p->index); ++ print_uint(PRINT_JSON, "index", NULL, p->index); + tc_print_rate(PRINT_FP, NULL, "rate %s ", rate64); + buffer = tc_calc_xmitsize(rate64, p->burst); + print_size(PRINT_FP, NULL, "burst %s ", buffer); +@@ -342,12 +343,13 @@ static int print_police(struct action_util *a, FILE *f, struct rtattr *arg) + print_string(PRINT_FP, NULL, " ", NULL); + } + +- print_uint(PRINT_ANY, "overhead", "overhead %u ", p->rate.overhead); ++ print_size(PRINT_ANY, "overhead", "overhead %s ", p->rate.overhead); + linklayer = (p->rate.linklayer & TC_LINKLAYER_MASK); + if (linklayer > TC_LINKLAYER_ETHERNET || show_details) + print_string(PRINT_ANY, "linklayer", "linklayer %s ", + sprint_linklayer(linklayer, b2)); +- print_int(PRINT_ANY, "ref", "ref %d ", p->refcnt); ++ print_nl(); ++ print_int(PRINT_ANY, "ref", "\tref %d ", p->refcnt); + print_int(PRINT_ANY, "bind", "bind %d ", 
p->bindcnt); + if (show_stats) { + if (tb[TCA_POLICE_TM]) { +-- +2.31.1 + diff --git a/SOURCES/0007-tc-mpls-fix-manpage-example-and-help-message-string.patch b/SOURCES/0007-tc-mpls-fix-manpage-example-and-help-message-string.patch deleted file mode 100644 index a53a799..0000000 --- a/SOURCES/0007-tc-mpls-fix-manpage-example-and-help-message-string.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 8953735b551d5f3c18c9523ea24055f4a7f9b927 Mon Sep 17 00:00:00 2001 -Message-Id: <8953735b551d5f3c18c9523ea24055f4a7f9b927.1611877215.git.aclaudi@redhat.com> -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:03 +0100 -Subject: [PATCH] tc-mpls: fix manpage example and help message string - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit 8682f588 - -commit 8682f588bfed7862233a22626562696d662ca60c -Author: Guillaume Nault -Date: Mon Nov 2 12:24:25 2020 +0100 - - tc-mpls: fix manpage example and help message string - - Manpage: - * Remove the extra "and to ip packets" part from command description - to make it more understandable. - - * Redirect packets to eth1, instead of eth0, as told in the - description. - - Help string: - * "mpls pop" can be followed by a CONTROL keyword. - - * "mpls modify" can also set the MPLS_BOS field. 
- - Signed-off-by: Guillaume Nault - Signed-off-by: Stephen Hemminger ---- - man/man8/tc-mpls.8 | 6 +++--- - tc/m_mpls.c | 5 +++-- - 2 files changed, 6 insertions(+), 5 deletions(-) - -diff --git a/man/man8/tc-mpls.8 b/man/man8/tc-mpls.8 -index 9e563e98..7f8be221 100644 ---- a/man/man8/tc-mpls.8 -+++ b/man/man8/tc-mpls.8 -@@ -147,15 +147,15 @@ a label 123 and sends them out eth1: - .EE - .RE - --In this example, incoming MPLS unicast packets on eth0 are decapsulated and to --ip packets and output to eth1: -+In this example, incoming MPLS unicast packets on eth0 are decapsulated -+and redirected to eth1: - - .RS - .EX - #tc qdisc add dev eth0 handle ffff: ingress - #tc filter add dev eth0 protocol mpls_uc parent ffff: flower \\ - action mpls pop protocol ipv4 \\ -- action mirred egress redirect dev eth0 -+ action mirred egress redirect dev eth1 - .EE - .RE - -diff --git a/tc/m_mpls.c b/tc/m_mpls.c -index 2c3752ba..9fee22e3 100644 ---- a/tc/m_mpls.c -+++ b/tc/m_mpls.c -@@ -23,12 +23,13 @@ static const char * const action_names[] = { - static void explain(void) - { - fprintf(stderr, -- "Usage: mpls pop [ protocol MPLS_PROTO ]\n" -+ "Usage: mpls pop [ protocol MPLS_PROTO ] [CONTROL]\n" - " mpls push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" - " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" - " mpls mac_push [ protocol MPLS_PROTO ] [ label MPLS_LABEL ] [ tc MPLS_TC ]\n" - " [ ttl MPLS_TTL ] [ bos MPLS_BOS ] [CONTROL]\n" -- " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ] [CONTROL]\n" -+ " mpls modify [ label MPLS_LABEL ] [ tc MPLS_TC ] [ ttl MPLS_TTL ]\n" -+ " [ bos MPLS_BOS ] [CONTROL]\n" - " for pop, MPLS_PROTO is next header of packet - e.g. 
ip or mpls_uc\n" - " for push and mac_push, MPLS_PROTO is one of mpls_uc or mpls_mc\n" - " with default: mpls_uc\n" --- -2.29.2 - diff --git a/SOURCES/0008-tc-flower-fix-json-output-with-mpls-lse.patch b/SOURCES/0008-tc-flower-fix-json-output-with-mpls-lse.patch deleted file mode 100644 index 3c57d58..0000000 --- a/SOURCES/0008-tc-flower-fix-json-output-with-mpls-lse.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 52754e4b7d4b52e9852869d7e2f6af1b890677f1 Mon Sep 17 00:00:00 2001 -Message-Id: <52754e4b7d4b52e9852869d7e2f6af1b890677f1.1611877215.git.aclaudi@redhat.com> -In-Reply-To: -References: -From: Andrea Claudi -Date: Fri, 29 Jan 2021 00:35:04 +0100 -Subject: [PATCH] tc: flower: fix json output with mpls lse - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1885770 -Upstream Status: unknown commit 676a1a70 - -commit 676a1a708f8e99d6a4faa3de8a093f8f8c14b9da -Author: Guillaume Nault -Date: Tue Jan 12 11:30:53 2021 +0100 - - tc: flower: fix json output with mpls lse - - The json output of the TCA_FLOWER_KEY_MPLS_OPTS attribute was invalid. - - Example: - - $ tc filter add dev eth0 ingress protocol mpls_uc flower mpls \ - lse depth 1 label 100 \ - lse depth 2 label 200 - - $ tc -json filter show dev eth0 ingress - ...{"eth_type":"8847", - " mpls":[" lse":["depth":1,"label":100], - " lse":["depth":2,"label":200]]}... - - This is invalid as the arrays, introduced by "[", can't contain raw - string:value pairs. Those must be enclosed into "{}" to form valid json - ojects. Also, there are spurious whitespaces before the mpls and lse - strings because of the indentation used for normal output. - - Fix this by putting all LSE parameters (depth, label, tc, bos and ttl) - into the same json object. The "mpls" key now directly contains a list - of such objects. - - Also, handle strings differently for normal and json output, so that - json strings don't get spurious indentation whitespaces. - - Normal output isn't modified. 
- The json output now looks like: - - $ tc -json filter show dev eth0 ingress - ...{"eth_type":"8847", - "mpls":[{"depth":1,"label":100}, - {"depth":2,"label":200}]}... - - Fixes: eb09a15c12fb ("tc: flower: support multiple MPLS LSE match") - Signed-off-by: Guillaume Nault - Signed-off-by: Stephen Hemminger ---- - tc/f_flower.c | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - -diff --git a/tc/f_flower.c b/tc/f_flower.c -index 00c919fd..27731078 100644 ---- a/tc/f_flower.c -+++ b/tc/f_flower.c -@@ -2476,7 +2476,7 @@ static void flower_print_u32(const char *name, struct rtattr *attr) - print_uint(PRINT_ANY, name, namefrm, rta_getattr_u32(attr)); - } - --static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) -+static void flower_print_mpls_opt_lse(struct rtattr *lse) - { - struct rtattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1]; - struct rtattr *attr; -@@ -2493,7 +2493,8 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) - RTA_PAYLOAD(lse)); - - print_nl(); -- open_json_array(PRINT_ANY, name); -+ print_string(PRINT_FP, NULL, " lse", NULL); -+ open_json_object(NULL); - attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]; - if (attr) - print_hhu(PRINT_ANY, "depth", " depth %u", -@@ -2511,10 +2512,10 @@ static void flower_print_mpls_opt_lse(const char *name, struct rtattr *lse) - attr = tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]; - if (attr) - print_hhu(PRINT_ANY, "ttl", " ttl %u", rta_getattr_u8(attr)); -- close_json_array(PRINT_JSON, NULL); -+ close_json_object(); - } - --static void flower_print_mpls_opts(const char *name, struct rtattr *attr) -+static void flower_print_mpls_opts(struct rtattr *attr) - { - struct rtattr *lse; - int rem; -@@ -2523,11 +2524,12 @@ static void flower_print_mpls_opts(const char *name, struct rtattr *attr) - return; - - print_nl(); -- open_json_array(PRINT_ANY, name); -+ print_string(PRINT_FP, NULL, " mpls", NULL); -+ open_json_array(PRINT_JSON, "mpls"); - rem = RTA_PAYLOAD(attr); - 
lse = RTA_DATA(attr); - while (RTA_OK(lse, rem)) { -- flower_print_mpls_opt_lse(" lse", lse); -+ flower_print_mpls_opt_lse(lse); - lse = RTA_NEXT(lse, rem); - }; - if (rem) -@@ -2650,7 +2652,7 @@ static int flower_print_opt(struct filter_util *qu, FILE *f, - flower_print_ip_attr("ip_ttl", tb[TCA_FLOWER_KEY_IP_TTL], - tb[TCA_FLOWER_KEY_IP_TTL_MASK]); - -- flower_print_mpls_opts(" mpls", tb[TCA_FLOWER_KEY_MPLS_OPTS]); -+ flower_print_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS]); - flower_print_u32("mpls_label", tb[TCA_FLOWER_KEY_MPLS_LABEL]); - flower_print_u8("mpls_tc", tb[TCA_FLOWER_KEY_MPLS_TC]); - flower_print_u8("mpls_bos", tb[TCA_FLOWER_KEY_MPLS_BOS]); --- -2.29.2 - diff --git a/SOURCES/0008-tc-u32-Fix-key-folding-in-sample-option.patch b/SOURCES/0008-tc-u32-Fix-key-folding-in-sample-option.patch new file mode 100644 index 0000000..3aa5014 --- /dev/null +++ b/SOURCES/0008-tc-u32-Fix-key-folding-in-sample-option.patch @@ -0,0 +1,68 @@ +From 7fcfc0e4d6949ff32df3ed749bad8eb419cebbda Mon Sep 17 00:00:00 2001 +Message-Id: <7fcfc0e4d6949ff32df3ed749bad8eb419cebbda.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Wed, 11 Aug 2021 14:49:33 +0200 +Subject: [PATCH] tc: u32: Fix key folding in sample option + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979425 +Upstream Status: iproute2.git commit 9b7ea92b + +commit 9b7ea92b9e3feff2876f772ace01148b7406839c +Author: Phil Sutter +Date: Wed Aug 4 11:18:28 2021 +0200 + + tc: u32: Fix key folding in sample option + + In between Linux kernel 2.4 and 2.6, key folding for hash tables changed + in kernel space. When iproute2 dropped support for the older algorithm, + the wrong code was removed and kernel 2.4 folding method remained in + place. 
To get things functional for recent kernels again, restoring the + old code alone was not sufficient - additional byteorder fixes were + needed. + + While being at it, make use of ffs() and thereby align the code with how + kernel determines the shift width. + + Fixes: 267480f55383c ("Backout the 2.4 utsname hash patch.") + Signed-off-by: Phil Sutter + Signed-off-by: Stephen Hemminger + +Signed-off-by: Andrea Claudi +--- + tc/f_u32.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tc/f_u32.c b/tc/f_u32.c +index 2ed5254a..a5747f67 100644 +--- a/tc/f_u32.c ++++ b/tc/f_u32.c +@@ -978,6 +978,13 @@ show_k: + goto show_k; + } + ++static __u32 u32_hash_fold(struct tc_u32_key *key) ++{ ++ __u8 fshift = key->mask ? ffs(ntohl(key->mask)) - 1 : 0; ++ ++ return ntohl(key->val & key->mask) >> fshift; ++} ++ + static int u32_parse_opt(struct filter_util *qu, char *handle, + int argc, char **argv, struct nlmsghdr *n) + { +@@ -1110,9 +1117,7 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, + } + NEXT_ARG(); + } +- hash = sel2.keys[0].val & sel2.keys[0].mask; +- hash ^= hash >> 16; +- hash ^= hash >> 8; ++ hash = u32_hash_fold(&sel2.keys[0]); + htid = ((hash % divisor) << 12) | (htid & 0xFFF00000); + sample_ok = 1; + continue; +-- +2.31.1 + diff --git a/SOURCES/0009-iproute-force-rtm_dst_len-to-32-128.patch b/SOURCES/0009-iproute-force-rtm_dst_len-to-32-128.patch deleted file mode 100644 index c5aee30..0000000 --- a/SOURCES/0009-iproute-force-rtm_dst_len-to-32-128.patch +++ /dev/null @@ -1,67 +0,0 @@ -From f2cb0f1570ca603c5d92d6a7d87596d07fdb01cd Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: -References: -From: Andrea Claudi -Date: Tue, 9 Feb 2021 12:00:58 +0100 -Subject: [PATCH] iproute: force rtm_dst_len to 32/128 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1852038 -Upstream Status: unknown commit 5a37254b - -commit 
5a37254b71249bfb73d44d6278d767a6b127a2f9 -Author: Luca Boccassi -Date: Sun Jan 24 17:36:58 2021 +0000 - - iproute: force rtm_dst_len to 32/128 - - Since NETLINK_GET_STRICT_CHK was enabled, the kernel rejects commands - that pass a prefix length, eg: - - ip route get `1.0.0.0/1 - Error: ipv4: Invalid values in header for route get request. - ip route get 0.0.0.0/0 - Error: ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4 - - Since there's no point in setting a rtm_dst_len that we know is going - to be rejected, just force it to the right value if it's passed on - the command line. Print a warning to stderr to notify users. - - Bug-Debian: https://bugs.debian.org/944730 - Reported-By: Clément 'wxcafé' Hertling - Signed-off-by: Luca Boccassi - Signed-off-by: Stephen Hemminger ---- - ip/iproute.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/ip/iproute.c b/ip/iproute.c -index 05ec2c29..1f3c347e 100644 ---- a/ip/iproute.c -+++ b/ip/iproute.c -@@ -2067,7 +2067,18 @@ static int iproute_get(int argc, char **argv) - if (addr.bytelen) - addattr_l(&req.n, sizeof(req), - RTA_DST, &addr.data, addr.bytelen); -- req.r.rtm_dst_len = addr.bitlen; -+ if (req.r.rtm_family == AF_INET && addr.bitlen != 32) { -+ fprintf(stderr, -+ "Warning: /%u as prefix is invalid, only /32 (or none) is supported.\n", -+ addr.bitlen); -+ req.r.rtm_dst_len = 32; -+ } else if (req.r.rtm_family == AF_INET6 && addr.bitlen != 128) { -+ fprintf(stderr, -+ "Warning: /%u as prefix is invalid, only /128 (or none) is supported.\n", -+ addr.bitlen); -+ req.r.rtm_dst_len = 128; -+ } else -+ req.r.rtm_dst_len = addr.bitlen; - address_found = true; - } - argc--; argv++; --- -2.29.2 - diff --git a/SOURCES/0009-tc-htb-improve-burst-error-messages.patch b/SOURCES/0009-tc-htb-improve-burst-error-messages.patch new file mode 100644 index 0000000..e3f18bb --- /dev/null +++ b/SOURCES/0009-tc-htb-improve-burst-error-messages.patch @@ -0,0 +1,84 @@ +From 
0b66dc13c157f4d34518c06dd774ef39be0df271 Mon Sep 17 00:00:00 2001 +Message-Id: <0b66dc13c157f4d34518c06dd774ef39be0df271.1628790091.git.aclaudi@redhat.com> +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1628790091.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Thu, 12 Aug 2021 18:26:39 +0200 +Subject: [PATCH] tc: htb: improve burst error messages + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1910745 +Upstream Status: iproute2.git commit e44786b2 + +commit e44786b26934e4fbf337b0af73a9e6f53d458a25 +Author: Andrea Claudi +Date: Thu May 6 12:42:06 2021 +0200 + + tc: htb: improve burst error messages + + When a wrong value is provided for "burst" or "cburst" parameters, the + resulting error message is unclear and can be misleading: + + $ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger + Illegal "buffer" + + The message claims an illegal "buffer" is provided, but neither the + inline help nor the man page list "buffer" among the htb parameters, and + the only way to know that "burst", "maxburst" and "buffer" are synonyms + is to look into tc/q_htb.c. + + This commit tries to improve this simply changing the error string to + the parameter name provided in the user-given command, clearly pointing + out where the wrong value is. 
+ + $ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100KBps burst errtrigger + Illegal "burst" + + $ tc class add dev dummy0 parent 1: classid 1:1 htb rate 100Kbps maxburst errtrigger + Illegal "maxburst" + + Reported-by: Sebastian Mitterle + Signed-off-by: Andrea Claudi + Signed-off-by: David Ahern + +Signed-off-by: Andrea Claudi +--- + tc/q_htb.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tc/q_htb.c b/tc/q_htb.c +index 42566355..b5f95f67 100644 +--- a/tc/q_htb.c ++++ b/tc/q_htb.c +@@ -125,6 +125,7 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str + unsigned int linklayer = LINKLAYER_ETHERNET; /* Assume ethernet */ + struct rtattr *tail; + __u64 ceil64 = 0, rate64 = 0; ++ char *param; + + while (argc > 0) { + if (matches(*argv, "prio") == 0) { +@@ -160,17 +161,19 @@ static int htb_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str + } else if (matches(*argv, "burst") == 0 || + strcmp(*argv, "buffer") == 0 || + strcmp(*argv, "maxburst") == 0) { ++ param = *argv; + NEXT_ARG(); + if (get_size_and_cell(&buffer, &cell_log, *argv) < 0) { +- explain1("buffer"); ++ explain1(param); + return -1; + } + } else if (matches(*argv, "cburst") == 0 || + strcmp(*argv, "cbuffer") == 0 || + strcmp(*argv, "cmaxburst") == 0) { ++ param = *argv; + NEXT_ARG(); + if (get_size_and_cell(&cbuffer, &ccell_log, *argv) < 0) { +- explain1("cbuffer"); ++ explain1(param); + return -1; + } + } else if (strcmp(*argv, "ceil") == 0) { +-- +2.31.1 + diff --git a/SOURCES/0010-iplink_bareudp-cleanup-help-message-and-man-page.patch b/SOURCES/0010-iplink_bareudp-cleanup-help-message-and-man-page.patch deleted file mode 100644 index a0ca03e..0000000 --- a/SOURCES/0010-iplink_bareudp-cleanup-help-message-and-man-page.patch +++ /dev/null @@ -1,104 +0,0 @@ -From e5143d1e2787fca4ea365c4010e0da5bcbbbba36 Mon Sep 17 00:00:00 2001 -Message-Id: -In-Reply-To: -References: -From: Andrea Claudi -Date: Mon, 8 Mar 2021 12:52:23 
+0100 -Subject: [PATCH] iplink_bareudp: cleanup help message and man page - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1912412 -Upstream Status: unknown commit 86d9660d - -commit 86d9660dc1805be4435497ff194f618535e8fc97 -Author: Guillaume Nault -Date: Mon Feb 1 18:44:07 2021 +0100 - - iplink_bareudp: cleanup help message and man page - - * Fix PROTO description in help message (mpls isn't a valid argument). - - * Remove SRCPORTMIN description from help message since it doesn't - appear in the syntax string. - - * Use same keywords in help message and in man page. - - * Use the "ethertype" option name (.B ethertype) rather than the - option value (.I ETHERTYPE) in the man page description of - [no]multiproto. - - Signed-off-by: Guillaume Nault - Signed-off-by: Stephen Hemminger ---- - ip/iplink_bareudp.c | 8 +++++--- - man/man8/ip-link.8.in | 15 +++++++++------ - 2 files changed, 14 insertions(+), 9 deletions(-) - -diff --git a/ip/iplink_bareudp.c b/ip/iplink_bareudp.c -index 860ec699..aa311106 100644 ---- a/ip/iplink_bareudp.c -+++ b/ip/iplink_bareudp.c -@@ -22,9 +22,11 @@ static void print_explain(FILE *f) - " [ srcportmin PORT ]\n" - " [ [no]multiproto ]\n" - "\n" -- "Where: PORT := 0-65535\n" -- " PROTO := NUMBER | ip | mpls\n" -- " SRCPORTMIN := 0-65535\n" -+ "Where: PORT := UDP_PORT\n" -+ " PROTO := ETHERTYPE\n" -+ "\n" -+ "Note: ETHERTYPE can be given as number or as protocol name (\"ipv4\", \"ipv6\",\n" -+ " \"mpls_uc\", etc.).\n" - ); - } - -diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in -index f451ecf3..ce3c8636 100644 ---- a/man/man8/ip-link.8.in -+++ b/man/man8/ip-link.8.in -@@ -1304,9 +1304,9 @@ For a link of type - the following additional arguments are supported: - - .BI "ip link add " DEVICE --.BI type " bareudp " dstport " PORT " ethertype " ETHERTYPE" -+.BI type " bareudp " dstport " PORT " ethertype " PROTO" - [ --.BI srcportmin " SRCPORTMIN " -+.BI srcportmin " PORT " - ] [ - .RB [ no ] multiproto - ] -@@ -1317,11 +1317,14 
@@ the following additional arguments are supported: - - specifies the destination port for the UDP tunnel. - - .sp --.BI ethertype " ETHERTYPE" -+.BI ethertype " PROTO" - - specifies the ethertype of the L3 protocol being tunnelled. -+.B ethertype -+can be given as plain Ethernet protocol number or using the protocol name -+("ipv4", "ipv6", "mpls_uc", etc.). - - .sp --.BI srcportmin " SRCPORTMIN" -+.BI srcportmin " PORT" - - selects the lowest value of the UDP tunnel source port range. - - .sp -@@ -1329,11 +1332,11 @@ the following additional arguments are supported: - - activates support for protocols similar to the one - .RB "specified by " ethertype . - When --.I ETHERTYPE -+.B ethertype - is "mpls_uc" (that is, unicast MPLS), this allows the tunnel to also handle - multicast MPLS. - When --.I ETHERTYPE -+.B ethertype - is "ipv4", this allows the tunnel to also handle IPv6. This option is disabled - by default. - --- -2.30.2 - diff --git a/SOURCES/0010-lib-bpf_legacy-fix-bpffs-mount-when-sys-fs-bpf-exist.patch b/SOURCES/0010-lib-bpf_legacy-fix-bpffs-mount-when-sys-fs-bpf-exist.patch new file mode 100644 index 0000000..514fa63 --- /dev/null +++ b/SOURCES/0010-lib-bpf_legacy-fix-bpffs-mount-when-sys-fs-bpf-exist.patch @@ -0,0 +1,67 @@ +From d1f0f7f4e3e3a372a51e64bdd88f8ddecde1fbbf Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: <650694eb0120722499207078f965442ef7343bb1.1633614399.git.aclaudi@redhat.com> +References: <650694eb0120722499207078f965442ef7343bb1.1633614399.git.aclaudi@redhat.com> +From: Andrea Claudi +Date: Tue, 28 Sep 2021 11:46:43 +0200 +Subject: [PATCH] lib: bpf_legacy: fix bpffs mount when /sys/fs/bpf exists +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1995082 +Upstream Status: iproute2.git commit 2f5825cb + +commit 2f5825cb38028a14961a79844a069be4e3057eca +Author: Andrea Claudi +Date: Tue Sep 21 11:33:24 2021 +0200 + + lib: bpf_legacy: fix 
bpffs mount when /sys/fs/bpf exists + + bpf selftests using iproute2 fails with: + + $ ip link set dev veth0 xdp object ../bpf/xdp_dummy.o section xdp_dummy + Continuing without mounted eBPF fs. Too old kernel? + mkdir (null)/globals failed: No such file or directory + Unable to load program + + This happens when the /sys/fs/bpf directory exists. In this case, mkdir + in bpf_mnt_check_target() fails with errno == EEXIST, and the function + returns -1. Thus bpf_get_work_dir() does not call bpf_mnt_fs() and the + bpffs is not mounted. + + Fix this in bpf_mnt_check_target(), returning 0 when the mountpoint + exists. + + Fixes: d4fcdbbec9df ("lib/bpf: Fix and simplify bpf_mnt_check_target()") + Reported-by: Mingyu Shi + Reported-by: Jiri Benc + Suggested-by: Jiri Benc + Signed-off-by: Andrea Claudi + Reviewed-by: Toke Høiland-Jørgensen + Signed-off-by: Stephen Hemminger +--- + lib/bpf_legacy.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c +index 7ec9ce9d..f9dfad6e 100644 +--- a/lib/bpf_legacy.c ++++ b/lib/bpf_legacy.c +@@ -513,9 +513,12 @@ static int bpf_mnt_check_target(const char *target) + int ret; + + ret = mkdir(target, S_IRWXU); +- if (ret && errno != EEXIST) ++ if (ret) { ++ if (errno == EEXIST) ++ return 0; + fprintf(stderr, "mkdir %s failed: %s\n", target, + strerror(errno)); ++ } + + return ret; + } +-- +2.31.1 + diff --git a/SOURCES/iproute2.sh b/SOURCES/iproute2.sh new file mode 100644 index 0000000..f13e1f0 --- /dev/null +++ b/SOURCES/iproute2.sh @@ -0,0 +1,5 @@ +# tc initialization script (sh) + +if [ -z "$TC_LIB_DIR" ]; then + export TC_LIB_DIR=/usr/lib64/tc +fi diff --git a/SPECS/iproute.spec b/SPECS/iproute.spec index 6ef0b52..1b5b5a7 100644 --- a/SPECS/iproute.spec +++ b/SPECS/iproute.spec @@ -1,30 +1,28 @@ -%define rpmversion 5.9.0 -%define specrelease 4%{?dist} -%define pkg_release %{specrelease}%{?buildid} - Summary: Advanced IP routing and network device configuration tools Name: 
iproute -Version: %{rpmversion} -Release: %{pkg_release} +Version: 5.12.0 +Release: 4%{?dist}%{?buildid} Group: Applications/System URL: http://kernel.org/pub/linux/utils/net/%{name}2/ Source0: http://kernel.org/pub/linux/utils/net/%{name}2/%{name}2-%{version}.tar.xz Source1: rt_dsfield.deprecated -Patch0: 0001-v5.9.0.patch -Patch1: 0002-Update-kernel-headers.patch -Patch2: 0003-m_vlan-add-pop_eth-and-push_eth-actions.patch -Patch3: 0004-m_mpls-add-mac_push-action.patch -Patch4: 0005-m_mpls-test-the-mac_push-action-after-modify.patch -Patch5: 0006-tc-vlan-fix-help-and-error-message-strings.patch -Patch6: 0007-tc-mpls-fix-manpage-example-and-help-message-string.patch -Patch7: 0008-tc-flower-fix-json-output-with-mpls-lse.patch -Patch8: 0009-iproute-force-rtm_dst_len-to-32-128.patch -Patch9: 0010-iplink_bareudp-cleanup-help-message-and-man-page.patch +Source2: iproute2.sh +Patch0: 0001-tc-f_flower-Add-option-to-match-on-related-ct-state.patch +Patch1: 0002-tc-f_flower-Add-missing-ct_state-flags-to-usage-desc.patch +Patch2: 0003-mptcp-add-support-for-port-based-endpoint.patch +Patch3: 0004-Update-kernel-headers.patch +Patch4: 0005-police-add-support-for-packet-per-second-rate-limiti.patch +Patch5: 0006-police-Add-support-for-json-output.patch +Patch6: 0007-police-Fix-normal-output-back-to-what-it-was.patch +Patch7: 0008-tc-u32-Fix-key-folding-in-sample-option.patch +Patch8: 0009-tc-htb-improve-burst-error-messages.patch +Patch9: 0010-lib-bpf_legacy-fix-bpffs-mount-when-sys-fs-bpf-exist.patch License: GPLv2+ and Public Domain BuildRequires: bison BuildRequires: elfutils-libelf-devel BuildRequires: flex BuildRequires: iptables-devel >= 1.4.5 +BuildRequires: libbpf-devel BuildRequires: libdb-devel BuildRequires: libmnl-devel BuildRequires: libselinux-devel @@ -36,6 +34,7 @@ BuildRequires: linux-atm-libs-devel %endif # For the UsrMove transition period Conflicts: filesystem < 3 +Requires: libbpf Provides: /sbin/ip Obsoletes: %{name} < 4.5.0-3 @@ -50,7 +49,7 @@ Group: 
Applications/System License: GPLv2+ Obsoletes: %{name} < 4.5.0-3 Requires: %{name}%{?_isa} = %{version}-%{release} -Provides: tc +Provides: /sbin/tc %description tc The Traffic Control utility manages queueing disciplines, their classes and @@ -82,20 +81,12 @@ The libnetlink static library. %autosetup -p1 -n %{name}2-%{version} %build -export CFLAGS="%{optflags} -fno-exceptions" -export LDFLAGS="%{build_ldflags}" -export LIBDIR=/%{_libdir} -export IPT_LIB_DIR=/%{_lib}/xtables -./configure -make %{?_smp_mflags} +%configure +%make_build %install -export DESTDIR='%{buildroot}' export SBINDIR='%{_sbindir}' -export MANDIR='%{_mandir}' export LIBDIR='%{_libdir}' -export CONFDIR='%{_sysconfdir}/iproute2' -export DOCDIR='%{_docdir}' %make_install echo '.so man8/tc-cbq.8' > %{buildroot}%{_mandir}/man8/cbq.8 @@ -110,6 +101,9 @@ rm -rf '%{buildroot}%{_docdir}' # Append deprecated values to rt_dsfield for compatibility reasons cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield +# use TC_LIB_DIR environment variable +install -D -m644 %{SOURCE2} %{buildroot}%{_sysconfdir}/profile.d/iproute2.sh + %files %dir %{_sysconfdir}/iproute2 %{!?_licensedir:%global license %%doc} @@ -128,6 +122,7 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield %files tc %{!?_licensedir:%global license %%doc} %license COPYING +%{_sysconfdir}/profile.d/iproute2.sh %{_mandir}/man7/tc-* %{_mandir}/man8/tc* %{_mandir}/man8/cbq* @@ -152,6 +147,28 @@ cat %{SOURCE1} >>%{buildroot}%{_sysconfdir}/iproute2/rt_dsfield %{_includedir}/iproute2/bpf_elf.h %changelog +* Thu Oct 07 2021 Andrea Claudi [5.12.0-4.el8] +- lib: bpf_legacy: fix bpffs mount when /sys/fs/bpf exists (Andrea Claudi) [1995082] + +* Thu Aug 12 2021 Andrea Claudi [5.12.0-3.el8] +- tc: htb: improve burst error messages (Andrea Claudi) [1910745] +- tc: u32: Fix key folding in sample option (Andrea Claudi) [1979425] +- police: Fix normal output back to what it was (Andrea Claudi) [1981393] +- police: Add support for json 
output (Andrea Claudi) [1981393] +- police: add support for packet-per-second rate limiting (Andrea Claudi) [1981393] +- Update kernel headers (Andrea Claudi) [1981393] +- mptcp: add support for port based endpoint (Andrea Claudi) [1984733] + +* Fri Aug 06 2021 Andrea Claudi [5.12.0-2.el8] +- add build and run-time dependencies on libbpf (Andrea Claudi) [1990402] + +* Mon Jun 28 2021 Andrea Claudi [5.12.0-1.el8] +- tc: f_flower: Add missing ct_state flags to usage description (Andrea Claudi) [1957243] +- tc: f_flower: Add option to match on related ct state (Andrea Claudi) [1957243] + +* Thu Apr 29 2021 Andrea Claudi [5.12.0-0.el8] +- New version 5.12.0 [1939382] + * Fri Mar 12 2021 Andrea Claudi [5.9.0-4.el8] - iplink_bareudp: cleanup help message and man page (Andrea Claudi) [1912412]