Recreate RHEL 5.14.0-687.19.1 from CS9/upstream backports
This commit is contained in:
parent
7f71864ff0
commit
26248cc311
@ -0,0 +1,508 @@
|
||||
From 23e3d86443306f1ab3a60ea10e0a8403ecbbdb27 Mon Sep 17 00:00:00 2001
|
||||
From: CKI Backport Bot <cki-ci-bot+cki-gitlab-backport-bot@redhat.com>
|
||||
Date: Fri, 15 May 2026 17:29:34 +0000
|
||||
Subject: [PATCH] netfilter: flowtable: strictly check for maximum number of
|
||||
actions
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-176922
|
||||
CVE: CVE-2026-43329
|
||||
|
||||
commit 76522fcdbc3a02b568f5d957f7e66fc194abb893
|
||||
Author: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
Date: Thu Mar 26 00:17:09 2026 +0100
|
||||
|
||||
netfilter: flowtable: strictly check for maximum number of actions
|
||||
|
||||
The maximum number of flowtable hardware offload actions in IPv6 is:
|
||||
|
||||
* ethernet mangling (4 payload actions, 2 for each ethernet address)
|
||||
* SNAT (4 payload actions)
|
||||
* DNAT (4 payload actions)
|
||||
* Double VLAN (4 vlan actions, 2 for popping vlan, and 2 for pushing)
|
||||
for QinQ.
|
||||
* Redirect (1 action)
|
||||
|
||||
Which makes 17, while the maximum is 16. But act_ct supports tunnel
|
||||
actions too. Note that payload action operates at 32-bit word level, so
|
||||
mangling an IPv6 address takes 4 payload actions.
|
||||
|
||||
Update flow_action_entry_next() calls to check for the maximum number of
|
||||
supported actions.
|
||||
|
||||
While at it, raise the maximum number of actions per flow from 16 to 24
|
||||
so this works fine with IPv6 setups.
|
||||
|
||||
Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support")
|
||||
Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
|
||||
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
||||
|
||||
Signed-off-by: CKI Backport Bot <cki-ci-bot+cki-gitlab-backport-bot@redhat.com>
|
||||
|
||||
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
|
||||
index e59fa3be408c..0f3bccc69b57 100644
|
||||
--- a/net/netfilter/nf_flow_table_offload.c
|
||||
+++ b/net/netfilter/nf_flow_table_offload.c
|
||||
@@ -13,6 +13,8 @@
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
#include <net/netfilter/nf_conntrack_tuple.h>
|
||||
|
||||
+#define NF_FLOW_RULE_ACTION_MAX 24
|
||||
+
|
||||
static struct workqueue_struct *nf_flow_offload_add_wq;
|
||||
static struct workqueue_struct *nf_flow_offload_del_wq;
|
||||
static struct workqueue_struct *nf_flow_offload_stats_wq;
|
||||
@@ -208,7 +210,12 @@ static void flow_offload_mangle(struct flow_action_entry *entry,
|
||||
static inline struct flow_action_entry *
|
||||
flow_action_entry_next(struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
- int i = flow_rule->rule->action.num_entries++;
|
||||
+ int i;
|
||||
+
|
||||
+ if (unlikely(flow_rule->rule->action.num_entries >= NF_FLOW_RULE_ACTION_MAX))
|
||||
+ return NULL;
|
||||
+
|
||||
+ i = flow_rule->rule->action.num_entries++;
|
||||
|
||||
return &flow_rule->rule->action.entries[i];
|
||||
}
|
||||
@@ -226,6 +233,9 @@ static int flow_offload_eth_src(struct net *net,
|
||||
u32 mask, val;
|
||||
u16 val16;
|
||||
|
||||
+ if (!entry0 || !entry1)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
|
||||
switch (this_tuple->xmit_type) {
|
||||
@@ -276,6 +286,9 @@ static int flow_offload_eth_dst(struct net *net,
|
||||
u8 nud_state;
|
||||
u16 val16;
|
||||
|
||||
+ if (!entry0 || !entry1)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
|
||||
switch (this_tuple->xmit_type) {
|
||||
@@ -317,16 +330,19 @@ static int flow_offload_eth_dst(struct net *net,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv4_snat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_ipv4_snat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
__be32 addr;
|
||||
u32 offset;
|
||||
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
|
||||
@@ -337,23 +353,27 @@ static void flow_offload_ipv4_snat(struct net *net,
|
||||
offset = offsetof(struct iphdr, daddr);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
|
||||
&addr, &mask);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv4_dnat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_ipv4_dnat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
__be32 addr;
|
||||
u32 offset;
|
||||
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
|
||||
@@ -364,14 +384,15 @@ static void flow_offload_ipv4_dnat(struct net *net,
|
||||
offset = offsetof(struct iphdr, saddr);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
|
||||
&addr, &mask);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
|
||||
+static int flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
|
||||
unsigned int offset,
|
||||
const __be32 *addr, const __be32 *mask)
|
||||
{
|
||||
@@ -380,15 +401,20 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
|
||||
|
||||
for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
|
||||
offset + i * sizeof(u32), &addr[i], mask);
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv6_snat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_ipv6_snat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
const __be32 *addr;
|
||||
@@ -404,16 +430,16 @@ static void flow_offload_ipv6_snat(struct net *net,
|
||||
offset = offsetof(struct ipv6hdr, daddr);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
- flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
|
||||
+ return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv6_dnat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_ipv6_dnat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
u32 mask = ~htonl(0xffffffff);
|
||||
const __be32 *addr;
|
||||
@@ -429,10 +455,10 @@ static void flow_offload_ipv6_dnat(struct net *net,
|
||||
offset = offsetof(struct ipv6hdr, saddr);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
- flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
|
||||
+ return flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
|
||||
}
|
||||
|
||||
static int flow_offload_l4proto(const struct flow_offload *flow)
|
||||
@@ -454,15 +480,18 @@ static int flow_offload_l4proto(const struct flow_offload *flow)
|
||||
return type;
|
||||
}
|
||||
|
||||
-static void flow_offload_port_snat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_port_snat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
u32 mask, port;
|
||||
u32 offset;
|
||||
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
|
||||
@@ -477,22 +506,26 @@ static void flow_offload_port_snat(struct net *net,
|
||||
mask = ~htonl(0xffff);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
|
||||
&port, &mask);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_port_dnat(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_port_dnat(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
u32 mask, port;
|
||||
u32 offset;
|
||||
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
switch (dir) {
|
||||
case FLOW_OFFLOAD_DIR_ORIGINAL:
|
||||
port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
|
||||
@@ -507,20 +540,24 @@ static void flow_offload_port_dnat(struct net *net,
|
||||
mask = ~htonl(0xffff0000);
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
|
||||
&port, &mask);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_ipv4_checksum(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_ipv4_checksum(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
|
||||
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
|
||||
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
+
|
||||
entry->id = FLOW_ACTION_CSUM;
|
||||
entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
|
||||
|
||||
@@ -532,12 +569,14 @@ static void flow_offload_ipv4_checksum(struct net *net,
|
||||
entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
|
||||
break;
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_redirect(struct net *net,
|
||||
- const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_redirect(struct net *net,
|
||||
+ const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
const struct flow_offload_tuple *this_tuple, *other_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
@@ -555,21 +594,28 @@ static void flow_offload_redirect(struct net *net,
|
||||
ifindex = other_tuple->iifidx;
|
||||
break;
|
||||
default:
|
||||
- return;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
dev = dev_get_by_index(net, ifindex);
|
||||
if (!dev)
|
||||
- return;
|
||||
+ return -ENODEV;
|
||||
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry) {
|
||||
+ dev_put(dev);
|
||||
+ return -E2BIG;
|
||||
+ }
|
||||
+
|
||||
entry->id = FLOW_ACTION_REDIRECT;
|
||||
entry->dev = dev;
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
const struct flow_offload_tuple *this_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
@@ -577,7 +623,7 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
- return;
|
||||
+ return 0;
|
||||
|
||||
dst = this_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
@@ -586,15 +632,19 @@ static void flow_offload_encap_tunnel(const struct flow_offload *flow,
|
||||
tun_info = lwt_tun_info(dst->lwtstate);
|
||||
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
entry->id = FLOW_ACTION_TUNNEL_ENCAP;
|
||||
entry->tunnel = tun_info;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static void flow_offload_decap_tunnel(const struct flow_offload *flow,
|
||||
- enum flow_offload_tuple_dir dir,
|
||||
- struct nf_flow_rule *flow_rule)
|
||||
+static int flow_offload_decap_tunnel(const struct flow_offload *flow,
|
||||
+ enum flow_offload_tuple_dir dir,
|
||||
+ struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
const struct flow_offload_tuple *other_tuple;
|
||||
struct flow_action_entry *entry;
|
||||
@@ -602,7 +652,7 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
|
||||
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
|
||||
- return;
|
||||
+ return 0;
|
||||
|
||||
dst = other_tuple->dst_cache;
|
||||
if (dst && dst->lwtstate) {
|
||||
@@ -611,9 +661,13 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
|
||||
tun_info = lwt_tun_info(dst->lwtstate);
|
||||
if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry)
|
||||
+ return -E2BIG;
|
||||
entry->id = FLOW_ACTION_TUNNEL_DECAP;
|
||||
}
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -625,8 +679,9 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
||||
const struct flow_offload_tuple *tuple;
|
||||
int i;
|
||||
|
||||
- flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
- flow_offload_encap_tunnel(flow, dir, flow_rule);
|
||||
+ if (flow_offload_decap_tunnel(flow, dir, flow_rule) < 0 ||
|
||||
+ flow_offload_encap_tunnel(flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
|
||||
if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
|
||||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
@@ -642,6 +697,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
||||
|
||||
if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry)
|
||||
+ return -1;
|
||||
entry->id = FLOW_ACTION_VLAN_POP;
|
||||
}
|
||||
}
|
||||
@@ -655,6 +712,8 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
||||
continue;
|
||||
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
+ if (!entry)
|
||||
+ return -1;
|
||||
|
||||
switch (other_tuple->encap[i].proto) {
|
||||
case htons(ETH_P_PPP_SES):
|
||||
@@ -680,18 +739,22 @@ int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
|
||||
return -1;
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
- flow_offload_ipv4_snat(net, flow, dir, flow_rule);
|
||||
- flow_offload_port_snat(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_ipv4_snat(net, flow, dir, flow_rule) < 0 ||
|
||||
+ flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
}
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
- flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
|
||||
- flow_offload_port_dnat(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_ipv4_dnat(net, flow, dir, flow_rule) < 0 ||
|
||||
+ flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
}
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
|
||||
test_bit(NF_FLOW_DNAT, &flow->flags))
|
||||
- flow_offload_ipv4_checksum(net, flow, flow_rule);
|
||||
+ if (flow_offload_ipv4_checksum(net, flow, flow_rule) < 0)
|
||||
+ return -1;
|
||||
|
||||
- flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -705,22 +768,23 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
|
||||
return -1;
|
||||
|
||||
if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
|
||||
- flow_offload_ipv6_snat(net, flow, dir, flow_rule);
|
||||
- flow_offload_port_snat(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_ipv6_snat(net, flow, dir, flow_rule) < 0 ||
|
||||
+ flow_offload_port_snat(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
}
|
||||
if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
|
||||
- flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
|
||||
- flow_offload_port_dnat(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_ipv6_dnat(net, flow, dir, flow_rule) < 0 ||
|
||||
+ flow_offload_port_dnat(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
}
|
||||
|
||||
- flow_offload_redirect(net, flow, dir, flow_rule);
|
||||
+ if (flow_offload_redirect(net, flow, dir, flow_rule) < 0)
|
||||
+ return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
|
||||
|
||||
-#define NF_FLOW_RULE_ACTION_MAX 16
|
||||
-
|
||||
static struct nf_flow_rule *
|
||||
nf_flow_offload_rule_alloc(struct net *net,
|
||||
const struct flow_offload_work *offload,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,112 @@
|
||||
From aed3d041ab061ec8a64f50a3edda0f4db7280025 Mon Sep 17 00:00:00 2001
|
||||
From: Yussuf Khalil <dev@pp3345.net>
|
||||
Date: Fri, 6 Mar 2026 12:06:35 +0000
|
||||
Subject: [PATCH] drm/amd/display: Do not skip unrelated mode changes in DSC
|
||||
validation
|
||||
|
||||
Starting with commit 17ce8a6907f7 ("drm/amd/display: Add dsc pre-validation in
|
||||
atomic check"), amdgpu resets the CRTC state mode_changed flag to false when
|
||||
recomputing the DSC configuration results in no timing change for a particular
|
||||
stream.
|
||||
|
||||
However, this is incorrect in scenarios where a change in MST/DSC configuration
|
||||
happens in the same KMS commit as another (unrelated) mode change. For example,
|
||||
the integrated panel of a laptop may be configured differently (e.g., HDR
|
||||
enabled/disabled) depending on whether external screens are attached. In this
|
||||
case, plugging in external DP-MST screens may result in the mode_changed flag
|
||||
being dropped incorrectly for the integrated panel if its DSC configuration
|
||||
did not change during precomputation in pre_validate_dsc().
|
||||
|
||||
At this point, however, dm_update_crtc_state() has already created new streams
|
||||
for CRTCs with DSC-independent mode changes. In turn,
|
||||
amdgpu_dm_commit_streams() will never release the old stream, resulting in a
|
||||
memory leak. amdgpu_dm_atomic_commit_tail() will never acquire a reference to
|
||||
the new stream either, which manifests as a use-after-free when the stream gets
|
||||
disabled later on:
|
||||
|
||||
BUG: KASAN: use-after-free in dc_stream_release+0x25/0x90 [amdgpu]
|
||||
Write of size 4 at addr ffff88813d836524 by task kworker/9:9/29977
|
||||
|
||||
Workqueue: events drm_mode_rmfb_work_fn
|
||||
Call Trace:
|
||||
<TASK>
|
||||
dump_stack_lvl+0x6e/0xa0
|
||||
print_address_description.constprop.0+0x88/0x320
|
||||
? dc_stream_release+0x25/0x90 [amdgpu]
|
||||
print_report+0xfc/0x1ff
|
||||
? srso_alias_return_thunk+0x5/0xfbef5
|
||||
? __virt_addr_valid+0x225/0x4e0
|
||||
? dc_stream_release+0x25/0x90 [amdgpu]
|
||||
kasan_report+0xe1/0x180
|
||||
? dc_stream_release+0x25/0x90 [amdgpu]
|
||||
kasan_check_range+0x125/0x200
|
||||
dc_stream_release+0x25/0x90 [amdgpu]
|
||||
dc_state_destruct+0x14d/0x5c0 [amdgpu]
|
||||
dc_state_release.part.0+0x4e/0x130 [amdgpu]
|
||||
dm_atomic_destroy_state+0x3f/0x70 [amdgpu]
|
||||
drm_atomic_state_default_clear+0x8ee/0xf30
|
||||
? drm_mode_object_put.part.0+0xb1/0x130
|
||||
__drm_atomic_state_free+0x15c/0x2d0
|
||||
atomic_remove_fb+0x67e/0x980
|
||||
|
||||
Since there is no reliable way of figuring out whether a CRTC has unrelated
|
||||
mode changes pending at the time of DSC validation, remember the value of the
|
||||
mode_changed flag from before the point where a CRTC was marked as potentially
|
||||
affected by a change in DSC configuration. Reset the mode_changed flag to this
|
||||
earlier value instead in pre_validate_dsc().
|
||||
|
||||
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/5004
|
||||
Fixes: 17ce8a6907f7 ("drm/amd/display: Add dsc pre-validation in atomic check")
|
||||
Signed-off-by: Yussuf Khalil <dev@pp3345.net>
|
||||
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
|
||||
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
|
||||
(cherry picked from commit cc7c7121ae082b7b82891baa7280f1ff2608f22b)
|
||||
|
||||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
index 085cc98bd875..a9c398b1516b 100644
|
||||
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
@@ -12523,6 +12523,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
|
||||
}
|
||||
|
||||
if (dc_resource_is_dsc_encoding_supported(dc)) {
|
||||
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
|
||||
+ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
|
||||
+ dm_new_crtc_state->mode_changed_independent_from_dsc = new_crtc_state->mode_changed;
|
||||
+ }
|
||||
+
|
||||
for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
|
||||
if (drm_atomic_crtc_needs_modeset(new_crtc_state)) {
|
||||
ret = add_affected_mst_dsc_crtcs(state, crtc);
|
||||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
|
||||
index 800813671748..d15812d51d72 100644
|
||||
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
|
||||
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
|
||||
@@ -984,6 +984,7 @@ struct dm_crtc_state {
|
||||
|
||||
bool freesync_vrr_info_changed;
|
||||
|
||||
+ bool mode_changed_independent_from_dsc;
|
||||
bool dsc_force_changed;
|
||||
bool vrr_supported;
|
||||
struct mod_freesync_config freesync_config;
|
||||
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
|
||||
index 7be50e8c0636..5d8c4c7020b1 100644
|
||||
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
|
||||
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
|
||||
@@ -1744,9 +1744,11 @@ int pre_validate_dsc(struct drm_atomic_state *state,
|
||||
int ind = find_crtc_index_in_state_by_stream(state, stream);
|
||||
|
||||
if (ind >= 0) {
|
||||
+ struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(state->crtcs[ind].new_state);
|
||||
+
|
||||
DRM_INFO_ONCE("%s:%d MST_DSC no mode changed for stream 0x%p\n",
|
||||
__func__, __LINE__, stream);
|
||||
- state->crtcs[ind].new_state->mode_changed = 0;
|
||||
+ dm_new_crtc_state->base.mode_changed = dm_new_crtc_state->mode_changed_independent_from_dsc;
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
From 0452b6526b2f54b2413b9cb4ff1ea2ac542c99c7 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Dumazet <edumazet@google.com>
|
||||
Date: Thu, 26 Mar 2026 20:26:08 +0000
|
||||
Subject: [PATCH] ipv6: icmp: clear skb2->cb[] in ip6_err_gen_icmpv6_unreach()
|
||||
|
||||
[ Upstream commit 86ab3e55673a7a49a841838776f1ab18d23a67b5 ]
|
||||
|
||||
Sashiko AI-review observed:
|
||||
|
||||
In ip6_err_gen_icmpv6_unreach(), the skb is an outer IPv4 ICMP error packet
|
||||
where its cb contains an IPv4 inet_skb_parm. When skb is cloned into skb2
|
||||
and passed to icmp6_send(), it uses IP6CB(skb2).
|
||||
|
||||
IP6CB interprets the IPv4 inet_skb_parm as an inet6_skb_parm. The cipso
|
||||
offset in inet_skb_parm.opt directly overlaps with dsthao in inet6_skb_parm
|
||||
at offset 18.
|
||||
|
||||
If an attacker sends a forged ICMPv4 error with a CIPSO IP option, dsthao
|
||||
would be a non-zero offset. Inside icmp6_send(), mip6_addr_swap() is called
|
||||
and uses ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO).
|
||||
|
||||
This would scan the inner, attacker-controlled IPv6 packet starting at that
|
||||
offset, potentially returning a fake TLV without checking if the remaining
|
||||
packet length can hold the full 18-byte struct ipv6_destopt_hao.
|
||||
|
||||
Could mip6_addr_swap() then perform a 16-byte swap that extends past the end
|
||||
of the packet data into skb_shared_info?
|
||||
|
||||
Should the cb array also be cleared in ip6_err_gen_icmpv6_unreach() and
|
||||
ip6ip6_err() to prevent this?
|
||||
|
||||
This patch implements the first suggestion.
|
||||
|
||||
I am not sure if ip6ip6_err() needs to be changed.
|
||||
A separate patch would be better anyway.
|
||||
|
||||
Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error")
|
||||
Reported-by: Ido Schimmel <idosch@nvidia.com>
|
||||
Closes: https://sashiko.dev/#/patchset/20260326155138.2429480-1-edumazet%40google.com
|
||||
Signed-off-by: Eric Dumazet <edumazet@google.com>
|
||||
Cc: Oskar Kjos <oskar.kjos@hotmail.com>
|
||||
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20260326202608.2976021-1-edumazet@google.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
Signed-off-by: Sasha Levin <sashal@kernel.org>
|
||||
|
||||
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
|
||||
index 8601c76f3cc9..6f053874de74 100644
|
||||
--- a/net/ipv6/icmp.c
|
||||
+++ b/net/ipv6/icmp.c
|
||||
@@ -674,6 +674,9 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
|
||||
if (!skb2)
|
||||
return 1;
|
||||
|
||||
+ /* Remove debris left by IPv4 stack. */
|
||||
+ memset(IP6CB(skb2), 0, sizeof(*IP6CB(skb2)));
|
||||
+
|
||||
skb_dst_drop(skb2);
|
||||
skb_pull(skb2, nhs);
|
||||
skb_reset_network_header(skb2);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,137 @@
|
||||
From dc9c57624e89fab59f90148b663d5171e0fa2416 Mon Sep 17 00:00:00 2001
|
||||
From: CKI Backport Bot <cki-ci-bot+cki-gitlab-backport-bot@redhat.com>
|
||||
Date: Wed, 27 May 2026 17:14:34 +0000
|
||||
Subject: [PATCH] ALSA: aloop: Fix peer runtime UAF during format-change stop
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-179312
|
||||
CVE: CVE-2026-46090
|
||||
Backported from tree(s): linux
|
||||
|
||||
commit e5c33cdc6f402eab8abd36ecf436b22c9d3a8aff
|
||||
Author: Cássio Gabriel <cassiogabrielcontato@gmail.com>
|
||||
Date: Fri Apr 24 09:48:41 2026 -0300
|
||||
|
||||
ALSA: aloop: Fix peer runtime UAF during format-change stop
|
||||
|
||||
loopback_check_format() may stop the capture side when playback starts
|
||||
with parameters that no longer match a running capture stream. Commit
|
||||
826af7fa62e3 ("ALSA: aloop: Fix racy access at PCM trigger") moved
|
||||
the peer lookup under cable->lock, but the actual snd_pcm_stop() still
|
||||
runs after dropping that lock.
|
||||
|
||||
A concurrent close can clear the capture entry from cable->streams[] and
|
||||
detach or free its runtime while the playback trigger path still holds a
|
||||
stale peer substream pointer.
|
||||
|
||||
Keep a per-cable count of in-flight peer stops before dropping
|
||||
cable->lock, and make free_cable() wait for those stops before
|
||||
detaching the runtime. This preserves the existing behavior while
|
||||
making the peer runtime lifetime explicit.
|
||||
|
||||
Reported-by: syzbot+8fa95c41eafbc9d2ff6f@syzkaller.appspotmail.com
|
||||
Closes: https://syzkaller.appspot.com/bug?extid=8fa95c41eafbc9d2ff6f
|
||||
Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback")
|
||||
Cc: stable@vger.kernel.org
|
||||
Suggested-by: Takashi Iwai <tiwai@suse.com>
|
||||
Signed-off-by: Cássio Gabriel <cassiogabrielcontato@gmail.com>
|
||||
Link: https://patch.msgid.link/20260424-alsa-aloop-peer-stop-uaf-v2-1-94e68101db8a@gmail.com
|
||||
Signed-off-by: Takashi Iwai <tiwai@suse.de>
|
||||
|
||||
Signed-off-by: CKI Backport Bot <cki-ci-bot+cki-gitlab-backport-bot@redhat.com>
|
||||
|
||||
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
|
||||
index db137222d319..d2b9160a08dd 100644
|
||||
--- a/sound/drivers/aloop.c
|
||||
+++ b/sound/drivers/aloop.c
|
||||
@@ -99,6 +99,9 @@ struct loopback_ops {
|
||||
struct loopback_cable {
|
||||
spinlock_t lock;
|
||||
struct loopback_pcm *streams[2];
|
||||
+ /* in-flight peer stops running outside cable->lock */
|
||||
+ atomic_t stop_count;
|
||||
+ wait_queue_head_t stop_wait;
|
||||
struct snd_pcm_hardware hw;
|
||||
/* flags */
|
||||
unsigned int valid;
|
||||
@@ -366,8 +369,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream)
|
||||
return 0;
|
||||
if (stream == SNDRV_PCM_STREAM_CAPTURE)
|
||||
return -EIO;
|
||||
- else if (cruntime->state == SNDRV_PCM_STATE_RUNNING)
|
||||
+ else if (cruntime->state == SNDRV_PCM_STATE_RUNNING) {
|
||||
+ /* close must not free the peer runtime below */
|
||||
+ atomic_inc(&cable->stop_count);
|
||||
stop_capture = true;
|
||||
+ }
|
||||
}
|
||||
|
||||
setup = get_setup(dpcm_play);
|
||||
@@ -396,8 +402,11 @@ static int loopback_check_format(struct loopback_cable *cable, int stream)
|
||||
}
|
||||
}
|
||||
|
||||
- if (stop_capture)
|
||||
+ if (stop_capture) {
|
||||
snd_pcm_stop(dpcm_capt->substream, SNDRV_PCM_STATE_DRAINING);
|
||||
+ if (atomic_dec_and_test(&cable->stop_count))
|
||||
+ wake_up(&cable->stop_wait);
|
||||
+ }
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1049,23 +1058,29 @@ static void free_cable(struct snd_pcm_substream *substream)
|
||||
struct loopback *loopback = substream->private_data;
|
||||
int dev = get_cable_index(substream);
|
||||
struct loopback_cable *cable;
|
||||
+ struct loopback_pcm *dpcm;
|
||||
+ bool other_alive;
|
||||
|
||||
cable = loopback->cables[substream->number][dev];
|
||||
if (!cable)
|
||||
return;
|
||||
- if (cable->streams[!substream->stream]) {
|
||||
- /* other stream is still alive */
|
||||
- guard(spinlock_irq)(&cable->lock);
|
||||
- cable->streams[substream->stream] = NULL;
|
||||
- } else {
|
||||
- struct loopback_pcm *dpcm = substream->runtime->private_data;
|
||||
|
||||
- if (cable->ops && cable->ops->close_cable && dpcm)
|
||||
- cable->ops->close_cable(dpcm);
|
||||
- /* free the cable */
|
||||
- loopback->cables[substream->number][dev] = NULL;
|
||||
- kfree(cable);
|
||||
+ scoped_guard(spinlock_irq, &cable->lock) {
|
||||
+ cable->streams[substream->stream] = NULL;
|
||||
+ other_alive = cable->streams[!substream->stream];
|
||||
}
|
||||
+
|
||||
+ /* Pair with the stop_count increment in loopback_check_format(). */
|
||||
+ wait_event(cable->stop_wait, !atomic_read(&cable->stop_count));
|
||||
+ if (other_alive)
|
||||
+ return;
|
||||
+
|
||||
+ dpcm = substream->runtime->private_data;
|
||||
+ if (cable->ops && cable->ops->close_cable && dpcm)
|
||||
+ cable->ops->close_cable(dpcm);
|
||||
+ /* free the cable */
|
||||
+ loopback->cables[substream->number][dev] = NULL;
|
||||
+ kfree(cable);
|
||||
}
|
||||
|
||||
static int loopback_jiffies_timer_open(struct loopback_pcm *dpcm)
|
||||
@@ -1260,6 +1275,8 @@ static int loopback_open(struct snd_pcm_substream *substream)
|
||||
goto unlock;
|
||||
}
|
||||
spin_lock_init(&cable->lock);
|
||||
+ atomic_set(&cable->stop_count, 0);
|
||||
+ init_waitqueue_head(&cable->stop_wait);
|
||||
cable->hw = loopback_pcm_hardware;
|
||||
if (loopback->timer_source)
|
||||
cable->ops = &loopback_snd_timer_ops;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,208 @@
|
||||
From efd0aa1426972ae0542b15484850fdd73395262f Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Tue, 28 Apr 2026 13:24:18 -0400
|
||||
Subject: [PATCH] RDMA/iwcm: Fix workqueue list corruption by removing
|
||||
work_list
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-163491
|
||||
|
||||
commit 7874eeacfa42177565c01d5198726671acf7adf2
|
||||
Author: Jacob Moroni <jmoroni@google.com>
|
||||
Date: Mon Jan 12 02:00:06 2026 +0000
|
||||
|
||||
RDMA/iwcm: Fix workqueue list corruption by removing work_list
|
||||
|
||||
The commit e1168f0 ("RDMA/iwcm: Simplify cm_event_handler()")
|
||||
changed the work submission logic to unconditionally call
|
||||
queue_work() with the expectation that queue_work() would
|
||||
have no effect if work was already pending. The problem is
|
||||
that a free list of struct iwcm_work is used (for which
|
||||
struct work_struct is embedded), so each call to queue_work()
|
||||
is basically unique and therefore does indeed queue the work.
|
||||
|
||||
This causes a problem in the work handler which walks the work_list
|
||||
until it's empty to process entries. This means that a single
|
||||
run of the work handler could process item N+1 and release it
|
||||
back to the free list while the actual workqueue entry is still
|
||||
queued. It could then get reused (INIT_WORK...) and lead to
|
||||
list corruption in the workqueue logic.
|
||||
|
||||
Fix this by just removing the work_list. The workqueue already
|
||||
does this for us.
|
||||
|
||||
This fixes the following error that was observed when stress
|
||||
testing with ucmatose on an Intel E830 in iWARP mode:
|
||||
|
||||
[ 151.465780] list_del corruption. next->prev should be ffff9f0915c69c08, but was ffff9f0a1116be08. (next=ffff9f0a15b11c08)
|
||||
[ 151.466639] ------------[ cut here ]------------
|
||||
[ 151.466986] kernel BUG at lib/list_debug.c:67!
|
||||
[ 151.467349] Oops: invalid opcode: 0000 [#1] SMP NOPTI
|
||||
[ 151.467753] CPU: 14 UID: 0 PID: 2306 Comm: kworker/u64:18 Not tainted 6.19.0-rc4+ #1 PREEMPT(voluntary)
|
||||
[ 151.468466] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
|
||||
[ 151.469192] Workqueue: 0x0 (iw_cm_wq)
|
||||
[ 151.469478] RIP: 0010:__list_del_entry_valid_or_report+0xf0/0x100
|
||||
[ 151.469942] Code: c7 58 5f 4c b2 e8 10 50 aa ff 0f 0b 48 89 ef e8 36 57 cb ff 48 8b 55 08 48 89 e9 48 89 de 48 c7 c7 a8 5f 4c b2 e8 f0 4f aa ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90
|
||||
[ 151.471323] RSP: 0000:ffffb15644e7bd68 EFLAGS: 00010046
|
||||
[ 151.471712] RAX: 000000000000006d RBX: ffff9f0915c69c08 RCX: 0000000000000027
|
||||
[ 151.472243] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9f0a37d9c600
|
||||
[ 151.472768] RBP: ffff9f0a15b11c08 R08: 0000000000000000 R09: c0000000ffff7fff
|
||||
[ 151.473294] R10: 0000000000000001 R11: ffffb15644e7bba8 R12: ffff9f092339ee68
|
||||
[ 151.473817] R13: ffff9f0900059c28 R14: ffff9f092339ee78 R15: 0000000000000000
|
||||
[ 151.474344] FS: 0000000000000000(0000) GS:ffff9f0a847b5000(0000) knlGS:0000000000000000
|
||||
[ 151.474934] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
||||
[ 151.475362] CR2: 0000559e233a9088 CR3: 000000020296b004 CR4: 0000000000770ef0
|
||||
[ 151.475895] PKRU: 55555554
|
||||
[ 151.476118] Call Trace:
|
||||
[ 151.476331] <TASK>
|
||||
[ 151.476497] move_linked_works+0x49/0xa0
|
||||
[ 151.476792] __pwq_activate_work.isra.46+0x2f/0xa0
|
||||
[ 151.477151] pwq_dec_nr_in_flight+0x1e0/0x2f0
|
||||
[ 151.477479] process_scheduled_works+0x1c8/0x410
|
||||
[ 151.477823] worker_thread+0x125/0x260
|
||||
[ 151.478108] ? __pfx_worker_thread+0x10/0x10
|
||||
[ 151.478430] kthread+0xfe/0x240
|
||||
[ 151.478671] ? __pfx_kthread+0x10/0x10
|
||||
[ 151.478955] ? __pfx_kthread+0x10/0x10
|
||||
[ 151.479240] ret_from_fork+0x208/0x270
|
||||
[ 151.479523] ? __pfx_kthread+0x10/0x10
|
||||
[ 151.479806] ret_from_fork_asm+0x1a/0x30
|
||||
[ 151.480103] </TASK>
|
||||
|
||||
Fixes: e1168f09b331 ("RDMA/iwcm: Simplify cm_event_handler()")
|
||||
Signed-off-by: Jacob Moroni <jmoroni@google.com>
|
||||
Link: https://patch.msgid.link/20260112020006.1352438-1-jmoroni@google.com
|
||||
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
|
||||
index 9419ab4435df..a2cf6135fcde 100644
|
||||
--- a/drivers/infiniband/core/iwcm.c
|
||||
+++ b/drivers/infiniband/core/iwcm.c
|
||||
@@ -95,7 +95,6 @@ static struct workqueue_struct *iwcm_wq;
|
||||
struct iwcm_work {
|
||||
struct work_struct work;
|
||||
struct iwcm_id_private *cm_id;
|
||||
- struct list_head list;
|
||||
struct iw_cm_event event;
|
||||
struct list_head free_list;
|
||||
};
|
||||
@@ -179,7 +178,6 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
|
||||
return -ENOMEM;
|
||||
}
|
||||
work->cm_id = cm_id_priv;
|
||||
- INIT_LIST_HEAD(&work->list);
|
||||
put_work(work);
|
||||
}
|
||||
return 0;
|
||||
@@ -214,7 +212,6 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
|
||||
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
|
||||
{
|
||||
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
|
||||
- BUG_ON(!list_empty(&cm_id_priv->work_list));
|
||||
free_cm_id(cm_id_priv);
|
||||
return true;
|
||||
}
|
||||
@@ -261,7 +258,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
|
||||
refcount_set(&cm_id_priv->refcount, 1);
|
||||
init_waitqueue_head(&cm_id_priv->connect_wait);
|
||||
init_completion(&cm_id_priv->destroy_comp);
|
||||
- INIT_LIST_HEAD(&cm_id_priv->work_list);
|
||||
INIT_LIST_HEAD(&cm_id_priv->work_free_list);
|
||||
|
||||
return &cm_id_priv->id;
|
||||
@@ -1008,13 +1004,13 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
|
||||
}
|
||||
|
||||
/*
|
||||
- * Process events on the work_list for the cm_id. If the callback
|
||||
- * function requests that the cm_id be deleted, a flag is set in the
|
||||
- * cm_id flags to indicate that when the last reference is
|
||||
- * removed, the cm_id is to be destroyed. This is necessary to
|
||||
- * distinguish between an object that will be destroyed by the app
|
||||
- * thread asleep on the destroy_comp list vs. an object destroyed
|
||||
- * here synchronously when the last reference is removed.
|
||||
+ * Process events for the cm_id. If the callback function requests
|
||||
+ * that the cm_id be deleted, a flag is set in the cm_id flags to
|
||||
+ * indicate that when the last reference is removed, the cm_id is
|
||||
+ * to be destroyed. This is necessary to distinguish between an
|
||||
+ * object that will be destroyed by the app thread asleep on the
|
||||
+ * destroy_comp list vs. an object destroyed here synchronously
|
||||
+ * when the last reference is removed.
|
||||
*/
|
||||
static void cm_work_handler(struct work_struct *_work)
|
||||
{
|
||||
@@ -1025,35 +1021,26 @@ static void cm_work_handler(struct work_struct *_work)
|
||||
int ret = 0;
|
||||
|
||||
spin_lock_irqsave(&cm_id_priv->lock, flags);
|
||||
- while (!list_empty(&cm_id_priv->work_list)) {
|
||||
- work = list_first_entry(&cm_id_priv->work_list,
|
||||
- struct iwcm_work, list);
|
||||
- list_del_init(&work->list);
|
||||
- levent = work->event;
|
||||
- put_work(work);
|
||||
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
-
|
||||
- if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
|
||||
- ret = process_event(cm_id_priv, &levent);
|
||||
- if (ret) {
|
||||
- destroy_cm_id(&cm_id_priv->id);
|
||||
- WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
|
||||
- }
|
||||
- } else
|
||||
- pr_debug("dropping event %d\n", levent.event);
|
||||
- if (iwcm_deref_id(cm_id_priv))
|
||||
- return;
|
||||
- spin_lock_irqsave(&cm_id_priv->lock, flags);
|
||||
- }
|
||||
+ levent = work->event;
|
||||
+ put_work(work);
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
+
|
||||
+ if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
|
||||
+ ret = process_event(cm_id_priv, &levent);
|
||||
+ if (ret) {
|
||||
+ destroy_cm_id(&cm_id_priv->id);
|
||||
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
|
||||
+ }
|
||||
+ } else
|
||||
+ pr_debug("dropping event %d\n", levent.event);
|
||||
+ if (iwcm_deref_id(cm_id_priv))
|
||||
+ return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called on interrupt context. Schedule events on
|
||||
* the iwcm_wq thread to allow callback functions to downcall into
|
||||
- * the CM and/or block. Events are queued to a per-CM_ID
|
||||
- * work_list. If this is the first event on the work_list, the work
|
||||
- * element is also queued on the iwcm_wq thread.
|
||||
+ * the CM and/or block.
|
||||
*
|
||||
* Each event holds a reference on the cm_id. Until the last posted
|
||||
* event has been delivered and processed, the cm_id cannot be
|
||||
@@ -1095,7 +1082,6 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
|
||||
}
|
||||
|
||||
refcount_inc(&cm_id_priv->refcount);
|
||||
- list_add_tail(&work->list, &cm_id_priv->work_list);
|
||||
queue_work(iwcm_wq, &work->work);
|
||||
out:
|
||||
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
|
||||
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
|
||||
index bf74639be128..b56fb12edece 100644
|
||||
--- a/drivers/infiniband/core/iwcm.h
|
||||
+++ b/drivers/infiniband/core/iwcm.h
|
||||
@@ -50,7 +50,6 @@ struct iwcm_id_private {
|
||||
struct ib_qp *qp;
|
||||
struct completion destroy_comp;
|
||||
wait_queue_head_t connect_wait;
|
||||
- struct list_head work_list;
|
||||
spinlock_t lock;
|
||||
refcount_t refcount;
|
||||
struct list_head work_free_list;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,332 @@
|
||||
From 3a8d8e4f0e85b288fc43ec65f8ffac858af16239 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 15:16:45 +0200
|
||||
Subject: [PATCH] binder: use cred instead of task for selinux checks
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 52f88693378a58094c538662ba652aff0253c4fe
|
||||
Author: Todd Kjos <tkjos@google.com>
|
||||
Date: Tue Oct 12 09:56:13 2021 -0700
|
||||
|
||||
binder: use cred instead of task for selinux checks
|
||||
|
||||
Since binder was integrated with selinux, it has passed
|
||||
'struct task_struct' associated with the binder_proc
|
||||
to represent the source and target of transactions.
|
||||
The conversion of task to SID was then done in the hook
|
||||
implementations. It turns out that there are race conditions
|
||||
which can result in an incorrect security context being used.
|
||||
|
||||
Fix by using the 'struct cred' saved during binder_open and pass
|
||||
it to the selinux subsystem.
|
||||
|
||||
Cc: stable@vger.kernel.org # 5.14 (need backport for earlier stables)
|
||||
Fixes: 79af73079d75 ("Add security hooks to binder and implement the hooks for SELinux.")
|
||||
Suggested-by: Jann Horn <jannh@google.com>
|
||||
Signed-off-by: Todd Kjos <tkjos@google.com>
|
||||
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
|
||||
index 4ef4e2dc47cb..3a01e1862d9e 100644
|
||||
--- a/drivers/android/binder.c
|
||||
+++ b/drivers/android/binder.c
|
||||
@@ -2049,7 +2049,7 @@ static int binder_translate_binder(struct flat_binder_object *fp,
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
- if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
|
||||
+ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
|
||||
ret = -EPERM;
|
||||
goto done;
|
||||
}
|
||||
@@ -2095,7 +2095,7 @@ static int binder_translate_handle(struct flat_binder_object *fp,
|
||||
proc->pid, thread->pid, fp->handle);
|
||||
return -EINVAL;
|
||||
}
|
||||
- if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
|
||||
+ if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
|
||||
ret = -EPERM;
|
||||
goto done;
|
||||
}
|
||||
@@ -2183,7 +2183,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset,
|
||||
ret = -EBADF;
|
||||
goto err_fget;
|
||||
}
|
||||
- ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file);
|
||||
+ ret = security_binder_transfer_file(proc->cred, target_proc->cred, file);
|
||||
if (ret < 0) {
|
||||
ret = -EPERM;
|
||||
goto err_security;
|
||||
@@ -2588,8 +2588,8 @@ static void binder_transaction(struct binder_proc *proc,
|
||||
return_error_line = __LINE__;
|
||||
goto err_invalid_target_handle;
|
||||
}
|
||||
- if (security_binder_transaction(proc->tsk,
|
||||
- target_proc->tsk) < 0) {
|
||||
+ if (security_binder_transaction(proc->cred,
|
||||
+ target_proc->cred) < 0) {
|
||||
return_error = BR_FAILED_REPLY;
|
||||
return_error_param = -EPERM;
|
||||
return_error_line = __LINE__;
|
||||
@@ -4554,7 +4554,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp,
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
- ret = security_binder_set_context_mgr(proc->tsk);
|
||||
+ ret = security_binder_set_context_mgr(proc->cred);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (uid_valid(context->binder_context_mgr_uid)) {
|
||||
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
|
||||
index fc89fae1ea60..1c2be7057bd9 100644
|
||||
--- a/include/linux/lsm_hook_defs.h
|
||||
+++ b/include/linux/lsm_hook_defs.h
|
||||
@@ -26,13 +26,13 @@
|
||||
* #undef LSM_HOOK
|
||||
* };
|
||||
*/
|
||||
-LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr)
|
||||
-LSM_HOOK(int, 0, binder_transaction, struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
-LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
-LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from,
|
||||
- struct task_struct *to, struct file *file)
|
||||
+LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr)
|
||||
+LSM_HOOK(int, 0, binder_transaction, const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
+LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
+LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from,
|
||||
+ const struct cred *to, struct file *file)
|
||||
LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child,
|
||||
unsigned int mode)
|
||||
LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent)
|
||||
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
|
||||
index 3f04476cc692..7577ecfc79e4 100644
|
||||
--- a/include/linux/lsm_hooks.h
|
||||
+++ b/include/linux/lsm_hooks.h
|
||||
@@ -1330,22 +1330,22 @@
|
||||
*
|
||||
* @binder_set_context_mgr:
|
||||
* Check whether @mgr is allowed to be the binder context manager.
|
||||
- * @mgr contains the task_struct for the task being registered.
|
||||
+ * @mgr contains the struct cred for the current binder process.
|
||||
* Return 0 if permission is granted.
|
||||
* @binder_transaction:
|
||||
* Check whether @from is allowed to invoke a binder transaction call
|
||||
* to @to.
|
||||
- * @from contains the task_struct for the sending task.
|
||||
- * @to contains the task_struct for the receiving task.
|
||||
+ * @from contains the struct cred for the sending process.
|
||||
+ * @to contains the struct cred for the receiving process.
|
||||
* @binder_transfer_binder:
|
||||
* Check whether @from is allowed to transfer a binder reference to @to.
|
||||
- * @from contains the task_struct for the sending task.
|
||||
- * @to contains the task_struct for the receiving task.
|
||||
+ * @from contains the struct cred for the sending process.
|
||||
+ * @to contains the struct cred for the receiving process.
|
||||
* @binder_transfer_file:
|
||||
* Check whether @from is allowed to transfer @file to @to.
|
||||
- * @from contains the task_struct for the sending task.
|
||||
+ * @from contains the struct cred for the sending process.
|
||||
* @file contains the struct file being transferred.
|
||||
- * @to contains the task_struct for the receiving task.
|
||||
+ * @to contains the struct cred for the receiving process.
|
||||
*
|
||||
* @ptrace_access_check:
|
||||
* Check permission before allowing the current process to trace the
|
||||
diff --git a/include/linux/security.h b/include/linux/security.h
|
||||
index 16f44e78b7e6..3d216c94fd69 100644
|
||||
--- a/include/linux/security.h
|
||||
+++ b/include/linux/security.h
|
||||
@@ -263,13 +263,13 @@ extern int security_init(void);
|
||||
extern int early_security_init(void);
|
||||
|
||||
/* Security operations */
|
||||
-int security_binder_set_context_mgr(struct task_struct *mgr);
|
||||
-int security_binder_transaction(struct task_struct *from,
|
||||
- struct task_struct *to);
|
||||
-int security_binder_transfer_binder(struct task_struct *from,
|
||||
- struct task_struct *to);
|
||||
-int security_binder_transfer_file(struct task_struct *from,
|
||||
- struct task_struct *to, struct file *file);
|
||||
+int security_binder_set_context_mgr(const struct cred *mgr);
|
||||
+int security_binder_transaction(const struct cred *from,
|
||||
+ const struct cred *to);
|
||||
+int security_binder_transfer_binder(const struct cred *from,
|
||||
+ const struct cred *to);
|
||||
+int security_binder_transfer_file(const struct cred *from,
|
||||
+ const struct cred *to, struct file *file);
|
||||
int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
|
||||
int security_ptrace_traceme(struct task_struct *parent);
|
||||
int security_capget(struct task_struct *target,
|
||||
@@ -520,25 +520,25 @@ static inline int early_security_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static inline int security_binder_set_context_mgr(struct task_struct *mgr)
|
||||
+static inline int security_binder_set_context_mgr(const struct cred *mgr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static inline int security_binder_transaction(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+static inline int security_binder_transaction(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static inline int security_binder_transfer_binder(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+static inline int security_binder_transfer_binder(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static inline int security_binder_transfer_file(struct task_struct *from,
|
||||
- struct task_struct *to,
|
||||
+static inline int security_binder_transfer_file(const struct cred *from,
|
||||
+ const struct cred *to,
|
||||
struct file *file)
|
||||
{
|
||||
return 0;
|
||||
diff --git a/security/security.c b/security/security.c
|
||||
index 5660bbab9845..2092b657af9f 100644
|
||||
--- a/security/security.c
|
||||
+++ b/security/security.c
|
||||
@@ -887,25 +887,25 @@ OUT: \
|
||||
|
||||
/* Security operations */
|
||||
|
||||
-int security_binder_set_context_mgr(struct task_struct *mgr)
|
||||
+int security_binder_set_context_mgr(const struct cred *mgr)
|
||||
{
|
||||
return call_int_hook(binder_set_context_mgr, mgr);
|
||||
}
|
||||
|
||||
-int security_binder_transaction(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+int security_binder_transaction(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
return call_int_hook(binder_transaction, from, to);
|
||||
}
|
||||
|
||||
-int security_binder_transfer_binder(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+int security_binder_transfer_binder(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
return call_int_hook(binder_transfer_binder, from, to);
|
||||
}
|
||||
|
||||
-int security_binder_transfer_file(struct task_struct *from,
|
||||
- struct task_struct *to, struct file *file)
|
||||
+int security_binder_transfer_file(const struct cred *from,
|
||||
+ const struct cred *to, struct file *file)
|
||||
{
|
||||
return call_int_hook(binder_transfer_file, from, to, file);
|
||||
}
|
||||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
|
||||
index 9a69a2a4b31d..22173e8e88e2 100644
|
||||
--- a/security/selinux/hooks.c
|
||||
+++ b/security/selinux/hooks.c
|
||||
@@ -245,29 +245,6 @@ static inline u32 task_sid_obj(const struct task_struct *task)
|
||||
return sid;
|
||||
}
|
||||
|
||||
-/*
|
||||
- * get the security ID of a task for use with binder
|
||||
- */
|
||||
-static inline u32 task_sid_binder(const struct task_struct *task)
|
||||
-{
|
||||
- /*
|
||||
- * In many case where this function is used we should be using the
|
||||
- * task's subjective SID, but we can't reliably access the subjective
|
||||
- * creds of a task other than our own so we must use the objective
|
||||
- * creds/SID, which are safe to access. The downside is that if a task
|
||||
- * is temporarily overriding it's creds it will not be reflected here;
|
||||
- * however, it isn't clear that binder would handle that case well
|
||||
- * anyway.
|
||||
- *
|
||||
- * If this ever changes and we can safely reference the subjective
|
||||
- * creds/SID of another task, this function will make it easier to
|
||||
- * identify the various places where we make use of the task SIDs in
|
||||
- * the binder code. It is also likely that we will need to adjust
|
||||
- * the main drivers/android binder code as well.
|
||||
- */
|
||||
- return task_sid_obj(task);
|
||||
-}
|
||||
-
|
||||
static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry);
|
||||
|
||||
/*
|
||||
@@ -2039,18 +2016,19 @@ static inline u32 open_file_to_av(struct file *file)
|
||||
|
||||
/* Hook functions begin here. */
|
||||
|
||||
-static int selinux_binder_set_context_mgr(struct task_struct *mgr)
|
||||
+static int selinux_binder_set_context_mgr(const struct cred *mgr)
|
||||
{
|
||||
return avc_has_perm(&selinux_state,
|
||||
- current_sid(), task_sid_binder(mgr), SECCLASS_BINDER,
|
||||
+ current_sid(), cred_sid(mgr), SECCLASS_BINDER,
|
||||
BINDER__SET_CONTEXT_MGR, NULL);
|
||||
}
|
||||
|
||||
-static int selinux_binder_transaction(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+static int selinux_binder_transaction(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
u32 mysid = current_sid();
|
||||
- u32 fromsid = task_sid_binder(from);
|
||||
+ u32 fromsid = cred_sid(from);
|
||||
+ u32 tosid = cred_sid(to);
|
||||
int rc;
|
||||
|
||||
if (mysid != fromsid) {
|
||||
@@ -2061,24 +2039,24 @@ static int selinux_binder_transaction(struct task_struct *from,
|
||||
return rc;
|
||||
}
|
||||
|
||||
- return avc_has_perm(&selinux_state, fromsid, task_sid_binder(to),
|
||||
+ return avc_has_perm(&selinux_state, fromsid, tosid,
|
||||
SECCLASS_BINDER, BINDER__CALL, NULL);
|
||||
}
|
||||
|
||||
-static int selinux_binder_transfer_binder(struct task_struct *from,
|
||||
- struct task_struct *to)
|
||||
+static int selinux_binder_transfer_binder(const struct cred *from,
|
||||
+ const struct cred *to)
|
||||
{
|
||||
return avc_has_perm(&selinux_state,
|
||||
- task_sid_binder(from), task_sid_binder(to),
|
||||
+ cred_sid(from), cred_sid(to),
|
||||
SECCLASS_BINDER, BINDER__TRANSFER,
|
||||
NULL);
|
||||
}
|
||||
|
||||
-static int selinux_binder_transfer_file(struct task_struct *from,
|
||||
- struct task_struct *to,
|
||||
+static int selinux_binder_transfer_file(const struct cred *from,
|
||||
+ const struct cred *to,
|
||||
struct file *file)
|
||||
{
|
||||
- u32 sid = task_sid_binder(to);
|
||||
+ u32 sid = cred_sid(to);
|
||||
struct file_security_struct *fsec = selinux_file(file);
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct inode_security_struct *isec;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,115 @@
|
||||
From 16ac6be5ae309b2c31c37898511025294dcadcc8 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:30:04 +0200
|
||||
Subject: [PATCH] locks: fix TOCTOU race when granting write lease
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit d6da19c9cace63290ccfccb1fc35151ffefc0bec
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Tue Aug 16 17:53:17 2022 +0300
|
||||
|
||||
locks: fix TOCTOU race when granting write lease
|
||||
|
||||
Thread A trying to acquire a write lease checks the value of i_readcount
|
||||
and i_writecount in check_conflicting_open() to verify that its own fd
|
||||
is the only fd referencing the file.
|
||||
|
||||
Thread B trying to open the file for read will call break_lease() in
|
||||
do_dentry_open() before incrementing i_readcount, which leaves a small
|
||||
window where thread A can acquire the write lease and then thread B
|
||||
completes the open of the file for read without breaking the write lease
|
||||
that was acquired by thread A.
|
||||
|
||||
Fix this race by incrementing i_readcount before checking for existing
|
||||
leases, same as the case with i_writecount.
|
||||
|
||||
Use a helper put_file_access() to decrement i_readcount or i_writecount
|
||||
in do_dentry_open() and __fput().
|
||||
|
||||
Fixes: 387e3746d01c ("locks: eliminate false positive conflicts for write lease")
|
||||
Reviewed-by: Jeff Layton <jlayton@kernel.org>
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index cdc1dea33154..845c741dc518 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -366,12 +366,7 @@ static void __fput(struct file *file)
|
||||
}
|
||||
fops_put(file->f_op);
|
||||
put_pid(file->f_owner.pid);
|
||||
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
|
||||
- i_readcount_dec(inode);
|
||||
- if (mode & FMODE_WRITER) {
|
||||
- put_write_access(inode);
|
||||
- __mnt_drop_write(mnt);
|
||||
- }
|
||||
+ put_file_access(file);
|
||||
dput(dentry);
|
||||
if (unlikely(mode & FMODE_NEED_UNMOUNT))
|
||||
dissolve_on_fput(mnt);
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index 3e8dbf777ce2..c3701d285c69 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -96,6 +96,16 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
extern struct file *alloc_empty_file(int, const struct cred *);
|
||||
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
|
||||
|
||||
+static inline void put_file_access(struct file *file)
|
||||
+{
|
||||
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
|
||||
+ i_readcount_dec(file->f_inode);
|
||||
+ } else if (file->f_mode & FMODE_WRITER) {
|
||||
+ put_write_access(file->f_inode);
|
||||
+ __mnt_drop_write(file->f_path.mnt);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* super.c
|
||||
*/
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index 51052202ecdc..a84909d62168 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -861,7 +861,9 @@ static int do_dentry_open(struct file *f,
|
||||
return 0;
|
||||
}
|
||||
|
||||
- if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
|
||||
+ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
|
||||
+ i_readcount_inc(inode);
|
||||
+ } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
|
||||
error = get_write_access(inode);
|
||||
if (unlikely(error))
|
||||
goto cleanup_file;
|
||||
@@ -901,8 +903,6 @@ static int do_dentry_open(struct file *f,
|
||||
goto cleanup_all;
|
||||
}
|
||||
f->f_mode |= FMODE_OPENED;
|
||||
- if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
|
||||
- i_readcount_inc(inode);
|
||||
if ((f->f_mode & FMODE_READ) &&
|
||||
likely(f->f_op->read || f->f_op->read_iter))
|
||||
f->f_mode |= FMODE_CAN_READ;
|
||||
@@ -948,10 +948,7 @@ static int do_dentry_open(struct file *f,
|
||||
if (WARN_ON_ONCE(error > 0))
|
||||
error = -EINVAL;
|
||||
fops_put(f->f_op);
|
||||
- if (f->f_mode & FMODE_WRITER) {
|
||||
- put_write_access(inode);
|
||||
- __mnt_drop_write(f->f_path.mnt);
|
||||
- }
|
||||
+ put_file_access(f);
|
||||
cleanup_file:
|
||||
path_put(&f->f_path);
|
||||
f->f_path.mnt = NULL;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,122 @@
|
||||
From f3196ef468589e50d09ad0edc6d2874c267418e2 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 09:46:36 +0200
|
||||
Subject: [PATCH] fs: use a helper for opening kernel internal files
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- include/linux/fs.h: context fuzz
|
||||
- fs/overlayfs/util.c: previous backport introduced an
|
||||
open_with_fake_path() caller that also needs to be renamed here
|
||||
|
||||
commit cbb0b9d4bbcfa96e7872808a63be03202536f1bc
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Thu Jun 15 14:22:26 2023 +0300
|
||||
|
||||
fs: use a helper for opening kernel internal files
|
||||
|
||||
cachefiles uses kernel_open_tmpfile() to open kernel internal tmpfile
|
||||
without accounting for nr_files.
|
||||
|
||||
cachefiles uses open_with_fake_path() for the same reason without the
|
||||
need for a fake path.
|
||||
|
||||
Fork open_with_fake_path() to kernel_file_open() which only does the
|
||||
noaccount part and use it in cachefiles.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Message-Id: <20230615112229.2143178-3-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
|
||||
index 6f5c59baec08..bc2bb2001318 100644
|
||||
--- a/fs/cachefiles/namei.c
|
||||
+++ b/fs/cachefiles/namei.c
|
||||
@@ -560,8 +560,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
|
||||
*/
|
||||
path.mnt = cache->mnt;
|
||||
path.dentry = dentry;
|
||||
- file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
|
||||
- d_backing_inode(dentry), cache->cache_cred);
|
||||
+ file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
|
||||
+ d_backing_inode(dentry), cache->cache_cred);
|
||||
if (IS_ERR(file)) {
|
||||
trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
|
||||
PTR_ERR(file),
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index a84909d62168..3eac96e10eb0 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -1089,6 +1089,39 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
}
|
||||
EXPORT_SYMBOL(dentry_create);
|
||||
|
||||
+/**
|
||||
+ * kernel_file_open - open a file for kernel internal use
|
||||
+ * @path: path of the file to open
|
||||
+ * @flags: open flags
|
||||
+ * @inode: the inode
|
||||
+ * @cred: credentials for open
|
||||
+ *
|
||||
+ * Open a file for use by in-kernel consumers. The file is not accounted
|
||||
+ * against nr_files and must not be installed into the file descriptor
|
||||
+ * table.
|
||||
+ *
|
||||
+ * Return: Opened file on success, an error pointer on failure.
|
||||
+ */
|
||||
+struct file *kernel_file_open(const struct path *path, int flags,
|
||||
+ struct inode *inode, const struct cred *cred)
|
||||
+{
|
||||
+ struct file *f;
|
||||
+ int error;
|
||||
+
|
||||
+ f = alloc_empty_file_noaccount(flags, cred);
|
||||
+ if (IS_ERR(f))
|
||||
+ return f;
|
||||
+
|
||||
+ f->f_path = *path;
|
||||
+ error = do_dentry_open(f, inode, NULL);
|
||||
+ if (error) {
|
||||
+ fput(f);
|
||||
+ f = ERR_PTR(error);
|
||||
+ }
|
||||
+ return f;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(kernel_file_open);
|
||||
+
|
||||
struct file *open_with_fake_path(const struct path *path, int flags,
|
||||
struct inode *inode, const struct cred *cred)
|
||||
{
|
||||
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
|
||||
index 2cdbb70d2b5d..6b31b6587e4d 100644
|
||||
--- a/fs/overlayfs/util.c
|
||||
+++ b/fs/overlayfs/util.c
|
||||
@@ -1370,7 +1370,7 @@ int ovl_ensure_verity_loaded(struct path *datapath)
|
||||
* If this inode was not yet opened, the verity info hasn't been
|
||||
* loaded yet, so we need to do that here to force it into memory.
|
||||
*/
|
||||
- filp = open_with_fake_path(datapath, O_RDONLY, inode, current_cred());
|
||||
+ filp = kernel_file_open(datapath, O_RDONLY, inode, current_cred());
|
||||
if (IS_ERR(filp))
|
||||
return PTR_ERR(filp);
|
||||
fput(filp);
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index be94651061c1..363cdadb04ba 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -1833,6 +1833,8 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
|
||||
struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
|
||||
const struct path *parentpath,
|
||||
umode_t mode, int open_flag, const struct cred *cred);
|
||||
+struct file *kernel_file_open(const struct path *path, int flags,
|
||||
+ struct inode *inode, const struct cred *cred);
|
||||
|
||||
int vfs_mkobj(struct dentry *, umode_t,
|
||||
int (*f)(struct dentry *, umode_t, void *),
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,124 @@
|
||||
From f226635eb71a0c5f680f89a64ec6332e5b2f8ee7 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 15:22:00 +0200
|
||||
Subject: [PATCH] fs: move kmem_cache_zalloc() into alloc_empty_file*() helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- only context fuzz
|
||||
|
||||
commit 8a05a8c31d06c5d0d67b273a4a00f87269adde82
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Thu Jun 15 14:22:27 2023 +0300
|
||||
|
||||
fs: move kmem_cache_zalloc() into alloc_empty_file*() helpers
|
||||
|
||||
Use a common helper init_file() instead of __alloc_file() for
|
||||
alloc_empty_file*() helpers and improve the documentation.
|
||||
|
||||
This is needed for a follow up patch that allocates a backing_file
|
||||
container.
|
||||
|
||||
Suggested-by: Christoph Hellwig <hch@lst.de>
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Message-Id: <20230615112229.2143178-4-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index 845c741dc518..9fee3de138d6 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -131,20 +131,15 @@ static int __init init_fs_stat_sysctls(void)
|
||||
fs_initcall(init_fs_stat_sysctls);
|
||||
#endif
|
||||
|
||||
-static struct file *__alloc_file(int flags, const struct cred *cred)
|
||||
+static int init_file(struct file *f, int flags, const struct cred *cred)
|
||||
{
|
||||
- struct file *f;
|
||||
int error;
|
||||
|
||||
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
|
||||
- if (unlikely(!f))
|
||||
- return ERR_PTR(-ENOMEM);
|
||||
-
|
||||
f->f_cred = get_cred(cred);
|
||||
error = security_file_alloc(f);
|
||||
if (unlikely(error)) {
|
||||
file_free_rcu(&f->f_u.fu_rcuhead);
|
||||
- return ERR_PTR(error);
|
||||
+ return error;
|
||||
}
|
||||
|
||||
atomic_long_set(&f->f_count, 1);
|
||||
@@ -155,7 +150,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
|
||||
f->f_mode = OPEN_FMODE(flags);
|
||||
/* f->f_version: 0 */
|
||||
|
||||
- return f;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
/* Find an unused file structure and return a pointer to it.
|
||||
@@ -172,6 +167,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
{
|
||||
static long old_max;
|
||||
struct file *f;
|
||||
+ int error;
|
||||
|
||||
/*
|
||||
* Privileged users can go above max_files
|
||||
@@ -185,9 +181,15 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
goto over;
|
||||
}
|
||||
|
||||
- f = __alloc_file(flags, cred);
|
||||
- if (!IS_ERR(f))
|
||||
- percpu_counter_inc(&nr_files);
|
||||
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
|
||||
+ if (unlikely(!f))
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ error = init_file(f, flags, cred);
|
||||
+ if (unlikely(error))
|
||||
+ return ERR_PTR(error);
|
||||
+
|
||||
+ percpu_counter_inc(&nr_files);
|
||||
|
||||
return f;
|
||||
|
||||
@@ -203,14 +205,23 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
/*
|
||||
* Variant of alloc_empty_file() that doesn't check and modify nr_files.
|
||||
*
|
||||
- * Should not be used unless there's a very good reason to do so.
|
||||
+ * This is only for kernel internal use, and the allocate file must not be
|
||||
+ * installed into file tables or such.
|
||||
*/
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
{
|
||||
- struct file *f = __alloc_file(flags, cred);
|
||||
+ struct file *f;
|
||||
+ int error;
|
||||
+
|
||||
+ f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
|
||||
+ if (unlikely(!f))
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ error = init_file(f, flags, cred);
|
||||
+ if (unlikely(error))
|
||||
+ return ERR_PTR(error);
|
||||
|
||||
- if (!IS_ERR(f))
|
||||
- f->f_mode |= FMODE_NOACCOUNT;
|
||||
+ f->f_mode |= FMODE_NOACCOUNT;
|
||||
|
||||
return f;
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,253 @@
|
||||
From 9e62303890ee2ae993e8c78bb442176d2467e927 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 09:49:05 +0200
|
||||
Subject: [PATCH] fs: use backing_file container for internal files with "fake"
|
||||
f_path
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 62d53c4a1dfe347bd87ede46ffad38c9a3870338
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Thu Jun 15 14:22:28 2023 +0300
|
||||
|
||||
fs: use backing_file container for internal files with "fake" f_path
|
||||
|
||||
Overlayfs uses open_with_fake_path() to allocate internal kernel files,
|
||||
with a "fake" path - whose f_path is not on the same fs as f_inode.
|
||||
|
||||
Allocate a container struct backing_file for those internal files, that
|
||||
is used to hold the "fake" ovl path along with the real path.
|
||||
|
||||
backing_file_real_path() can be used to access the stored real path.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Message-Id: <20230615112229.2143178-5-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index 9fee3de138d6..3f02d00c1396 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
|
||||
|
||||
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
|
||||
|
||||
+/* Container for backing file with optional real path */
|
||||
+struct backing_file {
|
||||
+ struct file file;
|
||||
+ struct path real_path;
|
||||
+};
|
||||
+
|
||||
+static inline struct backing_file *backing_file(struct file *f)
|
||||
+{
|
||||
+ return container_of(f, struct backing_file, file);
|
||||
+}
|
||||
+
|
||||
+struct path *backing_file_real_path(struct file *f)
|
||||
+{
|
||||
+ return &backing_file(f)->real_path;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_real_path);
|
||||
+
|
||||
static void file_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
|
||||
|
||||
put_cred(f->f_cred);
|
||||
- kmem_cache_free(filp_cachep, f);
|
||||
+ if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
+ kfree(backing_file(f));
|
||||
+ else
|
||||
+ kmem_cache_free(filp_cachep, f);
|
||||
}
|
||||
|
||||
static inline void file_free(struct file *f)
|
||||
{
|
||||
security_file_free(f);
|
||||
- if (!(f->f_mode & FMODE_NOACCOUNT))
|
||||
+ if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
+ path_put(backing_file_real_path(f));
|
||||
+ if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
|
||||
percpu_counter_dec(&nr_files);
|
||||
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
|
||||
}
|
||||
@@ -226,6 +248,30 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
return f;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Variant of alloc_empty_file() that allocates a backing_file container
|
||||
+ * and doesn't check and modify nr_files.
|
||||
+ *
|
||||
+ * This is only for kernel internal use, and the allocate file must not be
|
||||
+ * installed into file tables or such.
|
||||
+ */
|
||||
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
||||
+{
|
||||
+ struct backing_file *ff;
|
||||
+ int error;
|
||||
+
|
||||
+ ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
|
||||
+ if (unlikely(!ff))
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ error = init_file(&ff->file, flags, cred);
|
||||
+ if (unlikely(error))
|
||||
+ return ERR_PTR(error);
|
||||
+
|
||||
+ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
|
||||
+ return &ff->file;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* file_init_path - initialize a 'struct file' based on path
|
||||
*
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index c3701d285c69..62b558fa6395 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -93,8 +93,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
/*
|
||||
* file_table.c
|
||||
*/
|
||||
-extern struct file *alloc_empty_file(int, const struct cred *);
|
||||
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
|
||||
+struct file *alloc_empty_file(int flags, const struct cred *cred);
|
||||
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
|
||||
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
|
||||
|
||||
static inline void put_file_access(struct file *file)
|
||||
{
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index 3eac96e10eb0..e2419242456e 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -1122,23 +1122,44 @@ struct file *kernel_file_open(const struct path *path, int flags,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_file_open);
|
||||
|
||||
-struct file *open_with_fake_path(const struct path *path, int flags,
|
||||
- struct inode *inode, const struct cred *cred)
|
||||
+/**
|
||||
+ * backing_file_open - open a backing file for kernel internal use
|
||||
+ * @path: path of the file to open
|
||||
+ * @flags: open flags
|
||||
+ * @path: path of the backing file
|
||||
+ * @cred: credentials for open
|
||||
+ *
|
||||
+ * Open a backing file for a stackable filesystem (e.g., overlayfs).
|
||||
+ * @path may be on the stackable filesystem and backing inode on the
|
||||
+ * underlying filesystem. In this case, we want to be able to return
|
||||
+ * the @real_path of the backing inode. This is done by embedding the
|
||||
+ * returned file into a container structure that also stores the path of
|
||||
+ * the backing inode on the underlying filesystem, which can be
|
||||
+ * retrieved using backing_file_real_path().
|
||||
+ */
|
||||
+struct file *backing_file_open(const struct path *path, int flags,
|
||||
+ const struct path *real_path,
|
||||
+ const struct cred *cred)
|
||||
{
|
||||
- struct file *f = alloc_empty_file_noaccount(flags, cred);
|
||||
- if (!IS_ERR(f)) {
|
||||
- int error;
|
||||
+ struct file *f;
|
||||
+ int error;
|
||||
|
||||
- f->f_path = *path;
|
||||
- error = do_dentry_open(f, inode, NULL);
|
||||
- if (error) {
|
||||
- fput(f);
|
||||
- f = ERR_PTR(error);
|
||||
- }
|
||||
+ f = alloc_empty_backing_file(flags, cred);
|
||||
+ if (IS_ERR(f))
|
||||
+ return f;
|
||||
+
|
||||
+ f->f_path = *path;
|
||||
+ path_get(real_path);
|
||||
+ *backing_file_real_path(f) = *real_path;
|
||||
+ error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
|
||||
+ if (error) {
|
||||
+ fput(f);
|
||||
+ f = ERR_PTR(error);
|
||||
}
|
||||
+
|
||||
return f;
|
||||
}
|
||||
-EXPORT_SYMBOL(open_with_fake_path);
|
||||
+EXPORT_SYMBOL_GPL(backing_file_open);
|
||||
|
||||
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
|
||||
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 5db89c8de140..99f2ae8e3864 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -65,8 +65,8 @@ static struct file *ovl_open_realfile(const struct file *file,
|
||||
if (!inode_owner_or_capable(real_idmap, realinode))
|
||||
flags &= ~O_NOATIME;
|
||||
|
||||
- realfile = open_with_fake_path(&file->f_path, flags, realinode,
|
||||
- current_cred());
|
||||
+ realfile = backing_file_open(&file->f_path, flags, realpath,
|
||||
+ current_cred());
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 363cdadb04ba..48ec31b9d230 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -167,6 +167,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
/* Supports IOCB_HAS_METADATA */
|
||||
#define FMODE_HAS_METADATA ((__force fmode_t)0x800000)
|
||||
|
||||
+/* File is embedded in backing_file object */
|
||||
+#define FMODE_BACKING ((__force fmode_t)0x2000000)
|
||||
+
|
||||
/* File was opened by fanotify and shouldn't generate fanotify events */
|
||||
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
|
||||
|
||||
@@ -2579,11 +2582,31 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
|
||||
return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
|
||||
name, flags, mode);
|
||||
}
|
||||
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
|
||||
-extern struct file *dentry_create(const struct path *path, int flags,
|
||||
- umode_t mode, const struct cred *cred);
|
||||
-extern struct file * open_with_fake_path(const struct path *, int,
|
||||
- struct inode*, const struct cred *);
|
||||
+struct file *dentry_open(const struct path *path, int flags,
|
||||
+ const struct cred *creds);
|
||||
+struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
+ const struct cred *cred);
|
||||
+struct file *backing_file_open(const struct path *path, int flags,
|
||||
+ const struct path *real_path,
|
||||
+ const struct cred *cred);
|
||||
+struct path *backing_file_real_path(struct file *f);
|
||||
+
|
||||
+/*
|
||||
+ * file_real_path - get the path corresponding to f_inode
|
||||
+ *
|
||||
+ * When opening a backing file for a stackable filesystem (e.g.,
|
||||
+ * overlayfs) f_path may be on the stackable filesystem and f_inode on
|
||||
+ * the underlying filesystem. When the path associated with f_inode is
|
||||
+ * needed, this helper should be used instead of accessing f_path
|
||||
+ * directly.
|
||||
+*/
|
||||
+static inline const struct path *file_real_path(struct file *f)
|
||||
+{
|
||||
+ if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
+ return backing_file_real_path(f);
|
||||
+ return &f->f_path;
|
||||
+}
|
||||
+
|
||||
static inline struct file *file_clone_open(struct file *file)
|
||||
{
|
||||
return dentry_open(&file->f_path, file->f_flags, file->f_cred);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,75 @@
|
||||
From c030fdbbb0542056bdd257409b781cee8d8b8c39 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 09:49:52 +0200
|
||||
Subject: [PATCH] ovl: enable fsnotify events on underlying real files
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit bc2473c90fca55bf95b2ab6af1dacee26a4f92f6
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Thu Jun 15 14:22:29 2023 +0300
|
||||
|
||||
ovl: enable fsnotify events on underlying real files
|
||||
|
||||
Overlayfs creates the real underlying files with fake f_path, whose
|
||||
f_inode is on the underlying fs and f_path on overlayfs.
|
||||
|
||||
Those real files were open with FMODE_NONOTIFY, because fsnotify code was
|
||||
not prepared to handle fsnotify hooks on files with fake path correctly
|
||||
and fanotify would report unexpected event->fd with fake overlayfs path,
|
||||
when the underlying fs was being watched.
|
||||
|
||||
Teach fsnotify to handle events on the real files, and do not set real
|
||||
files to FMODE_NONOTIFY to allow operations on real file (e.g. open,
|
||||
access, modify, close) to generate async and permission events.
|
||||
|
||||
Because fsnotify does not have notifications on address space
|
||||
operations, we do not need to worry about ->vm_file not reporting
|
||||
events to a watched overlayfs when users are accessing a mapped
|
||||
overlayfs file.
|
||||
|
||||
Acked-by: Jan Kara <jack@suse.cz>
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Message-Id: <20230615112229.2143178-6-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 99f2ae8e3864..cd0770bb3020 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -38,8 +38,8 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
|
||||
return 'm';
|
||||
}
|
||||
|
||||
-/* No atime modification nor notify on underlying */
|
||||
-#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
|
||||
+/* No atime modification on underlying */
|
||||
+#define OVL_OPEN_FLAGS (O_NOATIME)
|
||||
|
||||
static struct file *ovl_open_realfile(const struct file *file,
|
||||
const struct path *realpath)
|
||||
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
|
||||
index bb8467cd11ae..ed48e4f1e755 100644
|
||||
--- a/include/linux/fsnotify.h
|
||||
+++ b/include/linux/fsnotify.h
|
||||
@@ -91,11 +91,13 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
|
||||
|
||||
static inline int fsnotify_file(struct file *file, __u32 mask)
|
||||
{
|
||||
- const struct path *path = &file->f_path;
|
||||
+ const struct path *path;
|
||||
|
||||
if (file->f_mode & FMODE_NONOTIFY)
|
||||
return 0;
|
||||
|
||||
+ /* Overlayfs internal files have fake f_path */
|
||||
+ path = file_real_path(file);
|
||||
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,82 @@
|
||||
From 119b885de3aa505ccc26df2a8a074555e04be774 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 20:28:16 +0200
|
||||
Subject: [PATCH] fs: move cleanup from init_file() into its callers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
Conflicts:
|
||||
- fs/file_table.c: slightly different argument to file_free_rcu()
|
||||
downstream, replacement kept the same
|
||||
|
||||
commit dff745c1221a402b4921d54f292288373cff500c
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Sat Jul 1 20:11:34 2023 +0300
|
||||
|
||||
fs: move cleanup from init_file() into its callers
|
||||
|
||||
The use of file_free_rcu() in init_file() to free the struct that was
|
||||
allocated by the caller was hacky and we got what we deserved.
|
||||
|
||||
Let init_file() and its callers take care of cleaning up each after
|
||||
their own allocated resources on error.
|
||||
|
||||
Fixes: 62d53c4a1dfe ("fs: use backing_file container for internal files with "fake" f_path") # mainline only
|
||||
Reported-and-tested-by: syzbot+ada42aab05cf51b00e98@syzkaller.appspotmail.com
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Message-Id: <20230701171134.239409-1-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index 3f02d00c1396..b0a8c2608530 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -160,7 +160,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
|
||||
f->f_cred = get_cred(cred);
|
||||
error = security_file_alloc(f);
|
||||
if (unlikely(error)) {
|
||||
- file_free_rcu(&f->f_u.fu_rcuhead);
|
||||
+ put_cred(f->f_cred);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -208,8 +208,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
error = init_file(f, flags, cred);
|
||||
- if (unlikely(error))
|
||||
+ if (unlikely(error)) {
|
||||
+ kmem_cache_free(filp_cachep, f);
|
||||
return ERR_PTR(error);
|
||||
+ }
|
||||
|
||||
percpu_counter_inc(&nr_files);
|
||||
|
||||
@@ -240,8 +242,10 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
error = init_file(f, flags, cred);
|
||||
- if (unlikely(error))
|
||||
+ if (unlikely(error)) {
|
||||
+ kmem_cache_free(filp_cachep, f);
|
||||
return ERR_PTR(error);
|
||||
+ }
|
||||
|
||||
f->f_mode |= FMODE_NOACCOUNT;
|
||||
|
||||
@@ -265,8 +269,10 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
error = init_file(&ff->file, flags, cred);
|
||||
- if (unlikely(error))
|
||||
+ if (unlikely(error)) {
|
||||
+ kfree(ff);
|
||||
return ERR_PTR(error);
|
||||
+ }
|
||||
|
||||
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
|
||||
return &ff->file;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,119 @@
|
||||
From f792642489a3af9cec81ca366edffd79fe1d1359 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 15:16:52 +0200
|
||||
Subject: [PATCH] lsm: constify the 'file' parameter in
|
||||
security_binder_transfer_file()
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 8e4672d6f902d5c4db1e87e8aa9f530149d85bc6
|
||||
Author: Khadija Kamran <kamrankhadijadj@gmail.com>
|
||||
Date: Sat Aug 12 20:31:08 2023 +0500
|
||||
|
||||
lsm: constify the 'file' parameter in security_binder_transfer_file()
|
||||
|
||||
SELinux registers the implementation for the "binder_transfer_file"
|
||||
hook. Looking at the function implementation we observe that the
|
||||
parameter "file" is not changing.
|
||||
|
||||
Mark the "file" parameter of LSM hook security_binder_transfer_file() as
|
||||
"const" since it will not be changing in the LSM hook.
|
||||
|
||||
Signed-off-by: Khadija Kamran <kamrankhadijadj@gmail.com>
|
||||
[PM: subject line whitespace fix]
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
|
||||
index 1c2be7057bd9..b6fbb446bab7 100644
|
||||
--- a/include/linux/lsm_hook_defs.h
|
||||
+++ b/include/linux/lsm_hook_defs.h
|
||||
@@ -32,7 +32,7 @@ LSM_HOOK(int, 0, binder_transaction, const struct cred *from,
|
||||
LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from,
|
||||
const struct cred *to)
|
||||
LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from,
|
||||
- const struct cred *to, struct file *file)
|
||||
+ const struct cred *to, const struct file *file)
|
||||
LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child,
|
||||
unsigned int mode)
|
||||
LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent)
|
||||
diff --git a/include/linux/security.h b/include/linux/security.h
|
||||
index 3d216c94fd69..d2888c127859 100644
|
||||
--- a/include/linux/security.h
|
||||
+++ b/include/linux/security.h
|
||||
@@ -269,7 +269,7 @@ int security_binder_transaction(const struct cred *from,
|
||||
int security_binder_transfer_binder(const struct cred *from,
|
||||
const struct cred *to);
|
||||
int security_binder_transfer_file(const struct cred *from,
|
||||
- const struct cred *to, struct file *file);
|
||||
+ const struct cred *to, const struct file *file);
|
||||
int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
|
||||
int security_ptrace_traceme(struct task_struct *parent);
|
||||
int security_capget(struct task_struct *target,
|
||||
@@ -539,7 +539,7 @@ static inline int security_binder_transfer_binder(const struct cred *from,
|
||||
|
||||
static inline int security_binder_transfer_file(const struct cred *from,
|
||||
const struct cred *to,
|
||||
- struct file *file)
|
||||
+ const struct file *file)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
diff --git a/security/security.c b/security/security.c
|
||||
index 2092b657af9f..b59af216324f 100644
|
||||
--- a/security/security.c
|
||||
+++ b/security/security.c
|
||||
@@ -905,7 +905,7 @@ int security_binder_transfer_binder(const struct cred *from,
|
||||
}
|
||||
|
||||
int security_binder_transfer_file(const struct cred *from,
|
||||
- const struct cred *to, struct file *file)
|
||||
+ const struct cred *to, const struct file *file)
|
||||
{
|
||||
return call_int_hook(binder_transfer_file, from, to, file);
|
||||
}
|
||||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
|
||||
index 22173e8e88e2..deacc9a63fae 100644
|
||||
--- a/security/selinux/hooks.c
|
||||
+++ b/security/selinux/hooks.c
|
||||
@@ -1703,7 +1703,7 @@ static inline int file_path_has_perm(const struct cred *cred,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
-static int bpf_fd_pass(struct file *file, u32 sid);
|
||||
+static int bpf_fd_pass(const struct file *file, u32 sid);
|
||||
#endif
|
||||
|
||||
/* Check whether a task can use an open file descriptor to
|
||||
@@ -1976,7 +1976,7 @@ static inline u32 file_mask_to_av(int mode, int mask)
|
||||
}
|
||||
|
||||
/* Convert a Linux file to an access vector. */
|
||||
-static inline u32 file_to_av(struct file *file)
|
||||
+static inline u32 file_to_av(const struct file *file)
|
||||
{
|
||||
u32 av = 0;
|
||||
|
||||
@@ -2054,7 +2054,7 @@ static int selinux_binder_transfer_binder(const struct cred *from,
|
||||
|
||||
static int selinux_binder_transfer_file(const struct cred *from,
|
||||
const struct cred *to,
|
||||
- struct file *file)
|
||||
+ const struct file *file)
|
||||
{
|
||||
u32 sid = cred_sid(to);
|
||||
struct file_security_struct *fsec = selinux_file(file);
|
||||
@@ -6885,7 +6885,7 @@ static u32 bpf_map_fmode_to_av(fmode_t fmode)
|
||||
* access the bpf object and that's why we have to add this additional check in
|
||||
* selinux_file_receive and selinux_binder_transfer_files.
|
||||
*/
|
||||
-static int bpf_fd_pass(struct file *file, u32 sid)
|
||||
+static int bpf_fd_pass(const struct file *file, u32 sid)
|
||||
{
|
||||
struct bpf_security_struct *bpfsec;
|
||||
struct bpf_prog *prog;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,73 @@
|
||||
From 7adb9b7eb6c32cc5b7cea983ed187cf3f4122cf5 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:49:48 +0200
|
||||
Subject: [PATCH] cachefiles: use kiocb_{start,end}_write() helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit e6fa4c728fb671765291cca3a905986612c06b6e
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Thu Aug 17 17:13:37 2023 +0300
|
||||
|
||||
cachefiles: use kiocb_{start,end}_write() helpers
|
||||
|
||||
Use helpers instead of the open coded dance to silence lockdep warnings.
|
||||
|
||||
Suggested-by: Jan Kara <jack@suse.cz>
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Reviewed-by: Jan Kara <jack@suse.cz>
|
||||
Reviewed-by: Jens Axboe <axboe@kernel.dk>
|
||||
Message-Id: <20230817141337.1025891-8-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
|
||||
index 175a25fcade8..009d23cd435b 100644
|
||||
--- a/fs/cachefiles/io.c
|
||||
+++ b/fs/cachefiles/io.c
|
||||
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
|
||||
|
||||
_enter("%ld", ret);
|
||||
|
||||
- /* Tell lockdep we inherited freeze protection from submission thread */
|
||||
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
|
||||
- __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
|
||||
+ kiocb_end_write(iocb);
|
||||
|
||||
if (ret < 0)
|
||||
trace_cachefiles_io_error(object, inode, ret,
|
||||
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
|
||||
{
|
||||
struct cachefiles_cache *cache;
|
||||
struct cachefiles_kiocb *ki;
|
||||
- struct inode *inode;
|
||||
unsigned int old_nofs;
|
||||
ssize_t ret;
|
||||
size_t len = iov_iter_count(iter);
|
||||
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
|
||||
ki->iocb.ki_complete = cachefiles_write_complete;
|
||||
atomic_long_add(ki->b_writing, &cache->b_writing);
|
||||
|
||||
- /* Open-code file_start_write here to grab freeze protection, which
|
||||
- * will be released by another thread in aio_complete_rw(). Fool
|
||||
- * lockdep by telling it the lock got released so that it doesn't
|
||||
- * complain about the held lock when we return to userspace.
|
||||
- */
|
||||
- inode = file_inode(file);
|
||||
- __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
|
||||
- __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
|
||||
+ kiocb_start_write(&ki->iocb);
|
||||
|
||||
get_file(ki->iocb.ki_filp);
|
||||
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
|
||||
|
||||
- trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
|
||||
+ trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
|
||||
old_nofs = memalloc_nofs_save();
|
||||
ret = cachefiles_inject_write_error();
|
||||
if (ret == 0)
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
188
SOURCES/1327-fs-fix-kernel-doc-warnings.patch
Normal file
188
SOURCES/1327-fs-fix-kernel-doc-warnings.patch
Normal file
@ -0,0 +1,188 @@
|
||||
From fb0111b77bf45d069d7722682a1a1d202fbf5f7d Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 08:59:20 +0200
|
||||
Subject: [PATCH] fs: Fix kernel-doc warnings
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/fs_context.c: one docstring fix was already applied
|
||||
|
||||
commit 35931eb3945b8d38c31f8e956aee3cf31c52121b
|
||||
Author: Matthew Wilcox (Oracle) <willy@infradead.org>
|
||||
Date: Fri Aug 18 21:08:24 2023 +0100
|
||||
|
||||
fs: Fix kernel-doc warnings
|
||||
|
||||
These have a variety of causes and a corresponding variety of solutions.
|
||||
|
||||
Signed-off-by: "Matthew Wilcox (Oracle)" <willy@infradead.org>
|
||||
Message-Id: <20230818200824.2720007-1-willy@infradead.org>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file.c b/fs/file.c
|
||||
index 236e11a38c18..70962ee8c68b 100644
|
||||
--- a/fs/file.c
|
||||
+++ b/fs/file.c
|
||||
@@ -679,7 +679,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
|
||||
|
||||
/**
|
||||
* last_fd - return last valid index into fd table
|
||||
- * @cur_fds: files struct
|
||||
+ * @fdt: File descriptor table.
|
||||
*
|
||||
* Context: Either rcu read lock or files_lock must be held.
|
||||
*
|
||||
@@ -734,6 +734,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
|
||||
*
|
||||
* @fd: starting file descriptor to close
|
||||
* @max_fd: last file descriptor to close
|
||||
+ * @flags: CLOSE_RANGE flags.
|
||||
*
|
||||
* This closes a range of file descriptors. All file descriptors
|
||||
* from @fd up to and including @max_fd are closed.
|
||||
diff --git a/fs/fs_context.c b/fs/fs_context.c
|
||||
index 648d2ee9e5fc..3473e63e8399 100644
|
||||
--- a/fs/fs_context.c
|
||||
+++ b/fs/fs_context.c
|
||||
@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
|
||||
|
||||
/**
|
||||
* vfs_parse_fs_string - Convenience function to just parse a string.
|
||||
+ * @fc: Filesystem context.
|
||||
+ * @key: Parameter name.
|
||||
+ * @value: Default value.
|
||||
+ * @v_size: Maximum number of bytes in the value.
|
||||
*/
|
||||
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
|
||||
const char *value, size_t v_size)
|
||||
@@ -357,7 +361,7 @@ void fc_drop_locked(struct fs_context *fc)
|
||||
static void legacy_fs_context_free(struct fs_context *fc);
|
||||
|
||||
/**
|
||||
- * vfs_dup_fc_config: Duplicate a filesystem context.
|
||||
+ * vfs_dup_fs_context - Duplicate a filesystem context.
|
||||
* @src_fc: The context to copy.
|
||||
*/
|
||||
struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
|
||||
@@ -403,7 +407,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
|
||||
|
||||
/**
|
||||
* logfc - Log a message to a filesystem context
|
||||
- * @fc: The filesystem context to log to.
|
||||
+ * @log: The filesystem context to log to, or NULL to use printk.
|
||||
+ * @prefix: A string to prefix the output with, or NULL.
|
||||
+ * @level: 'w' for a warning, 'e' for an error. Anything else is a notice.
|
||||
* @fmt: The format of the buffer.
|
||||
*/
|
||||
void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
|
||||
diff --git a/fs/ioctl.c b/fs/ioctl.c
|
||||
index 088462ee5a81..64776891120c 100644
|
||||
--- a/fs/ioctl.c
|
||||
+++ b/fs/ioctl.c
|
||||
@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
|
||||
* Returns 0 on success, -errno on error, 1 if this was the last
|
||||
* extent that will fit in user array.
|
||||
*/
|
||||
-#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
|
||||
-#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
|
||||
-#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
|
||||
int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
|
||||
u64 phys, u64 len, u32 flags)
|
||||
{
|
||||
@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
|
||||
if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
|
||||
return 1;
|
||||
|
||||
+#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
|
||||
+#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
|
||||
+#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
|
||||
+
|
||||
if (flags & SET_UNKNOWN_FLAGS)
|
||||
flags |= FIEMAP_EXTENT_UNKNOWN;
|
||||
if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
|
||||
@@ -913,6 +914,9 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
|
||||
#ifdef CONFIG_COMPAT
|
||||
/**
|
||||
* compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
|
||||
+ * @file: The file to operate on.
|
||||
+ * @cmd: The ioctl command number.
|
||||
+ * @arg: The argument to the ioctl.
|
||||
*
|
||||
* This is not normally called as a function, but instead set in struct
|
||||
* file_operations as
|
||||
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
|
||||
index 5d826274570c..c429c42a6867 100644
|
||||
--- a/fs/kernel_read_file.c
|
||||
+++ b/fs/kernel_read_file.c
|
||||
@@ -8,16 +8,16 @@
|
||||
/**
|
||||
* kernel_read_file() - read file contents into a kernel buffer
|
||||
*
|
||||
- * @file file to read from
|
||||
- * @offset where to start reading from (see below).
|
||||
- * @buf pointer to a "void *" buffer for reading into (if
|
||||
+ * @file: file to read from
|
||||
+ * @offset: where to start reading from (see below).
|
||||
+ * @buf: pointer to a "void *" buffer for reading into (if
|
||||
* *@buf is NULL, a buffer will be allocated, and
|
||||
* @buf_size will be ignored)
|
||||
- * @buf_size size of buf, if already allocated. If @buf not
|
||||
+ * @buf_size: size of buf, if already allocated. If @buf not
|
||||
* allocated, this is the largest size to allocate.
|
||||
- * @file_size if non-NULL, the full size of @file will be
|
||||
+ * @file_size: if non-NULL, the full size of @file will be
|
||||
* written here.
|
||||
- * @id the kernel_read_file_id identifying the type of
|
||||
+ * @id: the kernel_read_file_id identifying the type of
|
||||
* file contents being read (for LSMs to examine)
|
||||
*
|
||||
* @offset must be 0 unless both @buf and @file_size are non-NULL
|
||||
diff --git a/fs/namei.c b/fs/namei.c
|
||||
index 0a4b15d9a010..23c73afe57d3 100644
|
||||
--- a/fs/namei.c
|
||||
+++ b/fs/namei.c
|
||||
@@ -644,6 +644,8 @@ static bool nd_alloc_stack(struct nameidata *nd)
|
||||
|
||||
/**
|
||||
* path_connected - Verify that a dentry is below mnt.mnt_root
|
||||
+ * @mnt: The mountpoint to check.
|
||||
+ * @dentry: The dentry to check.
|
||||
*
|
||||
* Rename can sometimes move a file or directory outside of a bind
|
||||
* mount, path_connected allows those cases to be detected.
|
||||
@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls);
|
||||
/**
|
||||
* may_follow_link - Check symlink following for unsafe situations
|
||||
* @nd: nameidata pathwalk data
|
||||
+ * @inode: Used for idmapping.
|
||||
*
|
||||
* In the case of the sysctl_protected_symlinks sysctl being enabled,
|
||||
* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index e2419242456e..ef2cc51d468c 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -1126,7 +1126,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
|
||||
* backing_file_open - open a backing file for kernel internal use
|
||||
* @path: path of the file to open
|
||||
* @flags: open flags
|
||||
- * @path: path of the backing file
|
||||
+ * @real_path: path of the backing file
|
||||
* @cred: credentials for open
|
||||
*
|
||||
* Open a backing file for a stackable filesystem (e.g., overlayfs).
|
||||
@@ -1534,7 +1534,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
|
||||
}
|
||||
|
||||
/**
|
||||
- * close_range() - Close all file descriptors in a given range.
|
||||
+ * sys_close_range() - Close all file descriptors in a given range.
|
||||
*
|
||||
* @fd: starting file descriptor to close
|
||||
* @max_fd: last file descriptor to close
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
300
SOURCES/1328-fs-rename-mnt-want-drop-write-helpers.patch
Normal file
300
SOURCES/1328-fs-rename-mnt-want-drop-write-helpers.patch
Normal file
@ -0,0 +1,300 @@
|
||||
From 484050dcbd77701cd8a295037aaa6365e0dc2f4e Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 09:57:52 +0200
|
||||
Subject: [PATCH] fs: rename __mnt_{want,drop}_write*() helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/inode.c: context fuzz
|
||||
- fs/internal.h: dropped one hunk that modifies a comment in
|
||||
sb_start_ro_state_change(), which is not present downstream
|
||||
- fs/namespace.c: needed to rename also exports because of downstream
|
||||
commit fb4415394e59 ("fs: export mnt_{get,put}_write_access() to modules")
|
||||
- fs/overlayfs/util.c: previous backport introduced __mnt_...()
|
||||
callers that also need to be renamed here
|
||||
|
||||
commit 3e15dcf77b23b8e9b9b7f3c0d4def8fe9c12c534
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Fri Sep 8 16:28:59 2023 +0300
|
||||
|
||||
fs: rename __mnt_{want,drop}_write*() helpers
|
||||
|
||||
Before exporting these helpers to modules, make their names more
|
||||
meaningful.
|
||||
|
||||
The names mnt_{get,put}_write_access*() were chosen, because they rhyme
|
||||
with the inode {get,put}_write_access() helpers, which have a very close
|
||||
meaning for the inode object.
|
||||
|
||||
Suggested-by: Christian Brauner <brauner@kernel.org>
|
||||
Link: https://lore.kernel.org/r/20230817-anfechtbar-ruhelosigkeit-8c6cca8443fc@brauner/
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Message-Id: <20230908132900.2983519-2-amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/inode.c b/fs/inode.c
|
||||
index 2bc233f8db22..fc484773431b 100644
|
||||
--- a/fs/inode.c
|
||||
+++ b/fs/inode.c
|
||||
@@ -1980,7 +1980,7 @@ void touch_atime(const struct path *path)
|
||||
if (!sb_start_write_trylock(inode->i_sb))
|
||||
return;
|
||||
|
||||
- if (__mnt_want_write(mnt) != 0)
|
||||
+ if (mnt_get_write_access(mnt) != 0)
|
||||
goto skip_update;
|
||||
/*
|
||||
* File systems can error out when updating inodes if they need to
|
||||
@@ -1993,7 +1993,7 @@ void touch_atime(const struct path *path)
|
||||
*/
|
||||
now = current_time(inode);
|
||||
update_time(inode, &now, S_ATIME);
|
||||
- __mnt_drop_write(mnt);
|
||||
+ mnt_put_write_access(mnt);
|
||||
skip_update:
|
||||
sb_end_write(inode->i_sb);
|
||||
}
|
||||
@@ -2110,9 +2110,9 @@ static int __file_update_time(struct file *file, struct timespec64 *now,
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
/* try to update time settings */
|
||||
- if (!__mnt_want_write_file(file)) {
|
||||
+ if (!mnt_get_write_access_file(file)) {
|
||||
ret = update_time(inode, now, sync_mode);
|
||||
- __mnt_drop_write_file(file);
|
||||
+ mnt_put_write_access_file(file);
|
||||
}
|
||||
|
||||
return ret;
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index 62b558fa6395..85bf69115cf4 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -76,8 +76,8 @@ extern int sb_prepare_remount_readonly(struct super_block *);
|
||||
|
||||
extern void __init mnt_init(void);
|
||||
|
||||
-extern int __mnt_want_write_file(struct file *);
|
||||
-extern void __mnt_drop_write_file(struct file *);
|
||||
+int mnt_get_write_access_file(struct file *file);
|
||||
+void mnt_put_write_access_file(struct file *file);
|
||||
|
||||
extern void dissolve_on_fput(struct vfsmount *);
|
||||
|
||||
@@ -103,7 +103,7 @@ static inline void put_file_access(struct file *file)
|
||||
i_readcount_dec(file->f_inode);
|
||||
} else if (file->f_mode & FMODE_WRITER) {
|
||||
put_write_access(file->f_inode);
|
||||
- __mnt_drop_write(file->f_path.mnt);
|
||||
+ mnt_put_write_access(file->f_path.mnt);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/fs/namespace.c b/fs/namespace.c
|
||||
index d032d84d66ed..218f1b77bf56 100644
|
||||
--- a/fs/namespace.c
|
||||
+++ b/fs/namespace.c
|
||||
@@ -333,16 +333,16 @@ static int mnt_is_readonly(struct vfsmount *mnt)
|
||||
* can determine when writes are able to occur to a filesystem.
|
||||
*/
|
||||
/**
|
||||
- * __mnt_want_write - get write access to a mount without freeze protection
|
||||
+ * mnt_get_write_access - get write access to a mount without freeze protection
|
||||
* @m: the mount on which to take a write
|
||||
*
|
||||
* This tells the low-level filesystem that a write is about to be performed to
|
||||
* it, and makes sure that writes are allowed (mnt it read-write) before
|
||||
* returning success. This operation does not protect against filesystem being
|
||||
- * frozen. When the write operation is finished, __mnt_drop_write() must be
|
||||
+ * frozen. When the write operation is finished, mnt_put_write_access() must be
|
||||
* called. This is effectively a refcount.
|
||||
*/
|
||||
-int __mnt_want_write(struct vfsmount *m)
|
||||
+int mnt_get_write_access(struct vfsmount *m)
|
||||
{
|
||||
struct mount *mnt = real_mount(m);
|
||||
int ret = 0;
|
||||
@@ -371,7 +371,7 @@ int __mnt_want_write(struct vfsmount *m)
|
||||
|
||||
return ret;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(__mnt_want_write);
|
||||
+EXPORT_SYMBOL_GPL(mnt_get_write_access);
|
||||
|
||||
/**
|
||||
* mnt_want_write - get write access to a mount
|
||||
@@ -387,7 +387,7 @@ int mnt_want_write(struct vfsmount *m)
|
||||
int ret;
|
||||
|
||||
sb_start_write(m->mnt_sb);
|
||||
- ret = __mnt_want_write(m);
|
||||
+ ret = mnt_get_write_access(m);
|
||||
if (ret)
|
||||
sb_end_write(m->mnt_sb);
|
||||
return ret;
|
||||
@@ -395,15 +395,15 @@ int mnt_want_write(struct vfsmount *m)
|
||||
EXPORT_SYMBOL_GPL(mnt_want_write);
|
||||
|
||||
/**
|
||||
- * __mnt_want_write_file - get write access to a file's mount
|
||||
+ * mnt_get_write_access_file - get write access to a file's mount
|
||||
* @file: the file who's mount on which to take a write
|
||||
*
|
||||
- * This is like __mnt_want_write, but if the file is already open for writing it
|
||||
+ * This is like mnt_get_write_access, but if @file is already open for write it
|
||||
* skips incrementing mnt_writers (since the open file already has a reference)
|
||||
* and instead only does the check for emergency r/o remounts. This must be
|
||||
- * paired with __mnt_drop_write_file.
|
||||
+ * paired with mnt_put_write_access_file.
|
||||
*/
|
||||
-int __mnt_want_write_file(struct file *file)
|
||||
+int mnt_get_write_access_file(struct file *file)
|
||||
{
|
||||
if (file->f_mode & FMODE_WRITER) {
|
||||
/*
|
||||
@@ -414,7 +414,7 @@ int __mnt_want_write_file(struct file *file)
|
||||
return -EROFS;
|
||||
return 0;
|
||||
}
|
||||
- return __mnt_want_write(file->f_path.mnt);
|
||||
+ return mnt_get_write_access(file->f_path.mnt);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -431,7 +431,7 @@ int mnt_want_write_file(struct file *file)
|
||||
int ret;
|
||||
|
||||
sb_start_write(file_inode(file)->i_sb);
|
||||
- ret = __mnt_want_write_file(file);
|
||||
+ ret = mnt_get_write_access_file(file);
|
||||
if (ret)
|
||||
sb_end_write(file_inode(file)->i_sb);
|
||||
return ret;
|
||||
@@ -439,20 +439,20 @@ int mnt_want_write_file(struct file *file)
|
||||
EXPORT_SYMBOL_GPL(mnt_want_write_file);
|
||||
|
||||
/**
|
||||
- * __mnt_drop_write - give up write access to a mount
|
||||
+ * mnt_put_write_access - give up write access to a mount
|
||||
* @mnt: the mount on which to give up write access
|
||||
*
|
||||
* Tells the low-level filesystem that we are done
|
||||
* performing writes to it. Must be matched with
|
||||
- * __mnt_want_write() call above.
|
||||
+ * mnt_get_write_access() call above.
|
||||
*/
|
||||
-void __mnt_drop_write(struct vfsmount *mnt)
|
||||
+void mnt_put_write_access(struct vfsmount *mnt)
|
||||
{
|
||||
preempt_disable();
|
||||
mnt_dec_writers(real_mount(mnt));
|
||||
preempt_enable();
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(__mnt_drop_write);
|
||||
+EXPORT_SYMBOL_GPL(mnt_put_write_access);
|
||||
|
||||
/**
|
||||
* mnt_drop_write - give up write access to a mount
|
||||
@@ -464,20 +464,20 @@ EXPORT_SYMBOL_GPL(__mnt_drop_write);
|
||||
*/
|
||||
void mnt_drop_write(struct vfsmount *mnt)
|
||||
{
|
||||
- __mnt_drop_write(mnt);
|
||||
+ mnt_put_write_access(mnt);
|
||||
sb_end_write(mnt->mnt_sb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mnt_drop_write);
|
||||
|
||||
-void __mnt_drop_write_file(struct file *file)
|
||||
+void mnt_put_write_access_file(struct file *file)
|
||||
{
|
||||
if (!(file->f_mode & FMODE_WRITER))
|
||||
- __mnt_drop_write(file->f_path.mnt);
|
||||
+ mnt_put_write_access(file->f_path.mnt);
|
||||
}
|
||||
|
||||
void mnt_drop_write_file(struct file *file)
|
||||
{
|
||||
- __mnt_drop_write_file(file);
|
||||
+ mnt_put_write_access_file(file);
|
||||
sb_end_write(file_inode(file)->i_sb);
|
||||
}
|
||||
EXPORT_SYMBOL(mnt_drop_write_file);
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index ef2cc51d468c..b75b1ab6305b 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -867,7 +867,7 @@ static int do_dentry_open(struct file *f,
|
||||
error = get_write_access(inode);
|
||||
if (unlikely(error))
|
||||
goto cleanup_file;
|
||||
- error = __mnt_want_write(f->f_path.mnt);
|
||||
+ error = mnt_get_write_access(f->f_path.mnt);
|
||||
if (unlikely(error)) {
|
||||
put_write_access(inode);
|
||||
goto cleanup_file;
|
||||
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
|
||||
index 6b31b6587e4d..81ef76c77cab 100644
|
||||
--- a/fs/overlayfs/util.c
|
||||
+++ b/fs/overlayfs/util.c
|
||||
@@ -21,7 +21,7 @@
|
||||
int ovl_get_write_access(struct dentry *dentry)
|
||||
{
|
||||
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
|
||||
- return __mnt_want_write(ovl_upper_mnt(ofs));
|
||||
+ return mnt_get_write_access(ovl_upper_mnt(ofs));
|
||||
}
|
||||
|
||||
/* Get write access to upper sb - may block if upper sb is frozen */
|
||||
@@ -40,7 +40,7 @@ int ovl_want_write(struct dentry *dentry)
|
||||
void ovl_put_write_access(struct dentry *dentry)
|
||||
{
|
||||
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
|
||||
- __mnt_drop_write(ovl_upper_mnt(ofs));
|
||||
+ mnt_put_write_access(ovl_upper_mnt(ofs));
|
||||
}
|
||||
|
||||
void ovl_end_write(struct dentry *dentry)
|
||||
diff --git a/include/linux/mount.h b/include/linux/mount.h
|
||||
index 37dc2a161f73..0f214b0a0992 100644
|
||||
--- a/include/linux/mount.h
|
||||
+++ b/include/linux/mount.h
|
||||
@@ -92,8 +92,8 @@ extern bool __mnt_is_readonly(struct vfsmount *mnt);
|
||||
extern bool mnt_may_suid(struct vfsmount *mnt);
|
||||
|
||||
extern struct vfsmount *clone_private_mount(const struct path *path);
|
||||
-extern int __mnt_want_write(struct vfsmount *);
|
||||
-extern void __mnt_drop_write(struct vfsmount *);
|
||||
+int mnt_get_write_access(struct vfsmount *mnt);
|
||||
+void mnt_put_write_access(struct vfsmount *mnt);
|
||||
|
||||
extern struct vfsmount *fc_mount(struct fs_context *fc);
|
||||
extern struct vfsmount *fc_mount_longterm(struct fs_context *fc);
|
||||
diff --git a/kernel/acct.c b/kernel/acct.c
|
||||
index bbea312b9d76..b3e00389d42d 100644
|
||||
--- a/kernel/acct.c
|
||||
+++ b/kernel/acct.c
|
||||
@@ -235,7 +235,7 @@ static int acct_on(struct filename *pathname)
|
||||
filp_close(file, NULL);
|
||||
return PTR_ERR(internal);
|
||||
}
|
||||
- err = __mnt_want_write(internal);
|
||||
+ err = mnt_get_write_access(internal);
|
||||
if (err) {
|
||||
mntput(internal);
|
||||
kfree(acct);
|
||||
@@ -260,7 +260,7 @@ static int acct_on(struct filename *pathname)
|
||||
old = xchg(&ns->bacct, &acct->pin);
|
||||
mutex_unlock(&acct->lock);
|
||||
pin_kill(old);
|
||||
- __mnt_drop_write(mnt);
|
||||
+ mnt_put_write_access(mnt);
|
||||
mntput(mnt);
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,109 @@
|
||||
From fed3cc66e15fba2f8b48257cfbe82ab6eac11a39 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:35:52 +0200
|
||||
Subject: [PATCH] fs: get mnt_writers count for an open backing file's real
|
||||
path
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 83bc1d294130cc471a89ce10770daa281a93fcb0
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Mon Oct 9 18:37:10 2023 +0300
|
||||
|
||||
fs: get mnt_writers count for an open backing file's real path
|
||||
|
||||
A writeable mapped backing file can perform writes to the real inode.
|
||||
Therefore, the real path mount must be kept writable so long as the
|
||||
writable map exists.
|
||||
|
||||
This may not be strictly needed for overlayfs private upper mount,
|
||||
but it is correct to take the mnt_writers count in the vfs helper.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Link: https://lore.kernel.org/r/20231009153712.1566422-2-amir73il@gmail.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index 85bf69115cf4..29369382249d 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -97,13 +97,20 @@ struct file *alloc_empty_file(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
|
||||
|
||||
+static inline void file_put_write_access(struct file *file)
|
||||
+{
|
||||
+ put_write_access(file->f_inode);
|
||||
+ mnt_put_write_access(file->f_path.mnt);
|
||||
+ if (unlikely(file->f_mode & FMODE_BACKING))
|
||||
+ mnt_put_write_access(backing_file_real_path(file)->mnt);
|
||||
+}
|
||||
+
|
||||
static inline void put_file_access(struct file *file)
|
||||
{
|
||||
if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
|
||||
i_readcount_dec(file->f_inode);
|
||||
} else if (file->f_mode & FMODE_WRITER) {
|
||||
- put_write_access(file->f_inode);
|
||||
- mnt_put_write_access(file->f_path.mnt);
|
||||
+ file_put_write_access(file);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index b75b1ab6305b..64e4bbd1f28c 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -842,6 +842,30 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
|
||||
return ksys_fchown(fd, user, group);
|
||||
}
|
||||
|
||||
+static inline int file_get_write_access(struct file *f)
|
||||
+{
|
||||
+ int error;
|
||||
+
|
||||
+ error = get_write_access(f->f_inode);
|
||||
+ if (unlikely(error))
|
||||
+ return error;
|
||||
+ error = mnt_get_write_access(f->f_path.mnt);
|
||||
+ if (unlikely(error))
|
||||
+ goto cleanup_inode;
|
||||
+ if (unlikely(f->f_mode & FMODE_BACKING)) {
|
||||
+ error = mnt_get_write_access(backing_file_real_path(f)->mnt);
|
||||
+ if (unlikely(error))
|
||||
+ goto cleanup_mnt;
|
||||
+ }
|
||||
+ return 0;
|
||||
+
|
||||
+cleanup_mnt:
|
||||
+ mnt_put_write_access(f->f_path.mnt);
|
||||
+cleanup_inode:
|
||||
+ put_write_access(f->f_inode);
|
||||
+ return error;
|
||||
+}
|
||||
+
|
||||
static int do_dentry_open(struct file *f,
|
||||
struct inode *inode,
|
||||
int (*open)(struct inode *, struct file *))
|
||||
@@ -864,14 +888,9 @@ static int do_dentry_open(struct file *f,
|
||||
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
|
||||
i_readcount_inc(inode);
|
||||
} else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
|
||||
- error = get_write_access(inode);
|
||||
+ error = file_get_write_access(f);
|
||||
if (unlikely(error))
|
||||
goto cleanup_file;
|
||||
- error = mnt_get_write_access(f->f_path.mnt);
|
||||
- if (unlikely(error)) {
|
||||
- put_write_access(inode);
|
||||
- goto cleanup_file;
|
||||
- }
|
||||
f->f_mode |= FMODE_WRITER;
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,153 @@
|
||||
From f3d228ade9542a4fac0e0d4d2721e2a94f2d6d98 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:36:39 +0200
|
||||
Subject: [PATCH] fs: create helper file_user_path() for user displayed mapped
|
||||
file path
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 08582d678fcf11fc86188f0b92239d3d49667d8e
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Mon Oct 9 18:37:11 2023 +0300
|
||||
|
||||
fs: create helper file_user_path() for user displayed mapped file path
|
||||
|
||||
Overlayfs uses backing files with "fake" overlayfs f_path and "real"
|
||||
underlying f_inode, in order to use underlying inode aops for mapped
|
||||
files and to display the overlayfs path in /proc/<pid>/maps.
|
||||
|
||||
In preparation for storing the overlayfs "fake" path instead of the
|
||||
underlying "real" path in struct backing_file, define a noop helper
|
||||
file_user_path() that returns f_path for now.
|
||||
|
||||
Use the new helper in procfs and kernel logs whenever a path of a
|
||||
mapped file is displayed to users.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Link: https://lore.kernel.org/r/20231009153712.1566422-3-amir73il@gmail.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
|
||||
index 7654c2e42dc0..134c48374ecd 100644
|
||||
--- a/arch/arc/kernel/troubleshoot.c
|
||||
+++ b/arch/arc/kernel/troubleshoot.c
|
||||
@@ -90,10 +90,12 @@ static void show_faulting_vma(unsigned long address)
|
||||
*/
|
||||
if (vma) {
|
||||
char buf[ARC_PATH_MAX];
|
||||
- char *nm = "?";
|
||||
+ char *nm = "anon";
|
||||
|
||||
if (vma->vm_file) {
|
||||
- nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1);
|
||||
+ /* XXX: can we use %pD below and get rid of buf? */
|
||||
+ nm = d_path(file_user_path(vma->vm_file), buf,
|
||||
+ ARC_PATH_MAX-1);
|
||||
if (IS_ERR(nm))
|
||||
nm = "?";
|
||||
}
|
||||
diff --git a/fs/proc/base.c b/fs/proc/base.c
|
||||
index dbb251465954..cb79b5f1d459 100644
|
||||
--- a/fs/proc/base.c
|
||||
+++ b/fs/proc/base.c
|
||||
@@ -2190,7 +2190,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
|
||||
rc = -ENOENT;
|
||||
vma = find_exact_vma(mm, vm_start, vm_end);
|
||||
if (vma && vma->vm_file) {
|
||||
- *path = vma->vm_file->f_path;
|
||||
+ *path = *file_user_path(vma->vm_file);
|
||||
path_get(path);
|
||||
rc = 0;
|
||||
}
|
||||
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
|
||||
index 13452b32e2bd..b7e06be41224 100644
|
||||
--- a/fs/proc/nommu.c
|
||||
+++ b/fs/proc/nommu.c
|
||||
@@ -59,7 +59,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
|
||||
|
||||
if (file) {
|
||||
seq_pad(m, ' ');
|
||||
- seq_file_path(m, file, "");
|
||||
+ seq_path(m, file_user_path(file), "");
|
||||
}
|
||||
|
||||
seq_putc(m, '\n');
|
||||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
|
||||
index e396e52ca096..6180dc935136 100644
|
||||
--- a/fs/proc/task_mmu.c
|
||||
+++ b/fs/proc/task_mmu.c
|
||||
@@ -295,7 +295,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
|
||||
if (anon_name)
|
||||
seq_printf(m, "[anon_shmem:%s]", anon_name->name);
|
||||
else
|
||||
- seq_file_path(m, file, "\n");
|
||||
+ seq_path(m, file_user_path(file), "\n");
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -1952,7 +1952,7 @@ static int show_numa_map(struct seq_file *m, void *v)
|
||||
|
||||
if (file) {
|
||||
seq_puts(m, " file=");
|
||||
- seq_file_path(m, file, "\n\t= ");
|
||||
+ seq_path(m, file_user_path(file), "\n\t= ");
|
||||
} else if (vma_is_initial_heap(vma)) {
|
||||
seq_puts(m, " heap");
|
||||
} else if (vma_is_initial_stack(vma)) {
|
||||
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
|
||||
index 4d52623e1bff..a3822c149f12 100644
|
||||
--- a/fs/proc/task_nommu.c
|
||||
+++ b/fs/proc/task_nommu.c
|
||||
@@ -162,7 +162,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
|
||||
|
||||
if (file) {
|
||||
seq_pad(m, ' ');
|
||||
- seq_file_path(m, file, "");
|
||||
+ seq_path(m, file_user_path(file), "");
|
||||
} else if (mm && vma_is_initial_stack(vma)) {
|
||||
seq_pad(m, ' ');
|
||||
seq_puts(m, "[stack]");
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 48ec31b9d230..4ee6ed9e2634 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -2607,6 +2607,20 @@ static inline const struct path *file_real_path(struct file *f)
|
||||
return &f->f_path;
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * file_user_path - get the path to display for memory mapped file
|
||||
+ *
|
||||
+ * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
|
||||
+ * stored in ->vm_file is a backing file whose f_inode is on the underlying
|
||||
+ * filesystem. When the mapped file path is displayed to user (e.g. via
|
||||
+ * /proc/<pid>/maps), this helper should be used to get the path to display
|
||||
+ * to the user, which is the path of the fd that user has requested to map.
|
||||
+ */
|
||||
+static inline const struct path *file_user_path(struct file *f)
|
||||
+{
|
||||
+ return &f->f_path;
|
||||
+}
|
||||
+
|
||||
static inline struct file *file_clone_open(struct file *file)
|
||||
{
|
||||
return dentry_open(&file->f_path, file->f_flags, file->f_cred);
|
||||
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
|
||||
index 0fda3619c425..6d89bc793c96 100644
|
||||
--- a/kernel/trace/trace_output.c
|
||||
+++ b/kernel/trace/trace_output.c
|
||||
@@ -405,7 +405,7 @@ static int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
|
||||
vmstart = vma->vm_start;
|
||||
}
|
||||
if (file) {
|
||||
- ret = trace_seq_path(s, &file->f_path);
|
||||
+ ret = trace_seq_path(s, file_user_path(file));
|
||||
if (ret)
|
||||
trace_seq_printf(s, "[+0x%lx]",
|
||||
ip - vmstart);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,240 @@
|
||||
From da5c3bc8093871a2c6c90187b645cc29cc0230ef Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:37:33 +0200
|
||||
Subject: [PATCH] fs: store real path instead of fake path in backing file
|
||||
f_path
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/internal.h: context fuzz
|
||||
|
||||
commit def3ae83da02f87005210fa3d448c5dd37ba4105
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Mon Oct 9 18:37:12 2023 +0300
|
||||
|
||||
fs: store real path instead of fake path in backing file f_path
|
||||
|
||||
A backing file struct stores two paths, one "real" path that is referring
|
||||
to f_inode and one "fake" path, which should be displayed to users in
|
||||
/proc/<pid>/maps.
|
||||
|
||||
There is a lot more potential code that needs to know the "real" path, than
|
||||
code that needs to know the "fake" path.
|
||||
|
||||
Instead of code having to request the "real" path with file_real_path(),
|
||||
store the "real" path in f_path and require code that needs to know the
|
||||
"fake" path request it with file_user_path().
|
||||
Replace the file_real_path() helper with a simple const accessor f_path().
|
||||
|
||||
After this change, file_dentry() is not expected to observe any files
|
||||
with overlayfs f_path and real f_inode, so the call to ->d_real() should
|
||||
not be needed. Leave the ->d_real() call for now and add an assertion
|
||||
in ovl_d_real() to catch if we made wrong assumptions.
|
||||
|
||||
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
|
||||
Link: https://lore.kernel.org/r/CAJfpegtt48eXhhjDFA1ojcHPNKj3Go6joryCPtEFAKpocyBsnw@mail.gmail.com/
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Link: https://lore.kernel.org/r/20231009153712.1566422-4-amir73il@gmail.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index b0a8c2608530..e5c7b9705109 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -44,10 +44,10 @@ static struct kmem_cache *filp_cachep __read_mostly;
|
||||
|
||||
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
|
||||
|
||||
-/* Container for backing file with optional real path */
|
||||
+/* Container for backing file with optional user path */
|
||||
struct backing_file {
|
||||
struct file file;
|
||||
- struct path real_path;
|
||||
+ struct path user_path;
|
||||
};
|
||||
|
||||
static inline struct backing_file *backing_file(struct file *f)
|
||||
@@ -55,11 +55,11 @@ static inline struct backing_file *backing_file(struct file *f)
|
||||
return container_of(f, struct backing_file, file);
|
||||
}
|
||||
|
||||
-struct path *backing_file_real_path(struct file *f)
|
||||
+struct path *backing_file_user_path(struct file *f)
|
||||
{
|
||||
- return &backing_file(f)->real_path;
|
||||
+ return &backing_file(f)->user_path;
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(backing_file_real_path);
|
||||
+EXPORT_SYMBOL_GPL(backing_file_user_path);
|
||||
|
||||
static void file_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
@@ -76,7 +76,7 @@ static inline void file_free(struct file *f)
|
||||
{
|
||||
security_file_free(f);
|
||||
if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
- path_put(backing_file_real_path(f));
|
||||
+ path_put(backing_file_user_path(f));
|
||||
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
|
||||
percpu_counter_dec(&nr_files);
|
||||
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index 29369382249d..bd0934d0521b 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -102,7 +102,7 @@ static inline void file_put_write_access(struct file *file)
|
||||
put_write_access(file->f_inode);
|
||||
mnt_put_write_access(file->f_path.mnt);
|
||||
if (unlikely(file->f_mode & FMODE_BACKING))
|
||||
- mnt_put_write_access(backing_file_real_path(file)->mnt);
|
||||
+ mnt_put_write_access(backing_file_user_path(file)->mnt);
|
||||
}
|
||||
|
||||
static inline void put_file_access(struct file *file)
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index 64e4bbd1f28c..45547548a0e5 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -853,7 +853,7 @@ static inline int file_get_write_access(struct file *f)
|
||||
if (unlikely(error))
|
||||
goto cleanup_inode;
|
||||
if (unlikely(f->f_mode & FMODE_BACKING)) {
|
||||
- error = mnt_get_write_access(backing_file_real_path(f)->mnt);
|
||||
+ error = mnt_get_write_access(backing_file_user_path(f)->mnt);
|
||||
if (unlikely(error))
|
||||
goto cleanup_mnt;
|
||||
}
|
||||
@@ -1143,20 +1143,19 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
|
||||
|
||||
/**
|
||||
* backing_file_open - open a backing file for kernel internal use
|
||||
- * @path: path of the file to open
|
||||
+ * @user_path: path that the user reuqested to open
|
||||
* @flags: open flags
|
||||
* @real_path: path of the backing file
|
||||
* @cred: credentials for open
|
||||
*
|
||||
* Open a backing file for a stackable filesystem (e.g., overlayfs).
|
||||
- * @path may be on the stackable filesystem and backing inode on the
|
||||
- * underlying filesystem. In this case, we want to be able to return
|
||||
- * the @real_path of the backing inode. This is done by embedding the
|
||||
- * returned file into a container structure that also stores the path of
|
||||
- * the backing inode on the underlying filesystem, which can be
|
||||
- * retrieved using backing_file_real_path().
|
||||
+ * @user_path may be on the stackable filesystem and @real_path on the
|
||||
+ * underlying filesystem. In this case, we want to be able to return the
|
||||
+ * @user_path of the stackable filesystem. This is done by embedding the
|
||||
+ * returned file into a container structure that also stores the stacked
|
||||
+ * file's path, which can be retrieved using backing_file_user_path().
|
||||
*/
|
||||
-struct file *backing_file_open(const struct path *path, int flags,
|
||||
+struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
const struct path *real_path,
|
||||
const struct cred *cred)
|
||||
{
|
||||
@@ -1167,9 +1166,9 @@ struct file *backing_file_open(const struct path *path, int flags,
|
||||
if (IS_ERR(f))
|
||||
return f;
|
||||
|
||||
- f->f_path = *path;
|
||||
- path_get(real_path);
|
||||
- *backing_file_real_path(f) = *real_path;
|
||||
+ path_get(user_path);
|
||||
+ *backing_file_user_path(f) = *user_path;
|
||||
+ f->f_path = *real_path;
|
||||
error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
|
||||
if (error) {
|
||||
fput(f);
|
||||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
|
||||
index c49b1e7575d3..0779c8290ec4 100644
|
||||
--- a/fs/overlayfs/super.c
|
||||
+++ b/fs/overlayfs/super.c
|
||||
@@ -34,14 +34,22 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
|
||||
struct dentry *real = NULL, *lower;
|
||||
int err;
|
||||
|
||||
- /* It's an overlay file */
|
||||
+ /*
|
||||
+ * vfs is only expected to call d_real() with NULL from d_real_inode()
|
||||
+ * and with overlay inode from file_dentry() on an overlay file.
|
||||
+ *
|
||||
+ * TODO: remove @inode argument from d_real() API, remove code in this
|
||||
+ * function that deals with non-NULL @inode and remove d_real() call
|
||||
+ * from file_dentry().
|
||||
+ */
|
||||
if (inode && d_inode(dentry) == inode)
|
||||
return dentry;
|
||||
+ else if (inode)
|
||||
+ goto bug;
|
||||
|
||||
if (!d_is_reg(dentry)) {
|
||||
- if (!inode || inode == d_inode(dentry))
|
||||
- return dentry;
|
||||
- goto bug;
|
||||
+ /* d_real_inode() is only relevant for regular files */
|
||||
+ return dentry;
|
||||
}
|
||||
|
||||
real = ovl_dentry_upper(dentry);
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 4ee6ed9e2634..1927fcdad989 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -2586,26 +2586,10 @@ struct file *dentry_open(const struct path *path, int flags,
|
||||
const struct cred *creds);
|
||||
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
const struct cred *cred);
|
||||
-struct file *backing_file_open(const struct path *path, int flags,
|
||||
+struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
const struct path *real_path,
|
||||
const struct cred *cred);
|
||||
-struct path *backing_file_real_path(struct file *f);
|
||||
-
|
||||
-/*
|
||||
- * file_real_path - get the path corresponding to f_inode
|
||||
- *
|
||||
- * When opening a backing file for a stackable filesystem (e.g.,
|
||||
- * overlayfs) f_path may be on the stackable filesystem and f_inode on
|
||||
- * the underlying filesystem. When the path associated with f_inode is
|
||||
- * needed, this helper should be used instead of accessing f_path
|
||||
- * directly.
|
||||
-*/
|
||||
-static inline const struct path *file_real_path(struct file *f)
|
||||
-{
|
||||
- if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
- return backing_file_real_path(f);
|
||||
- return &f->f_path;
|
||||
-}
|
||||
+struct path *backing_file_user_path(struct file *f);
|
||||
|
||||
/*
|
||||
* file_user_path - get the path to display for memory mapped file
|
||||
@@ -2618,6 +2602,8 @@ static inline const struct path *file_real_path(struct file *f)
|
||||
*/
|
||||
static inline const struct path *file_user_path(struct file *f)
|
||||
{
|
||||
+ if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
+ return backing_file_user_path(f);
|
||||
return &f->f_path;
|
||||
}
|
||||
|
||||
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
|
||||
index ed48e4f1e755..bcb6609b54b3 100644
|
||||
--- a/include/linux/fsnotify.h
|
||||
+++ b/include/linux/fsnotify.h
|
||||
@@ -96,8 +96,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
|
||||
if (file->f_mode & FMODE_NONOTIFY)
|
||||
return 0;
|
||||
|
||||
- /* Overlayfs internal files have fake f_path */
|
||||
- path = file_real_path(file);
|
||||
+ path = &file->f_path;
|
||||
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,243 @@
|
||||
From ecbb1b105f3616d4342dba0f840dcf9265dfbea0 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:42:36 +0200
|
||||
Subject: [PATCH] fs: prepare for stackable filesystems backing file helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit f91a704f7161c2cf0fcd41fa9fbec4355b813fff
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Mon Oct 2 17:19:46 2023 +0300
|
||||
|
||||
fs: prepare for stackable filesystems backing file helpers
|
||||
|
||||
In preparation for factoring out some backing file io helpers from
|
||||
overlayfs, move backing_file_open() into a new file fs/backing-file.c
|
||||
and header.
|
||||
|
||||
Add a MAINTAINERS entry for stackable filesystems and add a Kconfig
|
||||
FS_STACK which stackable filesystems need to select.
|
||||
|
||||
For now, the backing_file struct, the backing_file alloc/free functions
|
||||
and the backing_file_user_path() accessor remain internal to file_table.c.
|
||||
We may change that in the future.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index f5dcab467670..3a29f2d3a2b1 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -7475,6 +7475,15 @@ F: fs/mnt_idmapping.c
|
||||
F: include/linux/mnt_idmapping.*
|
||||
F: tools/testing/selftests/mount_setattr/
|
||||
|
||||
+FILESYSTEMS [STACKABLE]
|
||||
+M: Miklos Szeredi <miklos@szeredi.hu>
|
||||
+M: Amir Goldstein <amir73il@gmail.com>
|
||||
+L: linux-fsdevel@vger.kernel.org
|
||||
+L: linux-unionfs@vger.kernel.org
|
||||
+S: Maintained
|
||||
+F: fs/backing-file.c
|
||||
+F: include/linux/backing-file.h
|
||||
+
|
||||
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
|
||||
M: Riku Voipio <riku.voipio@iki.fi>
|
||||
L: linux-hwmon@vger.kernel.org
|
||||
diff --git a/fs/Kconfig b/fs/Kconfig
|
||||
index 5378e55f87d3..a9c6fa9cff1f 100644
|
||||
--- a/fs/Kconfig
|
||||
+++ b/fs/Kconfig
|
||||
@@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER
|
||||
config FS_IOMAP
|
||||
bool
|
||||
|
||||
+# Stackable filesystems
|
||||
+config FS_STACK
|
||||
+ bool
|
||||
+
|
||||
config BUFFER_HEAD
|
||||
bool
|
||||
|
||||
diff --git a/fs/Makefile b/fs/Makefile
|
||||
index 0da17ff145c6..716c9fe04dec 100644
|
||||
--- a/fs/Makefile
|
||||
+++ b/fs/Makefile
|
||||
@@ -41,6 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
|
||||
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
|
||||
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
|
||||
|
||||
+obj-$(CONFIG_FS_STACK) += backing-file.o
|
||||
obj-$(CONFIG_FS_MBCACHE) += mbcache.o
|
||||
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
|
||||
obj-$(CONFIG_NFS_COMMON) += nfs_common/
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
new file mode 100644
|
||||
index 000000000000..04b33036f709
|
||||
--- /dev/null
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -0,0 +1,48 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+/*
|
||||
+ * Common helpers for stackable filesystems and backing files.
|
||||
+ *
|
||||
+ * Copyright (C) 2023 CTERA Networks.
|
||||
+ */
|
||||
+
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/backing-file.h>
|
||||
+
|
||||
+#include "internal.h"
|
||||
+
|
||||
+/**
|
||||
+ * backing_file_open - open a backing file for kernel internal use
|
||||
+ * @user_path: path that the user reuqested to open
|
||||
+ * @flags: open flags
|
||||
+ * @real_path: path of the backing file
|
||||
+ * @cred: credentials for open
|
||||
+ *
|
||||
+ * Open a backing file for a stackable filesystem (e.g., overlayfs).
|
||||
+ * @user_path may be on the stackable filesystem and @real_path on the
|
||||
+ * underlying filesystem. In this case, we want to be able to return the
|
||||
+ * @user_path of the stackable filesystem. This is done by embedding the
|
||||
+ * returned file into a container structure that also stores the stacked
|
||||
+ * file's path, which can be retrieved using backing_file_user_path().
|
||||
+ */
|
||||
+struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
+ const struct path *real_path,
|
||||
+ const struct cred *cred)
|
||||
+{
|
||||
+ struct file *f;
|
||||
+ int error;
|
||||
+
|
||||
+ f = alloc_empty_backing_file(flags, cred);
|
||||
+ if (IS_ERR(f))
|
||||
+ return f;
|
||||
+
|
||||
+ path_get(user_path);
|
||||
+ *backing_file_user_path(f) = *user_path;
|
||||
+ error = vfs_open(real_path, f);
|
||||
+ if (error) {
|
||||
+ fput(f);
|
||||
+ f = ERR_PTR(error);
|
||||
+ }
|
||||
+
|
||||
+ return f;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_open);
|
||||
diff --git a/fs/open.c b/fs/open.c
|
||||
index 45547548a0e5..4260d61560d4 100644
|
||||
--- a/fs/open.c
|
||||
+++ b/fs/open.c
|
||||
@@ -1141,44 +1141,6 @@ struct file *kernel_file_open(const struct path *path, int flags,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_file_open);
|
||||
|
||||
-/**
|
||||
- * backing_file_open - open a backing file for kernel internal use
|
||||
- * @user_path: path that the user reuqested to open
|
||||
- * @flags: open flags
|
||||
- * @real_path: path of the backing file
|
||||
- * @cred: credentials for open
|
||||
- *
|
||||
- * Open a backing file for a stackable filesystem (e.g., overlayfs).
|
||||
- * @user_path may be on the stackable filesystem and @real_path on the
|
||||
- * underlying filesystem. In this case, we want to be able to return the
|
||||
- * @user_path of the stackable filesystem. This is done by embedding the
|
||||
- * returned file into a container structure that also stores the stacked
|
||||
- * file's path, which can be retrieved using backing_file_user_path().
|
||||
- */
|
||||
-struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
- const struct path *real_path,
|
||||
- const struct cred *cred)
|
||||
-{
|
||||
- struct file *f;
|
||||
- int error;
|
||||
-
|
||||
- f = alloc_empty_backing_file(flags, cred);
|
||||
- if (IS_ERR(f))
|
||||
- return f;
|
||||
-
|
||||
- path_get(user_path);
|
||||
- *backing_file_user_path(f) = *user_path;
|
||||
- f->f_path = *real_path;
|
||||
- error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
|
||||
- if (error) {
|
||||
- fput(f);
|
||||
- f = ERR_PTR(error);
|
||||
- }
|
||||
-
|
||||
- return f;
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(backing_file_open);
|
||||
-
|
||||
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
|
||||
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
|
||||
|
||||
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
|
||||
index 6708e54b0e30..148d9567b5c3 100644
|
||||
--- a/fs/overlayfs/Kconfig
|
||||
+++ b/fs/overlayfs/Kconfig
|
||||
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config OVERLAY_FS
|
||||
tristate "Overlay filesystem support"
|
||||
+ select FS_STACK
|
||||
select EXPORTFS
|
||||
help
|
||||
An overlay filesystem combines two filesystems - an 'upper' filesystem
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index cd0770bb3020..634a96a65bfd 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/security.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
+#include <linux/backing-file.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#include "../internal.h" /* for sb_init_dio_done_wq */
|
||||
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
|
||||
new file mode 100644
|
||||
index 000000000000..55c9e804f780
|
||||
--- /dev/null
|
||||
+++ b/include/linux/backing-file.h
|
||||
@@ -0,0 +1,17 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
+/*
|
||||
+ * Common helpers for stackable filesystems and backing files.
|
||||
+ *
|
||||
+ * Copyright (C) 2023 CTERA Networks.
|
||||
+ */
|
||||
+
|
||||
+#ifndef _LINUX_BACKING_FILE_H
|
||||
+#define _LINUX_BACKING_FILE_H
|
||||
+
|
||||
+#include <linux/file.h>
|
||||
+
|
||||
+struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
+ const struct path *real_path,
|
||||
+ const struct cred *cred);
|
||||
+
|
||||
+#endif /* _LINUX_BACKING_FILE_H */
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 1927fcdad989..5f3ca25c77e5 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -2586,9 +2586,6 @@ struct file *dentry_open(const struct path *path, int flags,
|
||||
const struct cred *creds);
|
||||
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
const struct cred *cred);
|
||||
-struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
- const struct path *real_path,
|
||||
- const struct cred *cred);
|
||||
struct path *backing_file_user_path(struct file *f);
|
||||
|
||||
/*
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,600 @@
|
||||
From 37577a7bb4d8c217f23a2b47e8fbe66640e0588d Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:51:36 +0200
|
||||
Subject: [PATCH] fs: factor out backing_file_{read,write}_iter() helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/overlayfs/file.c & fs/backing-file.c: carried over downstream
|
||||
logic of calling *_start_write()/*_end_write() because of not
|
||||
backported commits: 269aed7014b3 ("fs: move file_start_write() into
|
||||
vfs_iter_write()") and 6ae654392bb5 ("fs: move kiocb_start_write()
|
||||
into vfs_iocb_iter_write()") - those commits were skipped in this
|
||||
MR, because vfs_iter_write() appears in K-A-B-I stable lists and
|
||||
changing the convention could break partner modules
|
||||
|
||||
commit a6293b3e285cd0d7692141d7981a5f144f0e2f0b
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Wed Nov 22 17:48:52 2023 +0200
|
||||
|
||||
fs: factor out backing_file_{read,write}_iter() helpers
|
||||
|
||||
Overlayfs submits files io to backing files on other filesystems.
|
||||
Factor out some common helpers to perform io to backing files, into
|
||||
fs/backing-file.c.
|
||||
|
||||
Suggested-by: Miklos Szeredi <miklos@szeredi.hu>
|
||||
Link: https://lore.kernel.org/r/CAJfpeguhmZbjP3JLqtUy0AdWaHOkAPWeP827BBWwRFEAUgnUcQ@mail.gmail.com
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
index 04b33036f709..6d915a45e288 100644
|
||||
--- a/fs/backing-file.c
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -2,6 +2,9 @@
|
||||
/*
|
||||
* Common helpers for stackable filesystems and backing files.
|
||||
*
|
||||
+ * Forked from fs/overlayfs/file.c.
|
||||
+ *
|
||||
+ * Copyright (C) 2017 Red Hat, Inc.
|
||||
* Copyright (C) 2023 CTERA Networks.
|
||||
*/
|
||||
|
||||
@@ -46,3 +49,213 @@ struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
return f;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(backing_file_open);
|
||||
+
|
||||
+struct backing_aio {
|
||||
+ struct kiocb iocb;
|
||||
+ refcount_t ref;
|
||||
+ struct kiocb *orig_iocb;
|
||||
+ /* used for aio completion */
|
||||
+ void (*end_write)(struct file *);
|
||||
+ struct work_struct work;
|
||||
+ long res;
|
||||
+};
|
||||
+
|
||||
+static struct kmem_cache *backing_aio_cachep;
|
||||
+
|
||||
+#define BACKING_IOCB_MASK \
|
||||
+ (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
|
||||
+
|
||||
+static rwf_t iocb_to_rw_flags(int flags)
|
||||
+{
|
||||
+ return (__force rwf_t)(flags & BACKING_IOCB_MASK);
|
||||
+}
|
||||
+
|
||||
+static void backing_aio_put(struct backing_aio *aio)
|
||||
+{
|
||||
+ if (refcount_dec_and_test(&aio->ref)) {
|
||||
+ fput(aio->iocb.ki_filp);
|
||||
+ kmem_cache_free(backing_aio_cachep, aio);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void backing_aio_cleanup(struct backing_aio *aio, long res)
|
||||
+{
|
||||
+ struct kiocb *iocb = &aio->iocb;
|
||||
+ struct kiocb *orig_iocb = aio->orig_iocb;
|
||||
+
|
||||
+ if (iocb->ki_flags & IOCB_WRITE)
|
||||
+ kiocb_end_write(iocb);
|
||||
+
|
||||
+ if (aio->end_write)
|
||||
+ aio->end_write(orig_iocb->ki_filp);
|
||||
+
|
||||
+ orig_iocb->ki_pos = iocb->ki_pos;
|
||||
+ backing_aio_put(aio);
|
||||
+}
|
||||
+
|
||||
+static void backing_aio_rw_complete(struct kiocb *iocb, long res)
|
||||
+{
|
||||
+ struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
|
||||
+ struct kiocb *orig_iocb = aio->orig_iocb;
|
||||
+
|
||||
+ backing_aio_cleanup(aio, res);
|
||||
+ orig_iocb->ki_complete(orig_iocb, res);
|
||||
+}
|
||||
+
|
||||
+static void backing_aio_complete_work(struct work_struct *work)
|
||||
+{
|
||||
+ struct backing_aio *aio = container_of(work, struct backing_aio, work);
|
||||
+
|
||||
+ backing_aio_rw_complete(&aio->iocb, aio->res);
|
||||
+}
|
||||
+
|
||||
+static void backing_aio_queue_completion(struct kiocb *iocb, long res)
|
||||
+{
|
||||
+ struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
|
||||
+
|
||||
+ /*
|
||||
+ * Punt to a work queue to serialize updates of mtime/size.
|
||||
+ */
|
||||
+ aio->res = res;
|
||||
+ INIT_WORK(&aio->work, backing_aio_complete_work);
|
||||
+ queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
|
||||
+ &aio->work);
|
||||
+}
|
||||
+
|
||||
+static int backing_aio_init_wq(struct kiocb *iocb)
|
||||
+{
|
||||
+ struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
|
||||
+
|
||||
+ if (sb->s_dio_done_wq)
|
||||
+ return 0;
|
||||
+
|
||||
+ return sb_init_dio_done_wq(sb);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
|
||||
+ struct kiocb *iocb, int flags,
|
||||
+ struct backing_file_ctx *ctx)
|
||||
+{
|
||||
+ struct backing_aio *aio = NULL;
|
||||
+ const struct cred *old_cred;
|
||||
+ ssize_t ret;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ if (!iov_iter_count(iter))
|
||||
+ return 0;
|
||||
+
|
||||
+ if (iocb->ki_flags & IOCB_DIRECT &&
|
||||
+ !(file->f_mode & FMODE_CAN_ODIRECT))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ old_cred = override_creds(ctx->cred);
|
||||
+ if (is_sync_kiocb(iocb)) {
|
||||
+ rwf_t rwf = iocb_to_rw_flags(flags);
|
||||
+
|
||||
+ ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
|
||||
+ } else {
|
||||
+ ret = -ENOMEM;
|
||||
+ aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
|
||||
+ if (!aio)
|
||||
+ goto out;
|
||||
+
|
||||
+ aio->orig_iocb = iocb;
|
||||
+ kiocb_clone(&aio->iocb, iocb, get_file(file));
|
||||
+ aio->iocb.ki_complete = backing_aio_rw_complete;
|
||||
+ refcount_set(&aio->ref, 2);
|
||||
+ ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
|
||||
+ backing_aio_put(aio);
|
||||
+ if (ret != -EIOCBQUEUED)
|
||||
+ backing_aio_cleanup(aio, ret);
|
||||
+ }
|
||||
+out:
|
||||
+ revert_creds(old_cred);
|
||||
+
|
||||
+ if (ctx->accessed)
|
||||
+ ctx->accessed(ctx->user_file);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_read_iter);
|
||||
+
|
||||
+ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
+ struct kiocb *iocb, int flags,
|
||||
+ struct backing_file_ctx *ctx)
|
||||
+{
|
||||
+ const struct cred *old_cred;
|
||||
+ ssize_t ret;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ if (!iov_iter_count(iter))
|
||||
+ return 0;
|
||||
+
|
||||
+ ret = file_remove_privs(ctx->user_file);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ if (iocb->ki_flags & IOCB_DIRECT &&
|
||||
+ !(file->f_mode & FMODE_CAN_ODIRECT))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ /*
|
||||
+ * Stacked filesystems don't support deferred completions, don't copy
|
||||
+ * this property in case it is set by the issuer.
|
||||
+ */
|
||||
+ flags &= ~IOCB_DIO_CALLER_COMP;
|
||||
+
|
||||
+ old_cred = override_creds(ctx->cred);
|
||||
+ if (is_sync_kiocb(iocb)) {
|
||||
+ rwf_t rwf = iocb_to_rw_flags(flags);
|
||||
+
|
||||
+ file_start_write(file);
|
||||
+ ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
|
||||
+ file_end_write(file);
|
||||
+ if (ctx->end_write)
|
||||
+ ctx->end_write(ctx->user_file);
|
||||
+ } else {
|
||||
+ struct backing_aio *aio;
|
||||
+
|
||||
+ ret = backing_aio_init_wq(iocb);
|
||||
+ if (ret)
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = -ENOMEM;
|
||||
+ aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
|
||||
+ if (!aio)
|
||||
+ goto out;
|
||||
+
|
||||
+ aio->orig_iocb = iocb;
|
||||
+ aio->end_write = ctx->end_write;
|
||||
+ kiocb_clone(&aio->iocb, iocb, get_file(file));
|
||||
+ aio->iocb.ki_flags = flags;
|
||||
+ aio->iocb.ki_complete = backing_aio_queue_completion;
|
||||
+ refcount_set(&aio->ref, 2);
|
||||
+ kiocb_start_write(&aio->iocb);
|
||||
+ ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
|
||||
+ backing_aio_put(aio);
|
||||
+ if (ret != -EIOCBQUEUED)
|
||||
+ backing_aio_cleanup(aio, ret);
|
||||
+ }
|
||||
+out:
|
||||
+ revert_creds(old_cred);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_write_iter);
|
||||
+
|
||||
+static int __init backing_aio_init(void)
|
||||
+{
|
||||
+ backing_aio_cachep = kmem_cache_create("backing_aio",
|
||||
+ sizeof(struct backing_aio),
|
||||
+ 0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
+ if (!backing_aio_cachep)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+fs_initcall(backing_aio_init);
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 634a96a65bfd..3eee9f45971e 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -16,19 +16,6 @@
|
||||
#include <linux/backing-file.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
-#include "../internal.h" /* for sb_init_dio_done_wq */
|
||||
-
|
||||
-struct ovl_aio_req {
|
||||
- struct kiocb iocb;
|
||||
- refcount_t ref;
|
||||
- struct kiocb *orig_iocb;
|
||||
- /* used for aio completion */
|
||||
- struct work_struct work;
|
||||
- long res;
|
||||
-};
|
||||
-
|
||||
-static struct kmem_cache *ovl_aio_request_cachep;
|
||||
-
|
||||
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
|
||||
{
|
||||
if (realinode != ovl_inode_upper(inode))
|
||||
@@ -271,83 +258,16 @@ static void ovl_file_accessed(struct file *file)
|
||||
touch_atime(&file->f_path);
|
||||
}
|
||||
|
||||
-#define OVL_IOCB_MASK \
|
||||
- (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
|
||||
-
|
||||
-static rwf_t iocb_to_rw_flags(int flags)
|
||||
-{
|
||||
- return (__force rwf_t)(flags & OVL_IOCB_MASK);
|
||||
-}
|
||||
-
|
||||
-static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
|
||||
-{
|
||||
- if (refcount_dec_and_test(&aio_req->ref)) {
|
||||
- fput(aio_req->iocb.ki_filp);
|
||||
- kmem_cache_free(ovl_aio_request_cachep, aio_req);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
|
||||
-{
|
||||
- struct kiocb *iocb = &aio_req->iocb;
|
||||
- struct kiocb *orig_iocb = aio_req->orig_iocb;
|
||||
-
|
||||
- if (iocb->ki_flags & IOCB_WRITE) {
|
||||
- kiocb_end_write(iocb);
|
||||
- ovl_file_modified(orig_iocb->ki_filp);
|
||||
- }
|
||||
-
|
||||
- orig_iocb->ki_pos = iocb->ki_pos;
|
||||
- ovl_aio_put(aio_req);
|
||||
-}
|
||||
-
|
||||
-static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
|
||||
-{
|
||||
- struct ovl_aio_req *aio_req = container_of(iocb,
|
||||
- struct ovl_aio_req, iocb);
|
||||
- struct kiocb *orig_iocb = aio_req->orig_iocb;
|
||||
-
|
||||
- ovl_aio_cleanup_handler(aio_req);
|
||||
- orig_iocb->ki_complete(orig_iocb, res);
|
||||
-}
|
||||
-
|
||||
-static void ovl_aio_complete_work(struct work_struct *work)
|
||||
-{
|
||||
- struct ovl_aio_req *aio_req = container_of(work,
|
||||
- struct ovl_aio_req, work);
|
||||
-
|
||||
- ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
|
||||
-}
|
||||
-
|
||||
-static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
|
||||
-{
|
||||
- struct ovl_aio_req *aio_req = container_of(iocb,
|
||||
- struct ovl_aio_req, iocb);
|
||||
- struct kiocb *orig_iocb = aio_req->orig_iocb;
|
||||
-
|
||||
- /*
|
||||
- * Punt to a work queue to serialize updates of mtime/size.
|
||||
- */
|
||||
- aio_req->res = res;
|
||||
- INIT_WORK(&aio_req->work, ovl_aio_complete_work);
|
||||
- queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
|
||||
- &aio_req->work);
|
||||
-}
|
||||
-
|
||||
-static int ovl_init_aio_done_wq(struct super_block *sb)
|
||||
-{
|
||||
- if (sb->s_dio_done_wq)
|
||||
- return 0;
|
||||
-
|
||||
- return sb_init_dio_done_wq(sb);
|
||||
-}
|
||||
-
|
||||
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct fd real;
|
||||
- const struct cred *old_cred;
|
||||
ssize_t ret;
|
||||
+ struct backing_file_ctx ctx = {
|
||||
+ .cred = ovl_creds(file_inode(file)->i_sb),
|
||||
+ .user_file = file,
|
||||
+ .accessed = ovl_file_accessed,
|
||||
+ };
|
||||
|
||||
if (!iov_iter_count(iter))
|
||||
return 0;
|
||||
@@ -356,37 +276,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- ret = -EINVAL;
|
||||
- if (iocb->ki_flags & IOCB_DIRECT &&
|
||||
- !(real.file->f_mode & FMODE_CAN_ODIRECT))
|
||||
- goto out_fdput;
|
||||
-
|
||||
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
- if (is_sync_kiocb(iocb)) {
|
||||
- rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
|
||||
-
|
||||
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
|
||||
- } else {
|
||||
- struct ovl_aio_req *aio_req;
|
||||
-
|
||||
- ret = -ENOMEM;
|
||||
- aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
|
||||
- if (!aio_req)
|
||||
- goto out;
|
||||
-
|
||||
- aio_req->orig_iocb = iocb;
|
||||
- kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
|
||||
- aio_req->iocb.ki_complete = ovl_aio_rw_complete;
|
||||
- refcount_set(&aio_req->ref, 2);
|
||||
- ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
|
||||
- ovl_aio_put(aio_req);
|
||||
- if (ret != -EIOCBQUEUED)
|
||||
- ovl_aio_cleanup_handler(aio_req);
|
||||
- }
|
||||
-out:
|
||||
- revert_creds(old_cred);
|
||||
- ovl_file_accessed(file);
|
||||
-out_fdput:
|
||||
+ ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
|
||||
+ &ctx);
|
||||
fdput(real);
|
||||
|
||||
return ret;
|
||||
@@ -397,9 +288,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct fd real;
|
||||
- const struct cred *old_cred;
|
||||
ssize_t ret;
|
||||
int ifl = iocb->ki_flags;
|
||||
+ struct backing_file_ctx ctx = {
|
||||
+ .cred = ovl_creds(inode->i_sb),
|
||||
+ .user_file = file,
|
||||
+ .end_write = ovl_file_modified,
|
||||
+ };
|
||||
|
||||
if (!iov_iter_count(iter))
|
||||
return 0;
|
||||
@@ -407,19 +302,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
inode_lock(inode);
|
||||
/* Update mode */
|
||||
ovl_copyattr(inode);
|
||||
- ret = file_remove_privs(file);
|
||||
- if (ret)
|
||||
- goto out_unlock;
|
||||
|
||||
ret = ovl_real_fdget(file, &real);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
- ret = -EINVAL;
|
||||
- if (iocb->ki_flags & IOCB_DIRECT &&
|
||||
- !(real.file->f_mode & FMODE_CAN_ODIRECT))
|
||||
- goto out_fdput;
|
||||
-
|
||||
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
||||
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
||||
|
||||
@@ -428,42 +315,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
* this property in case it is set by the issuer.
|
||||
*/
|
||||
ifl &= ~IOCB_DIO_CALLER_COMP;
|
||||
-
|
||||
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
- if (is_sync_kiocb(iocb)) {
|
||||
- rwf_t rwf = iocb_to_rw_flags(ifl);
|
||||
-
|
||||
- file_start_write(real.file);
|
||||
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
|
||||
- file_end_write(real.file);
|
||||
- /* Update size */
|
||||
- ovl_file_modified(file);
|
||||
- } else {
|
||||
- struct ovl_aio_req *aio_req;
|
||||
-
|
||||
- ret = ovl_init_aio_done_wq(inode->i_sb);
|
||||
- if (ret)
|
||||
- goto out;
|
||||
-
|
||||
- ret = -ENOMEM;
|
||||
- aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
|
||||
- if (!aio_req)
|
||||
- goto out;
|
||||
-
|
||||
- aio_req->orig_iocb = iocb;
|
||||
- kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
|
||||
- aio_req->iocb.ki_flags = ifl;
|
||||
- aio_req->iocb.ki_complete = ovl_aio_queue_completion;
|
||||
- refcount_set(&aio_req->ref, 2);
|
||||
- kiocb_start_write(&aio_req->iocb);
|
||||
- ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
|
||||
- ovl_aio_put(aio_req);
|
||||
- if (ret != -EIOCBQUEUED)
|
||||
- ovl_aio_cleanup_handler(aio_req);
|
||||
- }
|
||||
-out:
|
||||
- revert_creds(old_cred);
|
||||
-out_fdput:
|
||||
+ ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
|
||||
fdput(real);
|
||||
|
||||
out_unlock:
|
||||
@@ -775,19 +627,3 @@ const struct file_operations ovl_file_operations = {
|
||||
.copy_file_range = ovl_copy_file_range,
|
||||
.remap_file_range = ovl_remap_file_range,
|
||||
};
|
||||
-
|
||||
-int __init ovl_aio_request_cache_init(void)
|
||||
-{
|
||||
- ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
|
||||
- sizeof(struct ovl_aio_req),
|
||||
- 0, SLAB_HWCACHE_ALIGN, NULL);
|
||||
- if (!ovl_aio_request_cachep)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-void ovl_aio_request_cache_destroy(void)
|
||||
-{
|
||||
- kmem_cache_destroy(ovl_aio_request_cachep);
|
||||
-}
|
||||
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
|
||||
index a4b94a74b854..8b31bc3ee7a0 100644
|
||||
--- a/fs/overlayfs/overlayfs.h
|
||||
+++ b/fs/overlayfs/overlayfs.h
|
||||
@@ -417,6 +417,12 @@ int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
const struct cred *ovl_override_creds(struct super_block *sb);
|
||||
+
|
||||
+static inline const struct cred *ovl_creds(struct super_block *sb)
|
||||
+{
|
||||
+ return OVL_FS(sb)->creator_cred;
|
||||
+}
|
||||
+
|
||||
int ovl_can_decode_fh(struct super_block *sb);
|
||||
struct dentry *ovl_indexdir(struct super_block *sb);
|
||||
bool ovl_index_all(struct super_block *sb);
|
||||
@@ -835,8 +841,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
|
||||
|
||||
/* file.c */
|
||||
extern const struct file_operations ovl_file_operations;
|
||||
-int __init ovl_aio_request_cache_init(void);
|
||||
-void ovl_aio_request_cache_destroy(void);
|
||||
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
|
||||
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
|
||||
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);
|
||||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
|
||||
index 0779c8290ec4..37387fd98e34 100644
|
||||
--- a/fs/overlayfs/super.c
|
||||
+++ b/fs/overlayfs/super.c
|
||||
@@ -1522,14 +1522,10 @@ static int __init ovl_init(void)
|
||||
if (ovl_inode_cachep == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
- err = ovl_aio_request_cache_init();
|
||||
- if (!err) {
|
||||
- err = register_filesystem(&ovl_fs_type);
|
||||
- if (!err)
|
||||
- return 0;
|
||||
+ err = register_filesystem(&ovl_fs_type);
|
||||
+ if (!err)
|
||||
+ return 0;
|
||||
|
||||
- ovl_aio_request_cache_destroy();
|
||||
- }
|
||||
kmem_cache_destroy(ovl_inode_cachep);
|
||||
|
||||
return err;
|
||||
@@ -1545,7 +1541,6 @@ static void __exit ovl_exit(void)
|
||||
*/
|
||||
rcu_barrier();
|
||||
kmem_cache_destroy(ovl_inode_cachep);
|
||||
- ovl_aio_request_cache_destroy();
|
||||
}
|
||||
|
||||
module_init(ovl_init);
|
||||
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
|
||||
index 55c9e804f780..0648d548a418 100644
|
||||
--- a/include/linux/backing-file.h
|
||||
+++ b/include/linux/backing-file.h
|
||||
@@ -9,9 +9,24 @@
|
||||
#define _LINUX_BACKING_FILE_H
|
||||
|
||||
#include <linux/file.h>
|
||||
+#include <linux/uio.h>
|
||||
+#include <linux/fs.h>
|
||||
+
|
||||
+struct backing_file_ctx {
|
||||
+ const struct cred *cred;
|
||||
+ struct file *user_file;
|
||||
+ void (*accessed)(struct file *);
|
||||
+ void (*end_write)(struct file *);
|
||||
+};
|
||||
|
||||
struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
const struct path *real_path,
|
||||
const struct cred *cred);
|
||||
+ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
|
||||
+ struct kiocb *iocb, int flags,
|
||||
+ struct backing_file_ctx *ctx);
|
||||
+ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
+ struct kiocb *iocb, int flags,
|
||||
+ struct backing_file_ctx *ctx);
|
||||
|
||||
#endif /* _LINUX_BACKING_FILE_H */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,189 @@
|
||||
From 5c28f6ec073077ce1239652c7a74555904eb0577 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:51:41 +0200
|
||||
Subject: [PATCH] fs: factor out backing_file_splice_{read,write}() helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 9b7e9e2f5d5c3d079ec46bc71b114012e362ea6e
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Fri Oct 13 12:13:12 2023 +0300
|
||||
|
||||
fs: factor out backing_file_splice_{read,write}() helpers
|
||||
|
||||
There is not much in those helpers, but it makes sense to have them
|
||||
logically next to the backing_file_{read,write}_iter() helpers as they
|
||||
may grow more common logic in the future.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
index 6d915a45e288..5cc411566ce0 100644
|
||||
--- a/fs/backing-file.c
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/backing-file.h>
|
||||
+#include <linux/splice.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -248,6 +249,56 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(backing_file_write_iter);
|
||||
|
||||
+ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
|
||||
+ struct pipe_inode_info *pipe, size_t len,
|
||||
+ unsigned int flags,
|
||||
+ struct backing_file_ctx *ctx)
|
||||
+{
|
||||
+ const struct cred *old_cred;
|
||||
+ ssize_t ret;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ old_cred = override_creds(ctx->cred);
|
||||
+ ret = vfs_splice_read(in, ppos, pipe, len, flags);
|
||||
+ revert_creds(old_cred);
|
||||
+
|
||||
+ if (ctx->accessed)
|
||||
+ ctx->accessed(ctx->user_file);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_splice_read);
|
||||
+
|
||||
+ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
|
||||
+ struct file *out, loff_t *ppos, size_t len,
|
||||
+ unsigned int flags,
|
||||
+ struct backing_file_ctx *ctx)
|
||||
+{
|
||||
+ const struct cred *old_cred;
|
||||
+ ssize_t ret;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ ret = file_remove_privs(ctx->user_file);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ old_cred = override_creds(ctx->cred);
|
||||
+ file_start_write(out);
|
||||
+ ret = iter_file_splice_write(pipe, out, ppos, len, flags);
|
||||
+ file_end_write(out);
|
||||
+ revert_creds(old_cred);
|
||||
+
|
||||
+ if (ctx->end_write)
|
||||
+ ctx->end_write(ctx->user_file);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_splice_write);
|
||||
+
|
||||
static int __init backing_aio_init(void)
|
||||
{
|
||||
backing_aio_cachep = kmem_cache_create("backing_aio",
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 3eee9f45971e..165a92b25c0a 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/uaccess.h>
|
||||
-#include <linux/splice.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
@@ -328,20 +327,21 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
|
||||
struct pipe_inode_info *pipe, size_t len,
|
||||
unsigned int flags)
|
||||
{
|
||||
- const struct cred *old_cred;
|
||||
struct fd real;
|
||||
ssize_t ret;
|
||||
+ struct backing_file_ctx ctx = {
|
||||
+ .cred = ovl_creds(file_inode(in)->i_sb),
|
||||
+ .user_file = in,
|
||||
+ .accessed = ovl_file_accessed,
|
||||
+ };
|
||||
|
||||
ret = ovl_real_fdget(in, &real);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- old_cred = ovl_override_creds(file_inode(in)->i_sb);
|
||||
- ret = vfs_splice_read(real.file, ppos, pipe, len, flags);
|
||||
- revert_creds(old_cred);
|
||||
- ovl_file_accessed(in);
|
||||
-
|
||||
+ ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
|
||||
fdput(real);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -357,30 +357,23 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
|
||||
loff_t *ppos, size_t len, unsigned int flags)
|
||||
{
|
||||
struct fd real;
|
||||
- const struct cred *old_cred;
|
||||
struct inode *inode = file_inode(out);
|
||||
ssize_t ret;
|
||||
+ struct backing_file_ctx ctx = {
|
||||
+ .cred = ovl_creds(inode->i_sb),
|
||||
+ .user_file = out,
|
||||
+ .end_write = ovl_file_modified,
|
||||
+ };
|
||||
|
||||
inode_lock(inode);
|
||||
/* Update mode */
|
||||
ovl_copyattr(inode);
|
||||
- ret = file_remove_privs(out);
|
||||
- if (ret)
|
||||
- goto out_unlock;
|
||||
|
||||
ret = ovl_real_fdget(out, &real);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
- old_cred = ovl_override_creds(inode->i_sb);
|
||||
- file_start_write(real.file);
|
||||
-
|
||||
- ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
|
||||
-
|
||||
- file_end_write(real.file);
|
||||
- /* Update size */
|
||||
- ovl_file_modified(out);
|
||||
- revert_creds(old_cred);
|
||||
+ ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
|
||||
fdput(real);
|
||||
|
||||
out_unlock:
|
||||
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
|
||||
index 0648d548a418..0546d5b1c9f5 100644
|
||||
--- a/include/linux/backing-file.h
|
||||
+++ b/include/linux/backing-file.h
|
||||
@@ -28,5 +28,13 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
|
||||
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
|
||||
struct kiocb *iocb, int flags,
|
||||
struct backing_file_ctx *ctx);
|
||||
+ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
|
||||
+ struct pipe_inode_info *pipe, size_t len,
|
||||
+ unsigned int flags,
|
||||
+ struct backing_file_ctx *ctx);
|
||||
+ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
|
||||
+ struct file *out, loff_t *ppos, size_t len,
|
||||
+ unsigned int flags,
|
||||
+ struct backing_file_ctx *ctx);
|
||||
|
||||
#endif /* _LINUX_BACKING_FILE_H */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
124
SOURCES/1335-fs-factor-out-backing-file-mmap-helper.patch
Normal file
124
SOURCES/1335-fs-factor-out-backing-file-mmap-helper.patch
Normal file
@ -0,0 +1,124 @@
|
||||
From 5176a4370a2e9f1ebe16e502bf93897820461b7f Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 10:51:45 +0200
|
||||
Subject: [PATCH] fs: factor out backing_file_mmap() helper
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit f567377e406c032fff0799bde4fdf4a977529b84
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Fri Oct 13 12:49:37 2023 +0300
|
||||
|
||||
fs: factor out backing_file_mmap() helper
|
||||
|
||||
Assert that the file object is allocated in a backing_file container
|
||||
so that file_user_path() could be used to display the user path and
|
||||
not the backing file's path in /proc/<pid>/maps.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
index 5cc411566ce0..6ea14b6214c1 100644
|
||||
--- a/fs/backing-file.c
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/backing-file.h>
|
||||
#include <linux/splice.h>
|
||||
+#include <linux/mm.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -299,6 +300,32 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(backing_file_splice_write);
|
||||
|
||||
+int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
|
||||
+ struct backing_file_ctx *ctx)
|
||||
+{
|
||||
+ const struct cred *old_cred;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
|
||||
+ WARN_ON_ONCE(ctx->user_file != vma->vm_file))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ if (!file->f_op->mmap)
|
||||
+ return -ENODEV;
|
||||
+
|
||||
+ vma_set_file(vma, file);
|
||||
+
|
||||
+ old_cred = override_creds(ctx->cred);
|
||||
+ ret = call_mmap(vma->vm_file, vma);
|
||||
+ revert_creds(old_cred);
|
||||
+
|
||||
+ if (ctx->accessed)
|
||||
+ ctx->accessed(ctx->user_file);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_mmap);
|
||||
+
|
||||
static int __init backing_aio_init(void)
|
||||
{
|
||||
backing_aio_cachep = kmem_cache_create("backing_aio",
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 165a92b25c0a..d85385f37ba6 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -10,7 +10,6 @@
|
||||
#include <linux/uio.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/security.h>
|
||||
-#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/backing-file.h>
|
||||
#include "overlayfs.h"
|
||||
@@ -411,23 +410,13 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct file *realfile = file->private_data;
|
||||
- const struct cred *old_cred;
|
||||
- int ret;
|
||||
-
|
||||
- if (!realfile->f_op->mmap)
|
||||
- return -ENODEV;
|
||||
-
|
||||
- if (WARN_ON(file != vma->vm_file))
|
||||
- return -EIO;
|
||||
-
|
||||
- vma_set_file(vma, realfile);
|
||||
-
|
||||
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
- ret = call_mmap(vma->vm_file, vma);
|
||||
- revert_creds(old_cred);
|
||||
- ovl_file_accessed(file);
|
||||
+ struct backing_file_ctx ctx = {
|
||||
+ .cred = ovl_creds(file_inode(file)->i_sb),
|
||||
+ .user_file = file,
|
||||
+ .accessed = ovl_file_accessed,
|
||||
+ };
|
||||
|
||||
- return ret;
|
||||
+ return backing_file_mmap(realfile, vma, &ctx);
|
||||
}
|
||||
|
||||
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
|
||||
index 0546d5b1c9f5..3f1fe1774f1b 100644
|
||||
--- a/include/linux/backing-file.h
|
||||
+++ b/include/linux/backing-file.h
|
||||
@@ -36,5 +36,7 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
|
||||
struct file *out, loff_t *ppos, size_t len,
|
||||
unsigned int flags,
|
||||
struct backing_file_ctx *ctx);
|
||||
+int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
|
||||
+ struct backing_file_ctx *ctx);
|
||||
|
||||
#endif /* _LINUX_BACKING_FILE_H */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
158
SOURCES/1336-lsm-add-helper-for-blob-allocations.patch
Normal file
158
SOURCES/1336-lsm-add-helper-for-blob-allocations.patch
Normal file
@ -0,0 +1,158 @@
|
||||
From f034201051121a87f98c1651368c3883f633182f Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:47:18 +0200
|
||||
Subject: [PATCH] lsm: add helper for blob allocations
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- security/security.c: context fuzz + dropped hunks changing functions
|
||||
not present downstream
|
||||
|
||||
commit 09001284eebfc1b684e81d1db0f006787d35f3e1
|
||||
Author: Casey Schaufler <casey@schaufler-ca.com>
|
||||
Date: Wed Jul 10 14:32:27 2024 -0700
|
||||
|
||||
lsm: add helper for blob allocations
|
||||
|
||||
Create a helper function lsm_blob_alloc() for general use in the hook
|
||||
specific functions that allocate LSM blobs. Change the hook specific
|
||||
functions to use this helper. This reduces the code size by a small
|
||||
amount and will make adding new instances of infrastructure managed
|
||||
security blobs easier.
|
||||
|
||||
Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
|
||||
Reviewed-by: John Johansen <john.johansen@canonical.com>
|
||||
[PM: subject tweak]
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/security/security.c b/security/security.c
|
||||
index b59af216324f..1e63f23a504a 100644
|
||||
--- a/security/security.c
|
||||
+++ b/security/security.c
|
||||
@@ -645,27 +645,42 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb)
|
||||
EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
|
||||
|
||||
/**
|
||||
- * lsm_cred_alloc - allocate a composite cred blob
|
||||
- * @cred: the cred that needs a blob
|
||||
+ * lsm_blob_alloc - allocate a composite blob
|
||||
+ * @dest: the destination for the blob
|
||||
+ * @size: the size of the blob
|
||||
* @gfp: allocation type
|
||||
*
|
||||
- * Allocate the cred blob for all the modules
|
||||
+ * Allocate a blob for all the modules
|
||||
*
|
||||
* Returns 0, or -ENOMEM if memory can't be allocated.
|
||||
*/
|
||||
-static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
|
||||
+static int lsm_blob_alloc(void **dest, size_t size, gfp_t gfp)
|
||||
{
|
||||
- if (blob_sizes.lbs_cred == 0) {
|
||||
- cred->security = NULL;
|
||||
+ if (size == 0) {
|
||||
+ *dest = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
- cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
|
||||
- if (cred->security == NULL)
|
||||
+ *dest = kzalloc(size, gfp);
|
||||
+ if (*dest == NULL)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * lsm_cred_alloc - allocate a composite cred blob
|
||||
+ * @cred: the cred that needs a blob
|
||||
+ * @gfp: allocation type
|
||||
+ *
|
||||
+ * Allocate the cred blob for all the modules
|
||||
+ *
|
||||
+ * Returns 0, or -ENOMEM if memory can't be allocated.
|
||||
+ */
|
||||
+static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
|
||||
+{
|
||||
+ return lsm_blob_alloc(&cred->security, blob_sizes.lbs_cred, gfp);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* lsm_early_cred - during initialization allocate a composite cred blob
|
||||
* @cred: the cred that needs a blob
|
||||
@@ -732,15 +747,7 @@ static int lsm_inode_alloc(struct inode *inode)
|
||||
*/
|
||||
static int lsm_task_alloc(struct task_struct *task)
|
||||
{
|
||||
- if (blob_sizes.lbs_task == 0) {
|
||||
- task->security = NULL;
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL);
|
||||
- if (task->security == NULL)
|
||||
- return -ENOMEM;
|
||||
- return 0;
|
||||
+ return lsm_blob_alloc(&task->security, blob_sizes.lbs_task, GFP_KERNEL);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -753,15 +760,7 @@ static int lsm_task_alloc(struct task_struct *task)
|
||||
*/
|
||||
static int lsm_ipc_alloc(struct kern_ipc_perm *kip)
|
||||
{
|
||||
- if (blob_sizes.lbs_ipc == 0) {
|
||||
- kip->security = NULL;
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL);
|
||||
- if (kip->security == NULL)
|
||||
- return -ENOMEM;
|
||||
- return 0;
|
||||
+ return lsm_blob_alloc(&kip->security, blob_sizes.lbs_ipc, GFP_KERNEL);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -774,15 +773,8 @@ static int lsm_ipc_alloc(struct kern_ipc_perm *kip)
|
||||
*/
|
||||
static int lsm_msg_msg_alloc(struct msg_msg *mp)
|
||||
{
|
||||
- if (blob_sizes.lbs_msg_msg == 0) {
|
||||
- mp->security = NULL;
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL);
|
||||
- if (mp->security == NULL)
|
||||
- return -ENOMEM;
|
||||
- return 0;
|
||||
+ return lsm_blob_alloc(&mp->security, blob_sizes.lbs_msg_msg,
|
||||
+ GFP_KERNEL);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -809,15 +801,8 @@ static void __init lsm_early_task(struct task_struct *task)
|
||||
*/
|
||||
static int lsm_superblock_alloc(struct super_block *sb)
|
||||
{
|
||||
- if (blob_sizes.lbs_superblock == 0) {
|
||||
- sb->s_security = NULL;
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- sb->s_security = kzalloc(blob_sizes.lbs_superblock, GFP_KERNEL);
|
||||
- if (sb->s_security == NULL)
|
||||
- return -ENOMEM;
|
||||
- return 0;
|
||||
+ return lsm_blob_alloc(&sb->s_security, blob_sizes.lbs_superblock,
|
||||
+ GFP_KERNEL);
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
74
SOURCES/1337-ovl-fix-nested-backing-file-paths.patch
Normal file
74
SOURCES/1337-ovl-fix-nested-backing-file-paths.patch
Normal file
@ -0,0 +1,74 @@
|
||||
From c884ff1e458df0e5d801f19b4e847a4673d7471b Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:48:02 +0200
|
||||
Subject: [PATCH] ovl: Fix nested backing file paths
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 924577e4f6ca473de1528953a0e13505fae61d7b
|
||||
Author: André Almeida <andrealmeid@igalia.com>
|
||||
Date: Tue Apr 29 15:38:50 2025 -0300
|
||||
|
||||
ovl: Fix nested backing file paths
|
||||
|
||||
When the lowerdir of an overlayfs is a merged directory of another
|
||||
overlayfs, ovl_open_realfile() will fail to open the real file and point
|
||||
to a lower dentry copy, without the proper parent path. After this,
|
||||
d_path() will then display the path incorrectly as if the file is placed
|
||||
in the root directory.
|
||||
|
||||
This bug can be triggered with the following setup:
|
||||
|
||||
mkdir -p ovl-A/lower ovl-A/upper ovl-A/merge ovl-A/work
|
||||
mkdir -p ovl-B/upper ovl-B/merge ovl-B/work
|
||||
|
||||
cp /bin/cat ovl-A/lower/
|
||||
|
||||
mount -t overlay overlay -o \
|
||||
lowerdir=ovl-A/lower,upperdir=ovl-A/upper,workdir=ovl-A/work \
|
||||
ovl-A/merge
|
||||
|
||||
mount -t overlay overlay -o \
|
||||
lowerdir=ovl-A/merge,upperdir=ovl-B/upper,workdir=ovl-B/work \
|
||||
ovl-B/merge
|
||||
|
||||
ovl-A/merge/cat /proc/self/maps | grep --color cat
|
||||
ovl-B/merge/cat /proc/self/maps | grep --color cat
|
||||
|
||||
The first cat will correctly show `/ovl-A/merge/cat`, while the second
|
||||
one shows just `/cat`.
|
||||
|
||||
To fix that, uses file_user_path() inside of backing_file_open() to get
|
||||
the correct file path for the dentry.
|
||||
|
||||
Co-developed-by: John Schoenick <johns@valvesoftware.com>
|
||||
Signed-off-by: John Schoenick <johns@valvesoftware.com>
|
||||
Signed-off-by: André Almeida <andrealmeid@igalia.com>
|
||||
Fixes: def3ae83da02 ("fs: store real path instead of fake path in backing file f_path")
|
||||
Cc: <stable@vger.kernel.org> # v6.7
|
||||
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index d85385f37ba6..3bf52eace698 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -51,8 +51,8 @@ static struct file *ovl_open_realfile(const struct file *file,
|
||||
if (!inode_owner_or_capable(real_idmap, realinode))
|
||||
flags &= ~O_NOATIME;
|
||||
|
||||
- realfile = backing_file_open(&file->f_path, flags, realpath,
|
||||
- current_cred());
|
||||
+ realfile = backing_file_open(file_user_path((struct file *) file),
|
||||
+ flags, realpath, current_cred());
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,105 @@
|
||||
From 3869325e0bf98aba624155c8355abe6e5db6e674 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:18:45 +0200
|
||||
Subject: [PATCH] fs: constify file ptr in backing_file accessor helpers
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- context fuzz and dropped changes to functions not present downstream
|
||||
|
||||
commit 4e301d858af17ae2ce56886296e5458c5a08219a
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Sat Jun 7 13:53:03 2025 +0200
|
||||
|
||||
fs: constify file ptr in backing_file accessor helpers
|
||||
|
||||
Add internal helper backing_file_set_user_path() for the only
|
||||
two cases that need to modify backing_file fields.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Link: https://lore.kernel.org/20250607115304.2521155-2-amir73il@gmail.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
index 6ea14b6214c1..840b45366557 100644
|
||||
--- a/fs/backing-file.c
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -41,7 +41,7 @@ struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
return f;
|
||||
|
||||
path_get(user_path);
|
||||
- *backing_file_user_path(f) = *user_path;
|
||||
+ backing_file_set_user_path(f, user_path);
|
||||
error = vfs_open(real_path, f);
|
||||
if (error) {
|
||||
fput(f);
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index e5c7b9705109..fa8f4d34efa5 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -50,17 +50,20 @@ struct backing_file {
|
||||
struct path user_path;
|
||||
};
|
||||
|
||||
-static inline struct backing_file *backing_file(struct file *f)
|
||||
-{
|
||||
- return container_of(f, struct backing_file, file);
|
||||
-}
|
||||
+#define backing_file(f) container_of(f, struct backing_file, file)
|
||||
|
||||
-struct path *backing_file_user_path(struct file *f)
|
||||
+struct path *backing_file_user_path(const struct file *f)
|
||||
{
|
||||
return &backing_file(f)->user_path;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(backing_file_user_path);
|
||||
|
||||
+void backing_file_set_user_path(struct file *f, const struct path *path)
|
||||
+{
|
||||
+ backing_file(f)->user_path = *path;
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(backing_file_set_user_path);
|
||||
+
|
||||
static void file_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index bd0934d0521b..78fcdad80e53 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -96,6 +96,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
struct file *alloc_empty_file(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
|
||||
+void backing_file_set_user_path(struct file *f, const struct path *path);
|
||||
|
||||
static inline void file_put_write_access(struct file *file)
|
||||
{
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 5f3ca25c77e5..7ed9232f579d 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -2586,7 +2586,7 @@ struct file *dentry_open(const struct path *path, int flags,
|
||||
const struct cred *creds);
|
||||
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
const struct cred *cred);
|
||||
-struct path *backing_file_user_path(struct file *f);
|
||||
+struct path *backing_file_user_path(const struct file *f);
|
||||
|
||||
/*
|
||||
* file_user_path - get the path to display for memory mapped file
|
||||
@@ -2597,7 +2597,7 @@ struct path *backing_file_user_path(struct file *f);
|
||||
* /proc/<pid>/maps), this helper should be used to get the path to display
|
||||
* to the user, which is the path of the fd that user has requested to map.
|
||||
*/
|
||||
-static inline const struct path *file_user_path(struct file *f)
|
||||
+static inline const struct path *file_user_path(const struct file *f)
|
||||
{
|
||||
if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
return backing_file_user_path(f);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
38
SOURCES/1339-ovl-remove-unneeded-non-const-conversion.patch
Normal file
38
SOURCES/1339-ovl-remove-unneeded-non-const-conversion.patch
Normal file
@ -0,0 +1,38 @@
|
||||
From c3f8db29db9e7b9bb68b107e932315f147046ac5 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:48:18 +0200
|
||||
Subject: [PATCH] ovl: remove unneeded non-const conversion
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
|
||||
commit 3ec2529eca6f175f4e3e87c4534010e044839b38
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Sat Jun 7 13:53:04 2025 +0200
|
||||
|
||||
ovl: remove unneeded non-const conversion
|
||||
|
||||
file_user_path() now takes a const file ptr.
|
||||
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Link: https://lore.kernel.org/20250607115304.2521155-3-amir73il@gmail.com
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 3bf52eace698..c8e45f503db3 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -51,7 +51,7 @@ static struct file *ovl_open_realfile(const struct file *file,
|
||||
if (!inode_owner_or_capable(real_idmap, realinode))
|
||||
flags &= ~O_NOATIME;
|
||||
|
||||
- realfile = backing_file_open(file_user_path((struct file *) file),
|
||||
+ realfile = backing_file_open(file_user_path(file),
|
||||
flags, realpath, current_cred());
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
From 99a9c81094c622efebec7695e551baffdac3f89b Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 20:29:45 +0200
|
||||
Subject: [PATCH] ovl: remove redundant IOCB_DIO_CALLER_COMP clearing
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
Conflicts:
|
||||
- just context fuzz
|
||||
|
||||
commit 7933a585d70ee496fa341b50b8b0a95b131867ff
|
||||
Author: Seong-Gwang Heo <heo@mykernel.net>
|
||||
Date: Thu Oct 9 13:41:48 2025 +0800
|
||||
|
||||
ovl: remove redundant IOCB_DIO_CALLER_COMP clearing
|
||||
|
||||
The backing_file_write_iter() function, which is called
|
||||
immediately after this code, already contains identical
|
||||
logic to clear the IOCB_DIO_CALLER_COMP flag along with
|
||||
the same explanatory comment. There is no need to duplicate
|
||||
this operation in the overlayfs code.
|
||||
|
||||
Signed-off-by: Seong-Gwang Heo <heo@mykernel.net>
|
||||
Fixes: a6293b3e285c ("fs: factor out backing_file_{read,write}_iter() helpers")
|
||||
Acked-by: Miklos Szeredi <mszeredi@redhat.com>
|
||||
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Signed-off-by: Christian Brauner <brauner@kernel.org>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index c8e45f503db3..3d8539909f74 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -308,11 +308,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
|
||||
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
|
||||
|
||||
- /*
|
||||
- * Overlayfs doesn't support deferred completions, don't copy
|
||||
- * this property in case it is set by the issuer.
|
||||
- */
|
||||
- ifl &= ~IOCB_DIO_CALLER_COMP;
|
||||
ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
|
||||
fdput(real);
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,89 @@
|
||||
From 627a3d264cb85311131aef67a0ff2397999d5394 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 20:29:57 +0200
|
||||
Subject: [PATCH] perf/core: Fix MMAP event path names with backing files
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
|
||||
commit 8818f507a9391019a3ec7c57b1a32e4b386e48a5
|
||||
Author: Adrian Hunter <adrian.hunter@intel.com>
|
||||
Date: Mon Oct 13 10:22:43 2025 +0300
|
||||
|
||||
perf/core: Fix MMAP event path names with backing files
|
||||
|
||||
Some file systems like FUSE-based ones or overlayfs may record the backing
|
||||
file in struct vm_area_struct vm_file, instead of the user file that the
|
||||
user mmapped.
|
||||
|
||||
Since commit def3ae83da02f ("fs: store real path instead of fake path in
|
||||
backing file f_path"), file_path() no longer returns the user file path
|
||||
when applied to a backing file. There is an existing helper
|
||||
file_user_path() for that situation.
|
||||
|
||||
Use file_user_path() instead of file_path() to get the path for MMAP
|
||||
and MMAP2 events.
|
||||
|
||||
Example:
|
||||
|
||||
Setup:
|
||||
|
||||
# cd /root
|
||||
# mkdir test ; cd test ; mkdir lower upper work merged
|
||||
# cp `which cat` lower
|
||||
# mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged
|
||||
# perf record -e intel_pt//u -- /root/test/merged/cat /proc/self/maps
|
||||
...
|
||||
55b0ba399000-55b0ba434000 r-xp 00018000 00:1a 3419 /root/test/merged/cat
|
||||
...
|
||||
[ perf record: Woken up 1 times to write data ]
|
||||
[ perf record: Captured and wrote 0.060 MB perf.data ]
|
||||
#
|
||||
|
||||
Before:
|
||||
|
||||
File name is wrong (/cat), so decoding fails:
|
||||
|
||||
# perf script --no-itrace --show-mmap-events
|
||||
cat 367 [016] 100.491492: PERF_RECORD_MMAP2 367/367: [0x55b0ba399000(0x9b000) @ 0x18000 00:02 3419 489959280]: r-xp /cat
|
||||
...
|
||||
# perf script --itrace=e | wc -l
|
||||
Warning:
|
||||
19 instruction trace errors
|
||||
19
|
||||
#
|
||||
|
||||
After:
|
||||
|
||||
File name is correct (/root/test/merged/cat), so decoding is ok:
|
||||
|
||||
# perf script --no-itrace --show-mmap-events
|
||||
cat 364 [016] 72.153006: PERF_RECORD_MMAP2 364/364: [0x55ce4003d000(0x9b000) @ 0x18000 00:02 3419 3132534314]: r-xp /root/test/merged/cat
|
||||
# perf script --itrace=e
|
||||
# perf script --itrace=e | wc -l
|
||||
0
|
||||
#
|
||||
|
||||
Fixes: def3ae83da02f ("fs: store real path instead of fake path in backing file f_path")
|
||||
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Acked-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Cc: stable@vger.kernel.org
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/kernel/events/core.c b/kernel/events/core.c
|
||||
index 0d3bd850fee7..5065087dd236 100644
|
||||
--- a/kernel/events/core.c
|
||||
+++ b/kernel/events/core.c
|
||||
@@ -8953,7 +8953,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
|
||||
* need to add enough zero bytes after the string to handle
|
||||
* the 64bit alignment we do later.
|
||||
*/
|
||||
- name = file_path(file, buf, PATH_MAX - sizeof(u64));
|
||||
+ name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64));
|
||||
if (IS_ERR(name)) {
|
||||
name = "//toolong";
|
||||
goto cpy_name;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,85 @@
|
||||
From 644c8e296eb8628ed6ccaff5609bce2c4b591c8a Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:00:17 +0200
|
||||
Subject: [PATCH] fs: prepare for adding LSM blob to backing_file
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/file_table.c: adjusted the body and call site of
|
||||
backing_file_free() to downstream state (some intermediate commits
|
||||
not backported)
|
||||
|
||||
commit 880bd496ec72a6dcb00cb70c430ef752ba242ae7
|
||||
Author: Amir Goldstein <amir73il@gmail.com>
|
||||
Date: Mon Mar 30 10:27:51 2026 +0200
|
||||
|
||||
fs: prepare for adding LSM blob to backing_file
|
||||
|
||||
In preparation to adding LSM blob to backing_file struct, factor out
|
||||
helpers init_backing_file() and backing_file_free().
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Cc: linux-fsdevel@vger.kernel.org
|
||||
Cc: linux-unionfs@vger.kernel.org
|
||||
Cc: linux-erofs@lists.ozlabs.org
|
||||
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Reviewed-by: Serge Hallyn <serge@hallyn.com>
|
||||
[PM: use the term "LSM blob", fix comment style to match file]
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index fa8f4d34efa5..34e3863c95b0 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -75,11 +75,16 @@ static void file_free_rcu(struct rcu_head *head)
|
||||
kmem_cache_free(filp_cachep, f);
|
||||
}
|
||||
|
||||
+static inline void backing_file_free(struct backing_file *ff)
|
||||
+{
|
||||
+ path_put(&ff->user_path);
|
||||
+}
|
||||
+
|
||||
static inline void file_free(struct file *f)
|
||||
{
|
||||
security_file_free(f);
|
||||
if (unlikely(f->f_mode & FMODE_BACKING))
|
||||
- path_put(backing_file_user_path(f));
|
||||
+ backing_file_free(backing_file(f));
|
||||
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
|
||||
percpu_counter_dec(&nr_files);
|
||||
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
|
||||
@@ -255,6 +260,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
return f;
|
||||
}
|
||||
|
||||
+static int init_backing_file(struct backing_file *ff)
|
||||
+{
|
||||
+ memset(&ff->user_path, 0, sizeof(ff->user_path));
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Variant of alloc_empty_file() that allocates a backing_file container
|
||||
* and doesn't check and modify nr_files.
|
||||
@@ -277,7 +288,14 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
+ /* The f_mode flags must be set before fput(). */
|
||||
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
|
||||
+ error = init_backing_file(ff);
|
||||
+ if (unlikely(error)) {
|
||||
+ fput(&ff->file);
|
||||
+ return ERR_PTR(error);
|
||||
+ }
|
||||
+
|
||||
return &ff->file;
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
535
SOURCES/1343-lsm-add-backing-file-lsm-hooks.patch
Normal file
535
SOURCES/1343-lsm-add-backing-file-lsm-hooks.patch
Normal file
@ -0,0 +1,535 @@
|
||||
From 807cf6dc47e9871fa1f77369e69fe8666cb0a1d4 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 11:54:46 +0200
|
||||
Subject: [PATCH] lsm: add backing_file LSM hooks
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- fs/backing-file.c:
|
||||
- adjust backing_file_mmap() to downstream state (missing scoped
|
||||
guards, ctx->user_file instead of missing user_file variable)
|
||||
- backing_tmpfile_open() not present downstream
|
||||
- fs/erofs/ishare.c: hunk dropped, file not present downstream
|
||||
- fs/file_table.c: context fuzz + put security_backing_file_free() in the
|
||||
right place
|
||||
- fs/fuse/passthrough.c: hunk dropped, file not present downstream
|
||||
- fs/overlayfs/dir.c: hunk dropped, ovl_create_tmpfile() not present downstream
|
||||
- fs/overlayfs/file.c: adjust to different indentation
|
||||
- include/linux/backing-file.h: backing_tmpfile_open() not present downstream
|
||||
- include/linux/lsm_hooks.h: adjust to downstream's definition of struct lsm_blob_sizes
|
||||
- security/lsm.h: hunk dropped, file not present downstream
|
||||
- security/lsm_init.c: hunk dropped, file not present downstream
|
||||
- security/security.c: misc conflicts, port changes to stuff that was
|
||||
already in lsm.h/lsm_init.c upstream
|
||||
|
||||
commit 6af36aeb147a06dea47c49859cd6ca5659aeb987
|
||||
Author: Paul Moore <paul@paul-moore.com>
|
||||
Date: Fri Dec 19 13:18:22 2025 -0500
|
||||
|
||||
lsm: add backing_file LSM hooks
|
||||
|
||||
Stacked filesystems such as overlayfs do not currently provide the
|
||||
necessary mechanisms for LSMs to properly enforce access controls on the
|
||||
mmap() and mprotect() operations. In order to resolve this gap, a LSM
|
||||
security blob is being added to the backing_file struct and the following
|
||||
new LSM hooks are being created:
|
||||
|
||||
security_backing_file_alloc()
|
||||
security_backing_file_free()
|
||||
security_mmap_backing_file()
|
||||
|
||||
The first two hooks are to manage the lifecycle of the LSM security blob
|
||||
in the backing_file struct, while the third provides a new mmap() access
|
||||
control point for the underlying backing file. It is also expected that
|
||||
LSMs will likely want to update their security_file_mprotect() callback
|
||||
to address issues with their mprotect() controls, but that does not
|
||||
require a change to the security_file_mprotect() LSM hook.
|
||||
|
||||
There are three other small changes to support these new LSM hooks:
|
||||
* Pass the user file associated with a backing file down to
|
||||
alloc_empty_backing_file() so it can be included in the
|
||||
security_backing_file_alloc() hook.
|
||||
* Add getter and setter functions for the backing_file struct LSM blob
|
||||
as the backing_file struct remains private to fs/file_table.c.
|
||||
* Constify the file struct field in the LSM common_audit_data struct to
|
||||
better support LSMs that need to pass a const file struct pointer into
|
||||
the common LSM audit code.
|
||||
|
||||
Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
|
||||
and supplying a fixup.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Cc: linux-fsdevel@vger.kernel.org
|
||||
Cc: linux-unionfs@vger.kernel.org
|
||||
Cc: linux-erofs@lists.ozlabs.org
|
||||
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Reviewed-by: Serge Hallyn <serge@hallyn.com>
|
||||
Reviewed-by: Christian Brauner <brauner@kernel.org>
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/fs/backing-file.c b/fs/backing-file.c
|
||||
index 840b45366557..e6f4fe27b58b 100644
|
||||
--- a/fs/backing-file.c
|
||||
+++ b/fs/backing-file.c
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <linux/backing-file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/mm.h>
|
||||
+#include <linux/security.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -29,14 +30,15 @@
|
||||
* returned file into a container structure that also stores the stacked
|
||||
* file's path, which can be retrieved using backing_file_user_path().
|
||||
*/
|
||||
-struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
+struct file *backing_file_open(const struct file *user_file, int flags,
|
||||
const struct path *real_path,
|
||||
const struct cred *cred)
|
||||
{
|
||||
+ const struct path *user_path = &user_file->f_path;
|
||||
struct file *f;
|
||||
int error;
|
||||
|
||||
- f = alloc_empty_backing_file(flags, cred);
|
||||
+ f = alloc_empty_backing_file(flags, cred, user_file);
|
||||
if (IS_ERR(f))
|
||||
return f;
|
||||
|
||||
@@ -316,6 +318,11 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
|
||||
vma_set_file(vma, file);
|
||||
|
||||
old_cred = override_creds(ctx->cred);
|
||||
+ ret = security_mmap_backing_file(vma, file, ctx->user_file);
|
||||
+ if (ret) {
|
||||
+ revert_creds(old_cred);
|
||||
+ return ret;
|
||||
+ }
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
revert_creds(old_cred);
|
||||
|
||||
diff --git a/fs/file_table.c b/fs/file_table.c
|
||||
index 34e3863c95b0..fc04eb48d550 100644
|
||||
--- a/fs/file_table.c
|
||||
+++ b/fs/file_table.c
|
||||
@@ -48,6 +48,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp;
|
||||
struct backing_file {
|
||||
struct file file;
|
||||
struct path user_path;
|
||||
+#ifdef CONFIG_SECURITY
|
||||
+ void *security;
|
||||
+#endif
|
||||
};
|
||||
|
||||
#define backing_file(f) container_of(f, struct backing_file, file)
|
||||
@@ -64,6 +67,18 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(backing_file_set_user_path);
|
||||
|
||||
+#ifdef CONFIG_SECURITY
|
||||
+void *backing_file_security(const struct file *f)
|
||||
+{
|
||||
+ return backing_file(f)->security;
|
||||
+}
|
||||
+
|
||||
+void backing_file_set_security(struct file *f, void *security)
|
||||
+{
|
||||
+ backing_file(f)->security = security;
|
||||
+}
|
||||
+#endif /* CONFIG_SECURITY */
|
||||
+
|
||||
static void file_free_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
|
||||
@@ -77,6 +92,7 @@ static void file_free_rcu(struct rcu_head *head)
|
||||
|
||||
static inline void backing_file_free(struct backing_file *ff)
|
||||
{
|
||||
+ security_backing_file_free(&ff->file);
|
||||
path_put(&ff->user_path);
|
||||
}
|
||||
|
||||
@@ -260,10 +276,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
return f;
|
||||
}
|
||||
|
||||
-static int init_backing_file(struct backing_file *ff)
|
||||
+static int init_backing_file(struct backing_file *ff,
|
||||
+ const struct file *user_file)
|
||||
{
|
||||
memset(&ff->user_path, 0, sizeof(ff->user_path));
|
||||
- return 0;
|
||||
+ backing_file_set_security(&ff->file, NULL);
|
||||
+ return security_backing_file_alloc(&ff->file, user_file);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -273,7 +291,8 @@ static int init_backing_file(struct backing_file *ff)
|
||||
* This is only for kernel internal use, and the allocate file must not be
|
||||
* installed into file tables or such.
|
||||
*/
|
||||
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
||||
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
|
||||
+ const struct file *user_file)
|
||||
{
|
||||
struct backing_file *ff;
|
||||
int error;
|
||||
@@ -290,7 +309,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
|
||||
|
||||
/* The f_mode flags must be set before fput(). */
|
||||
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
|
||||
- error = init_backing_file(ff);
|
||||
+ error = init_backing_file(ff, user_file);
|
||||
if (unlikely(error)) {
|
||||
fput(&ff->file);
|
||||
return ERR_PTR(error);
|
||||
diff --git a/fs/internal.h b/fs/internal.h
|
||||
index 78fcdad80e53..f48f5fa349c9 100644
|
||||
--- a/fs/internal.h
|
||||
+++ b/fs/internal.h
|
||||
@@ -95,7 +95,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
*/
|
||||
struct file *alloc_empty_file(int flags, const struct cred *cred);
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
|
||||
-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
|
||||
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
|
||||
+ const struct file *user_file);
|
||||
void backing_file_set_user_path(struct file *f, const struct path *path);
|
||||
|
||||
static inline void file_put_write_access(struct file *file)
|
||||
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
|
||||
index 3d8539909f74..a5bc5dfc930f 100644
|
||||
--- a/fs/overlayfs/file.c
|
||||
+++ b/fs/overlayfs/file.c
|
||||
@@ -51,7 +51,7 @@ static struct file *ovl_open_realfile(const struct file *file,
|
||||
if (!inode_owner_or_capable(real_idmap, realinode))
|
||||
flags &= ~O_NOATIME;
|
||||
|
||||
- realfile = backing_file_open(file_user_path(file),
|
||||
+ realfile = backing_file_open(file,
|
||||
flags, realpath, current_cred());
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
|
||||
index 3f1fe1774f1b..103b6992b80a 100644
|
||||
--- a/include/linux/backing-file.h
|
||||
+++ b/include/linux/backing-file.h
|
||||
@@ -19,7 +19,7 @@ struct backing_file_ctx {
|
||||
void (*end_write)(struct file *);
|
||||
};
|
||||
|
||||
-struct file *backing_file_open(const struct path *user_path, int flags,
|
||||
+struct file *backing_file_open(const struct file *user_file, int flags,
|
||||
const struct path *real_path,
|
||||
const struct cred *cred);
|
||||
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index 7ed9232f579d..a94f20ba2bf6 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -2588,6 +2588,19 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
const struct cred *cred);
|
||||
struct path *backing_file_user_path(const struct file *f);
|
||||
|
||||
+#ifdef CONFIG_SECURITY
|
||||
+void *backing_file_security(const struct file *f);
|
||||
+void backing_file_set_security(struct file *f, void *security);
|
||||
+#else
|
||||
+static inline void *backing_file_security(const struct file *f)
|
||||
+{
|
||||
+ return NULL;
|
||||
+}
|
||||
+static inline void backing_file_set_security(struct file *f, void *security)
|
||||
+{
|
||||
+}
|
||||
+#endif /* CONFIG_SECURITY */
|
||||
+
|
||||
/*
|
||||
* file_user_path - get the path to display for memory mapped file
|
||||
*
|
||||
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
|
||||
index 97a8b21eb033..c0a2839253fa 100644
|
||||
--- a/include/linux/lsm_audit.h
|
||||
+++ b/include/linux/lsm_audit.h
|
||||
@@ -93,7 +93,7 @@ struct common_audit_data {
|
||||
#endif
|
||||
char *kmod_name;
|
||||
struct lsm_ioctlop_audit *op;
|
||||
- struct file *file;
|
||||
+ const struct file *file;
|
||||
struct lsm_ibpkey_audit *ibpkey;
|
||||
struct lsm_ibendport_audit *ibendport;
|
||||
int reason;
|
||||
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
|
||||
index b6fbb446bab7..304da2a90ba7 100644
|
||||
--- a/include/linux/lsm_hook_defs.h
|
||||
+++ b/include/linux/lsm_hook_defs.h
|
||||
@@ -168,6 +168,9 @@ LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir,
|
||||
LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
|
||||
LSM_HOOK(int, 0, file_alloc_security, struct file *file)
|
||||
LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
|
||||
+LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
|
||||
+ const struct file *user_file)
|
||||
+LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
|
||||
LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
|
||||
@@ -175,6 +178,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
|
||||
LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
|
||||
LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
|
||||
unsigned long prot, unsigned long flags)
|
||||
+LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
|
||||
+ struct file *backing_file, struct file *user_file)
|
||||
LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
|
||||
unsigned long reqprot, unsigned long prot)
|
||||
LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
|
||||
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
|
||||
index 7577ecfc79e4..a16571929f7b 100644
|
||||
--- a/include/linux/lsm_hooks.h
|
||||
+++ b/include/linux/lsm_hooks.h
|
||||
@@ -1637,6 +1637,7 @@ struct security_hook_list {
|
||||
struct lsm_blob_sizes {
|
||||
int lbs_cred;
|
||||
int lbs_file;
|
||||
+ int lbs_backing_file;
|
||||
int lbs_inode;
|
||||
int lbs_superblock;
|
||||
int lbs_ipc;
|
||||
diff --git a/include/linux/security.h b/include/linux/security.h
|
||||
index d2888c127859..db02db9f623a 100644
|
||||
--- a/include/linux/security.h
|
||||
+++ b/include/linux/security.h
|
||||
@@ -387,11 +387,17 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
|
||||
int security_file_permission(struct file *file, int mask);
|
||||
int security_file_alloc(struct file *file);
|
||||
void security_file_free(struct file *file);
|
||||
+int security_backing_file_alloc(struct file *backing_file,
|
||||
+ const struct file *user_file);
|
||||
+void security_backing_file_free(struct file *backing_file);
|
||||
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
int security_file_ioctl_compat(struct file *file, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
int security_mmap_file(struct file *file, unsigned long prot,
|
||||
unsigned long flags);
|
||||
+int security_mmap_backing_file(struct vm_area_struct *vma,
|
||||
+ struct file *backing_file,
|
||||
+ struct file *user_file);
|
||||
int security_mmap_addr(unsigned long addr);
|
||||
int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
|
||||
unsigned long prot);
|
||||
@@ -976,6 +982,15 @@ static inline int security_file_alloc(struct file *file)
|
||||
static inline void security_file_free(struct file *file)
|
||||
{ }
|
||||
|
||||
+static inline int security_backing_file_alloc(struct file *backing_file,
|
||||
+ const struct file *user_file)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline void security_backing_file_free(struct file *backing_file)
|
||||
+{ }
|
||||
+
|
||||
static inline int security_file_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
@@ -995,6 +1010,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static inline int security_mmap_backing_file(struct vm_area_struct *vma,
|
||||
+ struct file *backing_file,
|
||||
+ struct file *user_file)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static inline int security_mmap_addr(unsigned long addr)
|
||||
{
|
||||
return cap_mmap_addr(addr);
|
||||
diff --git a/security/security.c b/security/security.c
|
||||
index 1e63f23a504a..27a309ab0b97 100644
|
||||
--- a/security/security.c
|
||||
+++ b/security/security.c
|
||||
@@ -89,6 +89,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
|
||||
static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
|
||||
|
||||
static struct kmem_cache *lsm_file_cache;
|
||||
+static struct kmem_cache *lsm_backing_file_cache;
|
||||
static struct kmem_cache *lsm_inode_cache;
|
||||
|
||||
char *lsm_names;
|
||||
@@ -260,6 +261,8 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
|
||||
|
||||
lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
|
||||
lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
|
||||
+ lsm_set_blob_size(&needed->lbs_backing_file,
|
||||
+ &blob_sizes.lbs_backing_file);
|
||||
/*
|
||||
* The inode blob gets an rcu_head in addition to
|
||||
* what the modules might need.
|
||||
@@ -447,14 +450,15 @@ static void __init ordered_lsm_init(void)
|
||||
|
||||
report_lsm_order();
|
||||
|
||||
- init_debug("cred blob size = %d\n", blob_sizes.lbs_cred);
|
||||
- init_debug("file blob size = %d\n", blob_sizes.lbs_file);
|
||||
- init_debug("inode blob size = %d\n", blob_sizes.lbs_inode);
|
||||
- init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc);
|
||||
- init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg);
|
||||
- init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock);
|
||||
- init_debug("task blob size = %d\n", blob_sizes.lbs_task);
|
||||
- init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count);
|
||||
+ init_debug("cred blob size = %d\n", blob_sizes.lbs_cred);
|
||||
+ init_debug("file blob size = %d\n", blob_sizes.lbs_file);
|
||||
+ init_debug("backing_file blob size = %d\n", blob_sizes.lbs_backing_file);
|
||||
+ init_debug("inode blob size = %d\n", blob_sizes.lbs_inode);
|
||||
+ init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc);
|
||||
+ init_debug("msg_msg blob size = %d\n", blob_sizes.lbs_msg_msg);
|
||||
+ init_debug("superblock blob size = %d\n", blob_sizes.lbs_superblock);
|
||||
+ init_debug("task blob size = %d\n", blob_sizes.lbs_task);
|
||||
+ init_debug("xattr slots = %d\n", blob_sizes.lbs_xattr_count);
|
||||
|
||||
/*
|
||||
* Create any kmem_caches needed for blobs
|
||||
@@ -463,6 +467,11 @@ static void __init ordered_lsm_init(void)
|
||||
lsm_file_cache = kmem_cache_create("lsm_file_cache",
|
||||
blob_sizes.lbs_file, 0,
|
||||
SLAB_PANIC, NULL);
|
||||
+ if (blob_sizes.lbs_backing_file)
|
||||
+ lsm_backing_file_cache = kmem_cache_create(
|
||||
+ "lsm_backing_file_cache",
|
||||
+ blob_sizes.lbs_backing_file,
|
||||
+ 0, SLAB_PANIC, NULL);
|
||||
if (blob_sizes.lbs_inode)
|
||||
lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
|
||||
blob_sizes.lbs_inode, 0,
|
||||
@@ -644,6 +653,30 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb)
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
|
||||
|
||||
+/**
|
||||
+ * lsm_backing_file_alloc - allocate a composite backing file blob
|
||||
+ * @backing_file: the backing file
|
||||
+ *
|
||||
+ * Allocate the backing file blob for all the modules.
|
||||
+ *
|
||||
+ * Returns 0, or -ENOMEM if memory can't be allocated.
|
||||
+ */
|
||||
+static int lsm_backing_file_alloc(struct file *backing_file)
|
||||
+{
|
||||
+ void *blob;
|
||||
+
|
||||
+ if (!lsm_backing_file_cache) {
|
||||
+ backing_file_set_security(backing_file, NULL);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
|
||||
+ backing_file_set_security(backing_file, blob);
|
||||
+ if (!blob)
|
||||
+ return -ENOMEM;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* lsm_blob_alloc - allocate a composite blob
|
||||
* @dest: the destination for the blob
|
||||
@@ -1689,6 +1722,57 @@ void security_file_free(struct file *file)
|
||||
}
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * security_backing_file_alloc() - Allocate and setup a backing file blob
|
||||
+ * @backing_file: the backing file
|
||||
+ * @user_file: the associated user visible file
|
||||
+ *
|
||||
+ * Allocate a backing file LSM blob and perform any necessary initialization of
|
||||
+ * the LSM blob. There will be some operations where the LSM will not have
|
||||
+ * access to @user_file after this point, so any important state associated
|
||||
+ * with @user_file that is important to the LSM should be captured in the
|
||||
+ * backing file's LSM blob.
|
||||
+ *
|
||||
+ * LSM's should avoid taking a reference to @user_file in this hook as it will
|
||||
+ * result in problems later when the system attempts to drop/put the file
|
||||
+ * references due to a circular dependency.
|
||||
+ *
|
||||
+ * Return: Return 0 if the hook is successful, negative values otherwise.
|
||||
+ */
|
||||
+int security_backing_file_alloc(struct file *backing_file,
|
||||
+ const struct file *user_file)
|
||||
+{
|
||||
+ int rc;
|
||||
+
|
||||
+ rc = lsm_backing_file_alloc(backing_file);
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ rc = call_int_hook(backing_file_alloc, backing_file, user_file);
|
||||
+ if (unlikely(rc))
|
||||
+ security_backing_file_free(backing_file);
|
||||
+
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * security_backing_file_free() - Free a backing file blob
|
||||
+ * @backing_file: the backing file
|
||||
+ *
|
||||
+ * Free any LSM state associate with a backing file's LSM blob, including the
|
||||
+ * blob itself.
|
||||
+ */
|
||||
+void security_backing_file_free(struct file *backing_file)
|
||||
+{
|
||||
+ void *blob = backing_file_security(backing_file);
|
||||
+
|
||||
+ call_void_hook(backing_file_free, backing_file);
|
||||
+
|
||||
+ if (blob) {
|
||||
+ backing_file_set_security(backing_file, NULL);
|
||||
+ kmem_cache_free(lsm_backing_file_cache, blob);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
return call_int_hook(file_ioctl, file, cmd, arg);
|
||||
@@ -1757,6 +1841,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
|
||||
return ima_file_mmap(file, prot);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
|
||||
+ * @vma: the vm_area_struct for the mmap'd region
|
||||
+ * @backing_file: the backing file being mmap'd
|
||||
+ * @user_file: the user file being mmap'd
|
||||
+ *
|
||||
+ * Check permissions for a mmap operation on a stacked filesystem. This hook
|
||||
+ * is called after the security_mmap_file() and is responsible for authorizing
|
||||
+ * the mmap on @backing_file. It is important to note that the mmap operation
|
||||
+ * on @user_file has already been authorized and the @vma->vm_file has been
|
||||
+ * set to @backing_file.
|
||||
+ *
|
||||
+ * Return: Returns 0 if permission is granted.
|
||||
+ */
|
||||
+int security_mmap_backing_file(struct vm_area_struct *vma,
|
||||
+ struct file *backing_file,
|
||||
+ struct file *user_file)
|
||||
+{
|
||||
+ /* recommended by the stackable filesystem devs */
|
||||
+ if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(security_mmap_backing_file);
|
||||
+
|
||||
int security_mmap_addr(unsigned long addr)
|
||||
{
|
||||
return call_int_hook(mmap_addr, addr);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,444 @@
|
||||
From df2d263e4dcf6739be4c4b51d7e1f6c1d3316200 Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Wed, 3 Jun 2026 13:01:38 +0200
|
||||
Subject: [PATCH] selinux: fix overlayfs mmap() and mprotect() access checks
|
||||
|
||||
JIRA: https://issues.redhat.com/browse/RHEL-179443
|
||||
CVE: CVE-2026-46054
|
||||
Conflicts:
|
||||
- security/selinux/hooks.c:
|
||||
- context fuzz
|
||||
- preserve passing &selinux_state to avc_has_perm()
|
||||
- preserve honoring the checkreqprot setting (in case of mmap
|
||||
backing file check it is ignored, but that's the best we can do -
|
||||
at worst some access would be denied on overlayfs in extremely
|
||||
exotic use cases)
|
||||
- security/selinux/include/objsec.h: context fuzz
|
||||
|
||||
commit 82544d36b1729153c8aeb179e84750f0c085d3b1
|
||||
Author: Paul Moore <paul@paul-moore.com>
|
||||
Date: Thu Jan 1 17:19:18 2026 -0500
|
||||
|
||||
selinux: fix overlayfs mmap() and mprotect() access checks
|
||||
|
||||
The existing SELinux security model for overlayfs is to allow access if
|
||||
the current task is able to access the top level file (the "user" file)
|
||||
and the mounter's credentials are sufficient to access the lower
|
||||
level file (the "backing" file). Unfortunately, the current code does
|
||||
not properly enforce these access controls for both mmap() and mprotect()
|
||||
operations on overlayfs filesystems.
|
||||
|
||||
This patch makes use of the newly created security_mmap_backing_file()
|
||||
LSM hook to provide the missing backing file enforcement for mmap()
|
||||
operations, and leverages the backing file API and new LSM blob to
|
||||
provide the necessary information to properly enforce the mprotect()
|
||||
access controls.
|
||||
|
||||
Cc: stable@vger.kernel.org
|
||||
Acked-by: Amir Goldstein <amir73il@gmail.com>
|
||||
Signed-off-by: Paul Moore <paul@paul-moore.com>
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
|
||||
index deacc9a63fae..fb5b9cb027d0 100644
|
||||
--- a/security/selinux/hooks.c
|
||||
+++ b/security/selinux/hooks.c
|
||||
@@ -1706,50 +1706,76 @@ static inline int file_path_has_perm(const struct cred *cred,
|
||||
static int bpf_fd_pass(const struct file *file, u32 sid);
|
||||
#endif
|
||||
|
||||
-/* Check whether a task can use an open file descriptor to
|
||||
- access an inode in a given way. Check access to the
|
||||
- descriptor itself, and then use dentry_has_perm to
|
||||
- check a particular permission to the file.
|
||||
- Access to the descriptor is implicitly granted if it
|
||||
- has the same SID as the process. If av is zero, then
|
||||
- access to the file is not checked, e.g. for cases
|
||||
- where only the descriptor is affected like seek. */
|
||||
-static int file_has_perm(const struct cred *cred,
|
||||
- struct file *file,
|
||||
- u32 av)
|
||||
+static int __file_has_perm(const struct cred *cred, const struct file *file,
|
||||
+ u32 av, bool bf_user_file)
|
||||
+
|
||||
{
|
||||
- struct file_security_struct *fsec = selinux_file(file);
|
||||
- struct inode *inode = file_inode(file);
|
||||
struct common_audit_data ad;
|
||||
- u32 sid = cred_sid(cred);
|
||||
+ struct inode *inode;
|
||||
+ u32 ssid = cred_sid(cred);
|
||||
+ u32 tsid_fd;
|
||||
int rc;
|
||||
|
||||
- ad.type = LSM_AUDIT_DATA_FILE;
|
||||
- ad.u.file = file;
|
||||
+ if (bf_user_file) {
|
||||
+ struct backing_file_security_struct *bfsec;
|
||||
+ const struct path *path;
|
||||
|
||||
- if (sid != fsec->sid) {
|
||||
+ if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
|
||||
+ return -EIO;
|
||||
+
|
||||
+ bfsec = selinux_backing_file(file);
|
||||
+ path = backing_file_user_path(file);
|
||||
+ tsid_fd = bfsec->uf_sid;
|
||||
+ inode = d_inode(path->dentry);
|
||||
+
|
||||
+ ad.type = LSM_AUDIT_DATA_PATH;
|
||||
+ ad.u.path = *path;
|
||||
+ } else {
|
||||
+ struct file_security_struct *fsec = selinux_file(file);
|
||||
+
|
||||
+ tsid_fd = fsec->sid;
|
||||
+ inode = file_inode(file);
|
||||
+
|
||||
+ ad.type = LSM_AUDIT_DATA_FILE;
|
||||
+ ad.u.file = file;
|
||||
+ }
|
||||
+
|
||||
+ if (ssid != tsid_fd) {
|
||||
rc = avc_has_perm(&selinux_state,
|
||||
- sid, fsec->sid,
|
||||
+ ssid, tsid_fd,
|
||||
SECCLASS_FD,
|
||||
FD__USE,
|
||||
&ad);
|
||||
if (rc)
|
||||
- goto out;
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
- rc = bpf_fd_pass(file, cred_sid(cred));
|
||||
+ /* regardless of backing vs user file, use the underlying file here */
|
||||
+ rc = bpf_fd_pass(file, ssid);
|
||||
if (rc)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
/* av is zero if only checking access to the descriptor. */
|
||||
- rc = 0;
|
||||
if (av)
|
||||
- rc = inode_has_perm(cred, inode, av, &ad);
|
||||
+ return inode_has_perm(cred, inode, av, &ad);
|
||||
|
||||
-out:
|
||||
- return rc;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* Check whether a task can use an open file descriptor to
|
||||
+ access an inode in a given way. Check access to the
|
||||
+ descriptor itself, and then use dentry_has_perm to
|
||||
+ check a particular permission to the file.
|
||||
+ Access to the descriptor is implicitly granted if it
|
||||
+ has the same SID as the process. If av is zero, then
|
||||
+ access to the file is not checked, e.g. for cases
|
||||
+ where only the descriptor is affected like seek. */
|
||||
+static inline int file_has_perm(const struct cred *cred,
|
||||
+ const struct file *file, u32 av)
|
||||
+{
|
||||
+ return __file_has_perm(cred, file, av, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3646,6 +3672,17 @@ static int selinux_file_alloc_security(struct file *file)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int selinux_backing_file_alloc(struct file *backing_file,
|
||||
+ const struct file *user_file)
|
||||
+{
|
||||
+ struct backing_file_security_struct *bfsec;
|
||||
+
|
||||
+ bfsec = selinux_backing_file(backing_file);
|
||||
+ bfsec->uf_sid = selinux_file(user_file)->sid;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Check whether a task has the ioctl permission and cmd
|
||||
* operation to an inode.
|
||||
@@ -3759,43 +3796,56 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
|
||||
|
||||
static int default_noexec __ro_after_init;
|
||||
|
||||
-static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
|
||||
+static int __file_map_prot_check(const struct cred *cred,
|
||||
+ const struct file *file, unsigned long prot,
|
||||
+ bool shared, bool bf_user_file)
|
||||
{
|
||||
- const struct cred *cred = current_cred();
|
||||
- u32 sid = cred_sid(cred);
|
||||
- int rc = 0;
|
||||
+ struct inode *inode = NULL;
|
||||
+ bool prot_exec = prot & PROT_EXEC;
|
||||
+ bool prot_write = prot & PROT_WRITE;
|
||||
+
|
||||
+ if (file) {
|
||||
+ if (bf_user_file)
|
||||
+ inode = d_inode(backing_file_user_path(file)->dentry);
|
||||
+ else
|
||||
+ inode = file_inode(file);
|
||||
+ }
|
||||
+
|
||||
+ if (default_noexec && prot_exec &&
|
||||
+ (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
|
||||
+ int rc;
|
||||
+ u32 sid = cred_sid(cred);
|
||||
|
||||
- if (default_noexec &&
|
||||
- (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
|
||||
- (!shared && (prot & PROT_WRITE)))) {
|
||||
/*
|
||||
- * We are making executable an anonymous mapping or a
|
||||
- * private file mapping that will also be writable.
|
||||
- * This has an additional check.
|
||||
+ * We are making executable an anonymous mapping or a private
|
||||
+ * file mapping that will also be writable.
|
||||
*/
|
||||
rc = avc_has_perm(&selinux_state,
|
||||
- sid, sid, SECCLASS_PROCESS,
|
||||
- PROCESS__EXECMEM, NULL);
|
||||
+ sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
|
||||
+ NULL);
|
||||
if (rc)
|
||||
- goto error;
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
if (file) {
|
||||
- /* read access is always possible with a mapping */
|
||||
+ /* "read" always possible, "write" only if shared */
|
||||
u32 av = FILE__READ;
|
||||
-
|
||||
- /* write access only matters if the mapping is shared */
|
||||
- if (shared && (prot & PROT_WRITE))
|
||||
+ if (shared && prot_write)
|
||||
av |= FILE__WRITE;
|
||||
-
|
||||
- if (prot & PROT_EXEC)
|
||||
+ if (prot_exec)
|
||||
av |= FILE__EXECUTE;
|
||||
|
||||
- return file_has_perm(cred, file, av);
|
||||
+ return __file_has_perm(cred, file, av, bf_user_file);
|
||||
}
|
||||
|
||||
-error:
|
||||
- return rc;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline int file_map_prot_check(const struct cred *cred,
|
||||
+ const struct file *file,
|
||||
+ unsigned long prot, bool shared)
|
||||
+{
|
||||
+ return __file_map_prot_check(cred, file, prot, shared, false);
|
||||
}
|
||||
|
||||
static int selinux_mmap_addr(unsigned long addr)
|
||||
@@ -3812,17 +3862,17 @@ static int selinux_mmap_addr(unsigned long addr)
|
||||
return rc;
|
||||
}
|
||||
|
||||
-static int selinux_mmap_file(struct file *file, unsigned long reqprot,
|
||||
- unsigned long prot, unsigned long flags)
|
||||
+static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
|
||||
+ unsigned long reqprot, unsigned long prot,
|
||||
+ bool shared)
|
||||
{
|
||||
- struct common_audit_data ad;
|
||||
- int rc;
|
||||
-
|
||||
if (file) {
|
||||
+ int rc;
|
||||
+ struct common_audit_data ad;
|
||||
+
|
||||
ad.type = LSM_AUDIT_DATA_FILE;
|
||||
ad.u.file = file;
|
||||
- rc = inode_has_perm(current_cred(), file_inode(file),
|
||||
- FILE__MAP, &ad);
|
||||
+ rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
@@ -3830,23 +3880,68 @@ static int selinux_mmap_file(struct file *file, unsigned long reqprot,
|
||||
if (checkreqprot_get(&selinux_state))
|
||||
prot = reqprot;
|
||||
|
||||
- return file_map_prot_check(file, prot,
|
||||
- (flags & MAP_TYPE) == MAP_SHARED);
|
||||
+ return file_map_prot_check(cred, file, prot, shared);
|
||||
+}
|
||||
+
|
||||
+static int selinux_mmap_file(struct file *file, unsigned long reqprot,
|
||||
+ unsigned long prot, unsigned long flags)
|
||||
+{
|
||||
+ return selinux_mmap_file_common(current_cred(), file, reqprot, prot,
|
||||
+ (flags & MAP_TYPE) == MAP_SHARED);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * selinux_mmap_backing_file - Check mmap permissions on a backing file
|
||||
+ * @vma: memory region
|
||||
+ * @backing_file: stacked filesystem backing file
|
||||
+ * @user_file: user visible file
|
||||
+ *
|
||||
+ * This is called after selinux_mmap_file() on stacked filesystems, and it
|
||||
+ * is this function's responsibility to verify access to @backing_file and
|
||||
+ * setup the SELinux state for possible later use in the mprotect() code path.
|
||||
+ *
|
||||
+ * By the time this function is called, mmap() access to @user_file has already
|
||||
+ * been authorized and @vma->vm_file has been set to point to @backing_file.
|
||||
+ *
|
||||
+ * Return zero on success, negative values otherwise.
|
||||
+ */
|
||||
+static int selinux_mmap_backing_file(struct vm_area_struct *vma,
|
||||
+ struct file *backing_file,
|
||||
+ struct file *user_file __always_unused)
|
||||
+{
|
||||
+ unsigned long prot = 0;
|
||||
+
|
||||
+ /* translate vma->vm_flags perms into PROT perms */
|
||||
+ if (vma->vm_flags & VM_READ)
|
||||
+ prot |= PROT_READ;
|
||||
+ if (vma->vm_flags & VM_WRITE)
|
||||
+ prot |= PROT_WRITE;
|
||||
+ if (vma->vm_flags & VM_EXEC)
|
||||
+ prot |= PROT_EXEC;
|
||||
+
|
||||
+ return selinux_mmap_file_common(backing_file->f_cred, backing_file,
|
||||
+ prot, prot, vma->vm_flags & VM_SHARED);
|
||||
}
|
||||
|
||||
static int selinux_file_mprotect(struct vm_area_struct *vma,
|
||||
unsigned long reqprot,
|
||||
unsigned long prot)
|
||||
{
|
||||
+ int rc;
|
||||
const struct cred *cred = current_cred();
|
||||
u32 sid = cred_sid(cred);
|
||||
+ const struct file *file = vma->vm_file;
|
||||
+ bool backing_file;
|
||||
+ bool shared = vma->vm_flags & VM_SHARED;
|
||||
+
|
||||
+ /* check if we need to trigger the "backing files are awful" mode */
|
||||
+ backing_file = file && (file->f_mode & FMODE_BACKING);
|
||||
|
||||
if (checkreqprot_get(&selinux_state))
|
||||
prot = reqprot;
|
||||
|
||||
if (default_noexec &&
|
||||
(prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
|
||||
- int rc = 0;
|
||||
/*
|
||||
* We don't use the vma_is_initial_heap() helper as it has
|
||||
* a history of problems and is currently broken on systems
|
||||
@@ -3861,12 +3956,16 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
|
||||
rc = avc_has_perm(&selinux_state,
|
||||
sid, sid, SECCLASS_PROCESS,
|
||||
PROCESS__EXECHEAP, NULL);
|
||||
- } else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ } else if (!file && (vma_is_initial_stack(vma) ||
|
||||
vma_is_stack_for_current(vma))) {
|
||||
rc = avc_has_perm(&selinux_state,
|
||||
sid, sid, SECCLASS_PROCESS,
|
||||
PROCESS__EXECSTACK, NULL);
|
||||
- } else if (vma->vm_file && vma->anon_vma) {
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ } else if (file && vma->anon_vma) {
|
||||
/*
|
||||
* We are making executable a file mapping that has
|
||||
* had some COW done. Since pages might have been
|
||||
@@ -3874,13 +3973,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
|
||||
* modified content. This typically should only
|
||||
* occur for text relocations.
|
||||
*/
|
||||
- rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
|
||||
+ rc = __file_has_perm(cred, file, FILE__EXECMOD,
|
||||
+ backing_file);
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ if (backing_file) {
|
||||
+ rc = file_has_perm(file->f_cred, file,
|
||||
+ FILE__EXECMOD);
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ }
|
||||
}
|
||||
+ }
|
||||
+
|
||||
+ rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
|
||||
+ if (rc)
|
||||
+ return rc;
|
||||
+ if (backing_file) {
|
||||
+ rc = file_map_prot_check(file->f_cred, file, prot, shared);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
- return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int selinux_file_lock(struct file *file, unsigned int cmd)
|
||||
@@ -7007,6 +7122,7 @@ static void selinux_bpf_token_free(struct bpf_token *token)
|
||||
struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
|
||||
.lbs_cred = sizeof(struct task_security_struct),
|
||||
.lbs_file = sizeof(struct file_security_struct),
|
||||
+ .lbs_backing_file = sizeof(struct backing_file_security_struct),
|
||||
.lbs_inode = sizeof(struct inode_security_struct),
|
||||
.lbs_ipc = sizeof(struct ipc_security_struct),
|
||||
.lbs_msg_msg = sizeof(struct msg_security_struct),
|
||||
@@ -7216,9 +7332,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
|
||||
|
||||
LSM_HOOK_INIT(file_permission, selinux_file_permission),
|
||||
LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
|
||||
+ LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
|
||||
LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
|
||||
LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
|
||||
LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
|
||||
+ LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
|
||||
LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
|
||||
LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
|
||||
LSM_HOOK_INIT(file_lock, selinux_file_lock),
|
||||
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
|
||||
index 2953132408bf..b1c5a2877f7e 100644
|
||||
--- a/security/selinux/include/objsec.h
|
||||
+++ b/security/selinux/include/objsec.h
|
||||
@@ -60,6 +60,10 @@ struct file_security_struct {
|
||||
u32 pseqno; /* Policy seqno at the time of file open */
|
||||
};
|
||||
|
||||
+struct backing_file_security_struct {
|
||||
+ u32 uf_sid; /* associated user file fsec->sid */
|
||||
+};
|
||||
+
|
||||
struct superblock_security_struct {
|
||||
u32 sid; /* SID of file system superblock */
|
||||
u32 def_sid; /* default SID for labeling */
|
||||
@@ -158,6 +162,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
|
||||
return file->f_security + selinux_blob_sizes.lbs_file;
|
||||
}
|
||||
|
||||
+static inline struct backing_file_security_struct *
|
||||
+selinux_backing_file(const struct file *backing_file)
|
||||
+{
|
||||
+ void *blob = backing_file_security(backing_file);
|
||||
+ return blob + selinux_blob_sizes.lbs_backing_file;
|
||||
+}
|
||||
+
|
||||
static inline struct inode_security_struct *selinux_inode(
|
||||
const struct inode *inode)
|
||||
{
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,130 @@
|
||||
From ec2a2e4b876c7faed3de5e85406180810cc8539a Mon Sep 17 00:00:00 2001
|
||||
From: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
Date: Tue, 16 Jun 2026 10:06:13 +0200
|
||||
Subject: [PATCH] selinux: RHEL-only hotfix for execmem regression
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-185118
|
||||
Upstream Status: RHEL9-only
|
||||
|
||||
As discovered by image-mode/CoreOS testing, the fixes for CVE-2026-46054
|
||||
caused a regression that results in unexpected execmem denials in
|
||||
specific scenarios involving overlayfs (or another stacked filesystem).
|
||||
|
||||
Specifically in case of image mode / CoreOS there is often (always?) an
|
||||
overlayfs filesystem mounted during early boot (before SELinux policy is
|
||||
loaded), which means that overlayfs captures the kernel’s SELinux
|
||||
context as part of the mounter credentials, which are later used by
|
||||
overlayfs+SELinux to verify that file accesses through the overlay mount
|
||||
don’t give the mounter a way to access underlying files it otherwise
|
||||
wouldn’t have access to. This verification would normally pass, as the
|
||||
policy grants the kernel context almost unrestricted access to the
|
||||
filesystem. However, the new checks added to fix CVE-2026-46054
|
||||
erroneously include the execmem check for the mounter, and in the policy
|
||||
kernel_t doesn’t have the execmem permission, so mmapping an overlay
|
||||
file with MAP_PRIVATE and PROT_WRITE|PROT_EXEC would now result in a
|
||||
SELinux denial.
|
||||
|
||||
Fix this by passing a boolean through the helper functions to distinguish
|
||||
the direct permission check from the mounter check, and by skipping the
|
||||
execmem check in the mounter case.
|
||||
|
||||
This is a transient RHEL-only fix to allow the CVE fix to go through
|
||||
without breaking image mode/CoreOS deployments. Once an optimal solution
|
||||
is figured out and applied upstream, this commit will be reverted and
|
||||
replaced with the upstream fix (at least in Y-streams). I expect the
|
||||
upstream solution to be functionally equivalent, though probably
|
||||
cosmetically different.
|
||||
|
||||
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
|
||||
|
||||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
|
||||
index fb5b9cb027d0..b31b06d4440b 100644
|
||||
--- a/security/selinux/hooks.c
|
||||
+++ b/security/selinux/hooks.c
|
||||
@@ -3798,7 +3798,7 @@ static int default_noexec __ro_after_init;
|
||||
|
||||
static int __file_map_prot_check(const struct cred *cred,
|
||||
const struct file *file, unsigned long prot,
|
||||
- bool shared, bool bf_user_file)
|
||||
+ bool shared, bool mounter, bool bf_user_file)
|
||||
{
|
||||
struct inode *inode = NULL;
|
||||
bool prot_exec = prot & PROT_EXEC;
|
||||
@@ -3812,7 +3812,7 @@ static int __file_map_prot_check(const struct cred *cred,
|
||||
}
|
||||
|
||||
if (default_noexec && prot_exec &&
|
||||
- (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
|
||||
+ (!file || IS_PRIVATE(inode) || (!shared && prot_write)) && !mounter) {
|
||||
int rc;
|
||||
u32 sid = cred_sid(cred);
|
||||
|
||||
@@ -3843,9 +3843,9 @@ static int __file_map_prot_check(const struct cred *cred,
|
||||
|
||||
static inline int file_map_prot_check(const struct cred *cred,
|
||||
const struct file *file,
|
||||
- unsigned long prot, bool shared)
|
||||
+ unsigned long prot, bool shared, bool mounter)
|
||||
{
|
||||
- return __file_map_prot_check(cred, file, prot, shared, false);
|
||||
+ return __file_map_prot_check(cred, file, prot, shared, mounter, false);
|
||||
}
|
||||
|
||||
static int selinux_mmap_addr(unsigned long addr)
|
||||
@@ -3864,7 +3864,7 @@ static int selinux_mmap_addr(unsigned long addr)
|
||||
|
||||
static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
|
||||
unsigned long reqprot, unsigned long prot,
|
||||
- bool shared)
|
||||
+ bool shared, bool mounter)
|
||||
{
|
||||
if (file) {
|
||||
int rc;
|
||||
@@ -3880,14 +3880,15 @@ static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
|
||||
if (checkreqprot_get(&selinux_state))
|
||||
prot = reqprot;
|
||||
|
||||
- return file_map_prot_check(cred, file, prot, shared);
|
||||
+ return file_map_prot_check(cred, file, prot, shared, mounter);
|
||||
}
|
||||
|
||||
static int selinux_mmap_file(struct file *file, unsigned long reqprot,
|
||||
unsigned long prot, unsigned long flags)
|
||||
{
|
||||
return selinux_mmap_file_common(current_cred(), file, reqprot, prot,
|
||||
- (flags & MAP_TYPE) == MAP_SHARED);
|
||||
+ (flags & MAP_TYPE) == MAP_SHARED,
|
||||
+ false);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3920,7 +3921,8 @@ static int selinux_mmap_backing_file(struct vm_area_struct *vma,
|
||||
prot |= PROT_EXEC;
|
||||
|
||||
return selinux_mmap_file_common(backing_file->f_cred, backing_file,
|
||||
- prot, prot, vma->vm_flags & VM_SHARED);
|
||||
+ prot, prot, vma->vm_flags & VM_SHARED,
|
||||
+ true);
|
||||
}
|
||||
|
||||
static int selinux_file_mprotect(struct vm_area_struct *vma,
|
||||
@@ -3986,11 +3988,11 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
|
||||
}
|
||||
}
|
||||
|
||||
- rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
|
||||
+ rc = __file_map_prot_check(cred, file, prot, shared, false, backing_file);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (backing_file) {
|
||||
- rc = file_map_prot_check(file->f_cred, file, prot, shared);
|
||||
+ rc = file_map_prot_check(file->f_cred, file, prot, shared, true);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,323 @@
|
||||
From 0ba00ffd79bdb243b0067aa95fe5846b6c1ecbe7 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:58 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Fix matcher action template attach
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 36ef2575e78d1a3c699dc3f1c9dee9be742c9bdd
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:31 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Fix matcher action template attach
|
||||
|
||||
The procedure of attaching an action template to an existing matcher had
|
||||
a few issues:
|
||||
|
||||
1. Attaching accidentally overran the `at` array in bwc_matcher, which
|
||||
would result in memory corruption. This bug wasn't triggered, but it
|
||||
is possible to trigger it by attaching action templates beyond the
|
||||
initial buffer size of 8. Fix this by converting to a dynamically
|
||||
sized buffer and reallocating if needed.
|
||||
|
||||
2. Similarly, the `at` array inside the native matcher was never
|
||||
reallocated. Fix this the same as above.
|
||||
|
||||
3. The bwc layer treated any error in action template attach as a signal
|
||||
that the matcher should be rehashed to account for a larger number of
|
||||
action STEs. In reality, there are other unrelated errors that can
|
||||
arise and they should be propagated upstack. Fix this by adding a
|
||||
`need_rehash` output parameter that's orthogonal to error codes.
|
||||
|
||||
Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling")
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-2-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index 19dce1ba512d..32de8bfc7644 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -90,13 +90,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher,
|
||||
bwc_matcher->priority = priority;
|
||||
bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
|
||||
|
||||
+ bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM;
|
||||
+ bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array,
|
||||
+ sizeof(*bwc_matcher->at), GFP_KERNEL);
|
||||
+ if (!bwc_matcher->at)
|
||||
+ goto free_bwc_matcher_rules;
|
||||
+
|
||||
/* create dummy action template */
|
||||
bwc_matcher->at[0] =
|
||||
mlx5hws_action_template_create(action_types ?
|
||||
action_types : init_action_types);
|
||||
if (!bwc_matcher->at[0]) {
|
||||
mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n");
|
||||
- goto free_bwc_matcher_rules;
|
||||
+ goto free_bwc_matcher_at_array;
|
||||
}
|
||||
|
||||
bwc_matcher->num_of_at = 1;
|
||||
@@ -126,6 +132,8 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher,
|
||||
mlx5hws_match_template_destroy(bwc_matcher->mt);
|
||||
free_at:
|
||||
mlx5hws_action_template_destroy(bwc_matcher->at[0]);
|
||||
+free_bwc_matcher_at_array:
|
||||
+ kfree(bwc_matcher->at);
|
||||
free_bwc_matcher_rules:
|
||||
kfree(bwc_matcher->rules);
|
||||
err:
|
||||
@@ -192,6 +200,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
|
||||
for (i = 0; i < bwc_matcher->num_of_at; i++)
|
||||
mlx5hws_action_template_destroy(bwc_matcher->at[i]);
|
||||
+ kfree(bwc_matcher->at);
|
||||
|
||||
mlx5hws_match_template_destroy(bwc_matcher->mt);
|
||||
kfree(bwc_matcher->rules);
|
||||
@@ -520,6 +529,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher,
|
||||
struct mlx5hws_rule_action rule_actions[])
|
||||
{
|
||||
enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS];
|
||||
+ void *p;
|
||||
+
|
||||
+ if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) {
|
||||
+ if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT)
|
||||
+ return -ENOMEM;
|
||||
+ bwc_matcher->size_of_at_array *= 2;
|
||||
+ p = krealloc(bwc_matcher->at,
|
||||
+ bwc_matcher->size_of_at_array *
|
||||
+ sizeof(*bwc_matcher->at),
|
||||
+ __GFP_ZERO | GFP_KERNEL);
|
||||
+ if (!p) {
|
||||
+ bwc_matcher->size_of_at_array /= 2;
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ bwc_matcher->at = p;
|
||||
+ }
|
||||
|
||||
hws_bwc_rule_actions_to_action_types(rule_actions, action_types);
|
||||
|
||||
@@ -777,6 +803,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
struct mlx5hws_rule_attr rule_attr;
|
||||
struct mutex *queue_lock; /* Protect the queue */
|
||||
u32 num_of_rules;
|
||||
+ bool need_rehash;
|
||||
int ret = 0;
|
||||
int at_idx;
|
||||
|
||||
@@ -803,10 +830,14 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
at_idx = bwc_matcher->num_of_at - 1;
|
||||
|
||||
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx]);
|
||||
+ bwc_matcher->at[at_idx],
|
||||
+ &need_rehash);
|
||||
if (unlikely(ret)) {
|
||||
- /* Action template attach failed, possibly due to
|
||||
- * requiring more action STEs.
|
||||
+ hws_bwc_unlock_all_queues(ctx);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ if (unlikely(need_rehash)) {
|
||||
+ /* The new action template requires more action STEs.
|
||||
* Need to attempt creating new matcher with all
|
||||
* the action templates, including the new one.
|
||||
*/
|
||||
@@ -942,6 +973,7 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
|
||||
struct mlx5hws_rule_attr rule_attr;
|
||||
struct mutex *queue_lock; /* Protect the queue */
|
||||
+ bool need_rehash;
|
||||
int at_idx, ret;
|
||||
u16 idx;
|
||||
|
||||
@@ -973,12 +1005,17 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
at_idx = bwc_matcher->num_of_at - 1;
|
||||
|
||||
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx]);
|
||||
+ bwc_matcher->at[at_idx],
|
||||
+ &need_rehash);
|
||||
if (unlikely(ret)) {
|
||||
- /* Action template attach failed, possibly due to
|
||||
- * requiring more action STEs.
|
||||
- * Need to attempt creating new matcher with all
|
||||
- * the action templates, including the new one.
|
||||
+ hws_bwc_unlock_all_queues(ctx);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ if (unlikely(need_rehash)) {
|
||||
+ /* The new action template requires more action
|
||||
+ * STEs. Need to attempt creating new matcher
|
||||
+ * with all the action templates, including the
|
||||
+ * new one.
|
||||
*/
|
||||
ret = hws_bwc_matcher_rehash_at(bwc_matcher);
|
||||
if (unlikely(ret)) {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
index 47f7ed141553..bb0cf4b922ce 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
@@ -10,9 +10,7 @@
|
||||
#define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32
|
||||
|
||||
/* Max number of AT attach operations for the same matcher.
|
||||
- * When the limit is reached, next attempt to attach new AT
|
||||
- * will result in creation of a new matcher and moving all
|
||||
- * the rules to this matcher.
|
||||
+ * When the limit is reached, a larger buffer is allocated for the ATs.
|
||||
*/
|
||||
#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8
|
||||
|
||||
@@ -23,10 +21,11 @@
|
||||
struct mlx5hws_bwc_matcher {
|
||||
struct mlx5hws_matcher *matcher;
|
||||
struct mlx5hws_match_template *mt;
|
||||
- struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM];
|
||||
- u32 priority;
|
||||
+ struct mlx5hws_action_template **at;
|
||||
u8 num_of_at;
|
||||
+ u8 size_of_at_array;
|
||||
u8 size_log;
|
||||
+ u32 priority;
|
||||
atomic_t num_of_rules;
|
||||
struct list_head *rules;
|
||||
};
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index b61864b32053..37a4497048a6 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -905,18 +905,48 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ void *p;
|
||||
+
|
||||
+ if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ matcher->size_of_at_array *= 2;
|
||||
+ p = krealloc(matcher->at,
|
||||
+ matcher->size_of_at_array * sizeof(*matcher->at),
|
||||
+ __GFP_ZERO | GFP_KERNEL);
|
||||
+ if (!p) {
|
||||
+ matcher->size_of_at_array /= 2;
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
+ matcher->at = p;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
- struct mlx5hws_action_template *at)
|
||||
+ struct mlx5hws_action_template *at,
|
||||
+ bool *need_rehash)
|
||||
{
|
||||
bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
u32 required_stes;
|
||||
int ret;
|
||||
|
||||
- if (!matcher->attr.max_num_of_at_attach) {
|
||||
- mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n",
|
||||
- matcher->num_of_at);
|
||||
- return -EOPNOTSUPP;
|
||||
+ *need_rehash = false;
|
||||
+
|
||||
+ if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) {
|
||||
+ ret = hws_matcher_grow_at_array(matcher);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ if (matcher->col_matcher) {
|
||||
+ ret = hws_matcher_grow_at_array(matcher->col_matcher);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ }
|
||||
}
|
||||
|
||||
ret = hws_matcher_check_and_process_at(matcher, at);
|
||||
@@ -927,12 +957,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
if (matcher->action_ste.max_stes < required_stes) {
|
||||
mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n",
|
||||
required_stes, matcher->action_ste.max_stes);
|
||||
- return -ENOMEM;
|
||||
+ *need_rehash = true;
|
||||
}
|
||||
|
||||
matcher->at[matcher->num_of_at] = *at;
|
||||
matcher->num_of_at += 1;
|
||||
- matcher->attr.max_num_of_at_attach -= 1;
|
||||
|
||||
if (matcher->col_matcher)
|
||||
matcher->col_matcher->num_of_at = matcher->num_of_at;
|
||||
@@ -960,8 +989,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher,
|
||||
if (!matcher->mt)
|
||||
return -ENOMEM;
|
||||
|
||||
- matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach,
|
||||
- sizeof(*matcher->at),
|
||||
+ matcher->size_of_at_array =
|
||||
+ num_of_at + matcher->attr.max_num_of_at_attach;
|
||||
+ matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at),
|
||||
GFP_KERNEL);
|
||||
if (!matcher->at) {
|
||||
mlx5hws_err(ctx, "Failed to allocate action template array\n");
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
index 020de70270c5..20b32012c418 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
@@ -23,6 +23,9 @@
|
||||
*/
|
||||
#define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1
|
||||
|
||||
+/* Maximum number of action templates that can be attached to a matcher. */
|
||||
+#define MLX5HWS_MATCHER_MAX_AT 128
|
||||
+
|
||||
enum mlx5hws_matcher_offset {
|
||||
MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12,
|
||||
MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13,
|
||||
@@ -72,6 +75,7 @@ struct mlx5hws_matcher {
|
||||
struct mlx5hws_match_template *mt;
|
||||
struct mlx5hws_action_template *at;
|
||||
u8 num_of_at;
|
||||
+ u8 size_of_at_array;
|
||||
u8 num_of_mt;
|
||||
/* enum mlx5hws_matcher_flags */
|
||||
u8 flags;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
index 5121951f2778..8ed8a715a2eb 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
@@ -399,11 +399,14 @@ int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher);
|
||||
*
|
||||
* @matcher: Matcher to attach the action template to.
|
||||
* @at: Action template to be attached to the matcher.
|
||||
+ * @need_rehash: Output parameter that tells callers if the matcher needs to be
|
||||
+ * rehashed.
|
||||
*
|
||||
* Return: Zero on success, non-zero otherwise.
|
||||
*/
|
||||
int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
- struct mlx5hws_action_template *at);
|
||||
+ struct mlx5hws_action_template *at,
|
||||
+ bool *need_rehash);
|
||||
|
||||
/**
|
||||
* mlx5hws_matcher_resize_set_target - Link two matchers and enable moving rules.
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
178
SOURCES/1347-net-mlx5-hws-remove-unused-element-array.patch
Normal file
178
SOURCES/1347-net-mlx5-hws-remove-unused-element-array.patch
Normal file
@ -0,0 +1,178 @@
|
||||
From f6b3ae9e3b84ce0d94c00565a0321eb0a1502cee Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:58 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Remove unused element array
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b2ae16214ffeda3e1c25223eebe19f85b0876181
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:32 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Remove unused element array
|
||||
|
||||
Remove the array of elements wrapped in a struct because in reality only
|
||||
the first element was ever used.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-3-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
index 50a81d360bb2..35ed9bee06a6 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
@@ -293,7 +293,7 @@ static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool,
|
||||
}
|
||||
|
||||
static struct mlx5hws_pool_elements *
|
||||
-hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx)
|
||||
+hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order)
|
||||
{
|
||||
struct mlx5hws_pool_elements *elem;
|
||||
u32 alloc_size;
|
||||
@@ -311,21 +311,21 @@ hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx)
|
||||
elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order);
|
||||
if (!elem->bitmap) {
|
||||
mlx5hws_err(pool->ctx,
|
||||
- "Failed to create bitmap type: %d: size %d index: %d\n",
|
||||
- pool->type, alloc_size, idx);
|
||||
+ "Failed to create bitmap type: %d: size %d\n",
|
||||
+ pool->type, alloc_size);
|
||||
goto free_elem;
|
||||
}
|
||||
|
||||
elem->log_size = alloc_size - order;
|
||||
}
|
||||
|
||||
- if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
|
||||
- pool->type, alloc_size, idx);
|
||||
+ if (hws_pool_create_resource_on_index(pool, alloc_size, 0)) {
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
+ pool->type, alloc_size);
|
||||
goto free_db;
|
||||
}
|
||||
|
||||
- pool->db.element_manager->elements[idx] = elem;
|
||||
+ pool->db.element = elem;
|
||||
|
||||
return elem;
|
||||
|
||||
@@ -359,9 +359,9 @@ hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
{
|
||||
struct mlx5hws_pool_elements *elem;
|
||||
|
||||
- elem = pool->db.element_manager->elements[0];
|
||||
+ elem = pool->db.element;
|
||||
if (!elem)
|
||||
- elem = hws_pool_element_create_new_elem(pool, order, 0);
|
||||
+ elem = hws_pool_element_create_new_elem(pool, order);
|
||||
if (!elem)
|
||||
goto err_no_elem;
|
||||
|
||||
@@ -451,16 +451,14 @@ static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_elements *elem,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static void
|
||||
+hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
|
||||
+ struct mlx5hws_pool_elements *elem)
|
||||
{
|
||||
- if (unlikely(!pool->resource[chunk->resource_idx]))
|
||||
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
|
||||
-
|
||||
- hws_pool_resource_free(pool, chunk->resource_idx);
|
||||
+ hws_pool_resource_free(pool, 0);
|
||||
+ bitmap_free(elem->bitmap);
|
||||
kfree(elem);
|
||||
- pool->db.element_manager->elements[chunk->resource_idx] = NULL;
|
||||
+ pool->db.element = NULL;
|
||||
}
|
||||
|
||||
static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
@@ -471,7 +469,7 @@ static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
if (unlikely(chunk->resource_idx))
|
||||
pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
|
||||
|
||||
- elem = pool->db.element_manager->elements[chunk->resource_idx];
|
||||
+ elem = pool->db.element;
|
||||
if (!elem) {
|
||||
mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx);
|
||||
return;
|
||||
@@ -483,7 +481,7 @@ static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
|
||||
if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE &&
|
||||
!elem->num_of_elements)
|
||||
- hws_onesize_element_db_destroy_element(pool, elem, chunk);
|
||||
+ hws_onesize_element_db_destroy_element(pool, elem);
|
||||
}
|
||||
|
||||
static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
@@ -504,18 +502,13 @@ static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
|
||||
static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- struct mlx5hws_pool_elements *elem;
|
||||
- int i;
|
||||
+ struct mlx5hws_pool_elements *elem = pool->db.element;
|
||||
|
||||
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
|
||||
- elem = pool->db.element_manager->elements[i];
|
||||
- if (elem) {
|
||||
- bitmap_free(elem->bitmap);
|
||||
- kfree(elem);
|
||||
- pool->db.element_manager->elements[i] = NULL;
|
||||
- }
|
||||
+ if (elem) {
|
||||
+ bitmap_free(elem->bitmap);
|
||||
+ kfree(elem);
|
||||
+ pool->db.element = NULL;
|
||||
}
|
||||
- kfree(pool->db.element_manager);
|
||||
}
|
||||
|
||||
/* This memory management works as the following:
|
||||
@@ -526,10 +519,6 @@ static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool)
|
||||
*/
|
||||
static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL);
|
||||
- if (!pool->db.element_manager)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
pool->p_db_uninit = &hws_onesize_element_db_uninit;
|
||||
pool->p_get_chunk = &hws_onesize_element_db_get_chunk;
|
||||
pool->p_put_chunk = &hws_onesize_element_db_put_chunk;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
index 621298b352b2..f4258f83fdbf 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
@@ -87,14 +87,10 @@ struct mlx5hws_pool_elements {
|
||||
bool is_full;
|
||||
};
|
||||
|
||||
-struct mlx5hws_element_manager {
|
||||
- struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ];
|
||||
-};
|
||||
-
|
||||
struct mlx5hws_pool_db {
|
||||
enum mlx5hws_db_type type;
|
||||
union {
|
||||
- struct mlx5hws_element_manager *element_manager;
|
||||
+ struct mlx5hws_pool_elements *element;
|
||||
struct mlx5hws_buddy_manager *buddy_manager;
|
||||
};
|
||||
};
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
700
SOURCES/1348-net-mlx5-hws-make-pool-single-resource.patch
Normal file
700
SOURCES/1348-net-mlx5-hws-make-pool-single-resource.patch
Normal file
@ -0,0 +1,700 @@
|
||||
From 33eec33671fbe0444075b87f15b803b77059993f Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:58 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Make pool single resource
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 38956bea7349ce75c1519b57c27cd97580b4c822
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:33 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Make pool single resource
|
||||
|
||||
The pool implementation claimed to support multiple resources, but this
|
||||
does not really make sense in context. Callers always allocate a single
|
||||
STC or STE chunk of exactly the size provided.
|
||||
|
||||
The code that handled multiple resources was unused (and likely buggy)
|
||||
due to the combination of flags passed by callers.
|
||||
|
||||
Simplify the pool by having it handle a single resource. As a result of
|
||||
this simplification, chunks no longer contain a resource offset (there
|
||||
is now only one resource per pool), and the get_base_id functions no
|
||||
longer take a chunk parameter.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-4-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index b5332c54d4fb..781ba8c4f733 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -238,6 +238,7 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx,
|
||||
enum mlx5hws_table_type table_type,
|
||||
bool is_mirror)
|
||||
{
|
||||
+ struct mlx5hws_pool *pool;
|
||||
bool use_fixup = false;
|
||||
u32 fw_tbl_type;
|
||||
u32 base_id;
|
||||
@@ -253,13 +254,11 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx,
|
||||
use_fixup = true;
|
||||
break;
|
||||
}
|
||||
+ pool = stc_attr->ste_table.ste_pool;
|
||||
if (!is_mirror)
|
||||
- base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool,
|
||||
- &stc_attr->ste_table.ste);
|
||||
+ base_id = mlx5hws_pool_get_base_id(pool);
|
||||
else
|
||||
- base_id =
|
||||
- mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool,
|
||||
- &stc_attr->ste_table.ste);
|
||||
+ base_id = mlx5hws_pool_get_base_mirror_id(pool);
|
||||
|
||||
*fixup_stc_attr = *stc_attr;
|
||||
fixup_stc_attr->ste_table.ste_obj_id = base_id;
|
||||
@@ -337,7 +336,7 @@ __must_hold(&ctx->ctrl_lock)
|
||||
if (!mlx5hws_context_cap_dynamic_reparse(ctx))
|
||||
stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
|
||||
|
||||
- obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc);
|
||||
+ obj_0_id = mlx5hws_pool_get_base_id(stc_pool);
|
||||
|
||||
/* According to table/action limitation change the stc_attr */
|
||||
use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false);
|
||||
@@ -353,7 +352,7 @@ __must_hold(&ctx->ctrl_lock)
|
||||
if (table_type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
u32 obj_1_id;
|
||||
|
||||
- obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc);
|
||||
+ obj_1_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
|
||||
|
||||
use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr,
|
||||
&fixup_stc_attr,
|
||||
@@ -393,11 +392,11 @@ __must_hold(&ctx->ctrl_lock)
|
||||
stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP;
|
||||
stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
|
||||
stc_attr.stc_offset = stc->offset;
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(stc_pool);
|
||||
mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr);
|
||||
|
||||
if (table_type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
|
||||
mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr);
|
||||
}
|
||||
|
||||
@@ -1581,7 +1580,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
u32 miss_ft_id)
|
||||
{
|
||||
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
|
||||
- struct mlx5hws_action_default_stc *default_stc;
|
||||
struct mlx5hws_matcher_action_ste *table_ste;
|
||||
struct mlx5hws_pool_attr pool_attr = {0};
|
||||
struct mlx5hws_pool *ste_pool, *stc_pool;
|
||||
@@ -1629,7 +1627,7 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
rtc_attr.fw_gen_wqe = true;
|
||||
rtc_attr.is_scnd_range = true;
|
||||
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
|
||||
rtc_attr.pd = ctx->pd_num;
|
||||
rtc_attr.ste_base = obj_id;
|
||||
@@ -1639,8 +1637,7 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
|
||||
/* STC is a single resource (obj_id), use any STC for the ID */
|
||||
stc_pool = ctx->stc_pool;
|
||||
- default_stc = ctx->common_res.default_stc;
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
|
||||
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
|
||||
@@ -1650,11 +1647,11 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
}
|
||||
|
||||
/* Create mirror RTC */
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
rtc_attr.ste_base = obj_id;
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true);
|
||||
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
|
||||
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
index 696275fd0ce2..3491408c5d84 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
@@ -118,7 +118,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
|
||||
{
|
||||
enum mlx5hws_table_type tbl_type = matcher->tbl->type;
|
||||
struct mlx5hws_cmd_ft_query_attr ft_attr = {0};
|
||||
- struct mlx5hws_pool_chunk *ste;
|
||||
struct mlx5hws_pool *ste_pool;
|
||||
u64 icm_addr_0 = 0;
|
||||
u64 icm_addr_1 = 0;
|
||||
@@ -134,12 +133,11 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
|
||||
matcher->end_ft_id,
|
||||
matcher->col_matcher ? HWS_PTR_TO_ID(matcher->col_matcher) : 0);
|
||||
|
||||
- ste = &matcher->match_ste.ste;
|
||||
ste_pool = matcher->match_ste.pool;
|
||||
if (ste_pool) {
|
||||
- ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
|
||||
+ ste_0_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
|
||||
+ ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
}
|
||||
|
||||
seq_printf(f, ",%d,%d,%d,%d",
|
||||
@@ -148,12 +146,11 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
|
||||
matcher->match_ste.rtc_1_id,
|
||||
(int)ste_1_id);
|
||||
|
||||
- ste = &matcher->action_ste.ste;
|
||||
ste_pool = matcher->action_ste.pool;
|
||||
if (ste_pool) {
|
||||
- ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
|
||||
+ ste_0_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
|
||||
+ ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
else
|
||||
ste_1_id = -1;
|
||||
} else {
|
||||
@@ -387,14 +384,17 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context
|
||||
if (!stc_pool)
|
||||
return 0;
|
||||
|
||||
- if (stc_pool->resource[0]) {
|
||||
- ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]);
|
||||
+ if (stc_pool->resource) {
|
||||
+ ret = hws_debug_dump_context_stc_resource(f, ctx,
|
||||
+ stc_pool->resource);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
- if (stc_pool->mirror_resource[0]) {
|
||||
- ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]);
|
||||
+ if (stc_pool->mirror_resource) {
|
||||
+ struct mlx5hws_pool_resource *res = stc_pool->mirror_resource;
|
||||
+
|
||||
+ ret = hws_debug_dump_context_stc_resource(f, ctx, res);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 37a4497048a6..59b14db427b4 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -223,7 +223,6 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
|
||||
struct mlx5hws_match_template *mt = matcher->mt;
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
- struct mlx5hws_action_default_stc *default_stc;
|
||||
struct mlx5hws_matcher_action_ste *action_ste;
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
struct mlx5hws_pool *ste_pool, *stc_pool;
|
||||
@@ -305,7 +304,7 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
|
||||
rtc_attr.pd = ctx->pd_num;
|
||||
rtc_attr.ste_base = obj_id;
|
||||
@@ -316,8 +315,7 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
|
||||
/* STC is a single resource (obj_id), use any STC for the ID */
|
||||
stc_pool = ctx->stc_pool;
|
||||
- default_stc = ctx->common_res.default_stc;
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
|
||||
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
|
||||
@@ -328,11 +326,11 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
}
|
||||
|
||||
if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
rtc_attr.ste_base = obj_id;
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true);
|
||||
|
||||
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true);
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
index 35ed9bee06a6..0de03e17624c 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
@@ -20,15 +20,14 @@ static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource)
|
||||
kfree(resource);
|
||||
}
|
||||
|
||||
-static void hws_pool_resource_free(struct mlx5hws_pool *pool,
|
||||
- int resource_idx)
|
||||
+static void hws_pool_resource_free(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- hws_pool_free_one_resource(pool->resource[resource_idx]);
|
||||
- pool->resource[resource_idx] = NULL;
|
||||
+ hws_pool_free_one_resource(pool->resource);
|
||||
+ pool->resource = NULL;
|
||||
|
||||
if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
- hws_pool_free_one_resource(pool->mirror_resource[resource_idx]);
|
||||
- pool->mirror_resource[resource_idx] = NULL;
|
||||
+ hws_pool_free_one_resource(pool->mirror_resource);
|
||||
+ pool->mirror_resource = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,7 +77,7 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range,
|
||||
}
|
||||
|
||||
static int
|
||||
-hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx)
|
||||
+hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range)
|
||||
{
|
||||
struct mlx5hws_pool_resource *resource;
|
||||
u32 fw_ft_type, opt_log_range;
|
||||
@@ -91,7 +90,7 @@ hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- pool->resource[idx] = resource;
|
||||
+ pool->resource = resource;
|
||||
|
||||
if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
struct mlx5hws_pool_resource *mirror_resource;
|
||||
@@ -102,10 +101,10 @@ hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx)
|
||||
if (!mirror_resource) {
|
||||
mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n");
|
||||
hws_pool_free_one_resource(resource);
|
||||
- pool->resource[idx] = NULL;
|
||||
+ pool->resource = NULL;
|
||||
return -EINVAL;
|
||||
}
|
||||
- pool->mirror_resource[idx] = mirror_resource;
|
||||
+ pool->mirror_resource = mirror_resource;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -129,9 +128,9 @@ static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
{
|
||||
struct mlx5hws_buddy_mem *buddy;
|
||||
|
||||
- buddy = pool->db.buddy_manager->buddies[chunk->resource_idx];
|
||||
+ buddy = pool->db.buddy;
|
||||
if (!buddy) {
|
||||
- mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx);
|
||||
+ mlx5hws_err(pool->ctx, "Bad buddy state\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -139,86 +138,50 @@ static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
}
|
||||
|
||||
static struct mlx5hws_buddy_mem *
|
||||
-hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx,
|
||||
- u32 order, bool *is_new_buddy)
|
||||
+hws_pool_buddy_get_buddy(struct mlx5hws_pool *pool, u32 order)
|
||||
{
|
||||
static struct mlx5hws_buddy_mem *buddy;
|
||||
u32 new_buddy_size;
|
||||
|
||||
- buddy = pool->db.buddy_manager->buddies[idx];
|
||||
+ buddy = pool->db.buddy;
|
||||
if (buddy)
|
||||
return buddy;
|
||||
|
||||
new_buddy_size = max(pool->alloc_log_sz, order);
|
||||
- *is_new_buddy = true;
|
||||
buddy = mlx5hws_buddy_create(new_buddy_size);
|
||||
if (!buddy) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n",
|
||||
- new_buddy_size, idx);
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create buddy order: %d\n",
|
||||
+ new_buddy_size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
|
||||
- pool->type, new_buddy_size, idx);
|
||||
+ if (hws_pool_resource_alloc(pool, new_buddy_size) != 0) {
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
+ pool->type, new_buddy_size);
|
||||
mlx5hws_buddy_cleanup(buddy);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
- pool->db.buddy_manager->buddies[idx] = buddy;
|
||||
+ pool->db.buddy = buddy;
|
||||
|
||||
return buddy;
|
||||
}
|
||||
|
||||
static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
|
||||
int order,
|
||||
- u32 *buddy_idx,
|
||||
int *seg)
|
||||
{
|
||||
struct mlx5hws_buddy_mem *buddy;
|
||||
- bool new_mem = false;
|
||||
- int ret = 0;
|
||||
- int i;
|
||||
-
|
||||
- *seg = -1;
|
||||
-
|
||||
- /* Find the next free place from the buddy array */
|
||||
- while (*seg < 0) {
|
||||
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
|
||||
- buddy = hws_pool_buddy_get_next_buddy(pool, i,
|
||||
- order,
|
||||
- &new_mem);
|
||||
- if (!buddy) {
|
||||
- ret = -ENOMEM;
|
||||
- goto out;
|
||||
- }
|
||||
-
|
||||
- *seg = mlx5hws_buddy_alloc_mem(buddy, order);
|
||||
- if (*seg >= 0)
|
||||
- goto found;
|
||||
-
|
||||
- if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) {
|
||||
- mlx5hws_err(pool->ctx,
|
||||
- "Fail to allocate seg for one resource pool\n");
|
||||
- ret = -ENOMEM;
|
||||
- goto out;
|
||||
- }
|
||||
-
|
||||
- if (new_mem) {
|
||||
- /* We have new memory pool, should be place for us */
|
||||
- mlx5hws_err(pool->ctx,
|
||||
- "No memory for order: %d with buddy no: %d\n",
|
||||
- order, i);
|
||||
- ret = -ENOMEM;
|
||||
- goto out;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
|
||||
-found:
|
||||
- *buddy_idx = i;
|
||||
-out:
|
||||
- return ret;
|
||||
+ buddy = hws_pool_buddy_get_buddy(pool, order);
|
||||
+ if (!buddy)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ *seg = mlx5hws_buddy_alloc_mem(buddy, order);
|
||||
+ if (*seg >= 0)
|
||||
+ return 0;
|
||||
+
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
@@ -226,9 +189,7 @@ static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
- /* Go over the buddies and find next free slot */
|
||||
ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->resource_idx,
|
||||
&chunk->offset);
|
||||
if (ret)
|
||||
mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
@@ -240,33 +201,21 @@ static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool)
|
||||
{
|
||||
struct mlx5hws_buddy_mem *buddy;
|
||||
- int i;
|
||||
-
|
||||
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
|
||||
- buddy = pool->db.buddy_manager->buddies[i];
|
||||
- if (buddy) {
|
||||
- mlx5hws_buddy_cleanup(buddy);
|
||||
- kfree(buddy);
|
||||
- pool->db.buddy_manager->buddies[i] = NULL;
|
||||
- }
|
||||
- }
|
||||
|
||||
- kfree(pool->db.buddy_manager);
|
||||
+ buddy = pool->db.buddy;
|
||||
+ if (buddy) {
|
||||
+ mlx5hws_buddy_cleanup(buddy);
|
||||
+ kfree(buddy);
|
||||
+ pool->db.buddy = NULL;
|
||||
+ }
|
||||
}
|
||||
|
||||
static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
|
||||
{
|
||||
- pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL);
|
||||
- if (!pool->db.buddy_manager)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) {
|
||||
- bool new_buddy;
|
||||
-
|
||||
- if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) {
|
||||
+ if (!hws_pool_buddy_get_buddy(pool, log_range)) {
|
||||
mlx5hws_err(pool->ctx,
|
||||
"Failed allocating memory on create log_sz: %d\n", log_range);
|
||||
- kfree(pool->db.buddy_manager);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
@@ -278,14 +227,13 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool,
|
||||
- u32 alloc_size, int idx)
|
||||
+static int hws_pool_create_resource(struct mlx5hws_pool *pool, u32 alloc_size)
|
||||
{
|
||||
- int ret = hws_pool_resource_alloc(pool, alloc_size, idx);
|
||||
+ int ret = hws_pool_resource_alloc(pool, alloc_size);
|
||||
|
||||
if (ret) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
|
||||
- pool->type, alloc_size, idx);
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
+ pool->type, alloc_size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -319,7 +267,7 @@ hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order)
|
||||
elem->log_size = alloc_size - order;
|
||||
}
|
||||
|
||||
- if (hws_pool_create_resource_on_index(pool, alloc_size, 0)) {
|
||||
+ if (hws_pool_create_resource(pool, alloc_size)) {
|
||||
mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
pool->type, alloc_size);
|
||||
goto free_db;
|
||||
@@ -355,7 +303,7 @@ static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *se
|
||||
|
||||
static int
|
||||
hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
- u32 *idx, int *seg)
|
||||
+ int *seg)
|
||||
{
|
||||
struct mlx5hws_pool_elements *elem;
|
||||
|
||||
@@ -370,7 +318,6 @@ hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- *idx = 0;
|
||||
elem->num_of_elements++;
|
||||
return 0;
|
||||
|
||||
@@ -379,21 +326,17 @@ hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
-static int
|
||||
-hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
- u32 *idx, int *seg)
|
||||
+static int hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool,
|
||||
+ u32 order, int *seg)
|
||||
{
|
||||
- int ret, i;
|
||||
-
|
||||
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
|
||||
- if (!pool->resource[i]) {
|
||||
- ret = hws_pool_create_resource_on_index(pool, order, i);
|
||||
- if (ret)
|
||||
- goto err_no_res;
|
||||
- *idx = i;
|
||||
- *seg = 0; /* One memory slot in that element */
|
||||
- return 0;
|
||||
- }
|
||||
+ int ret;
|
||||
+
|
||||
+ if (!pool->resource) {
|
||||
+ ret = hws_pool_create_resource(pool, order);
|
||||
+ if (ret)
|
||||
+ goto err_no_res;
|
||||
+ *seg = 0; /* One memory slot in that element */
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
|
||||
@@ -409,9 +352,7 @@ static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
{
|
||||
int ret;
|
||||
|
||||
- /* Go over all memory elements and find/allocate free slot */
|
||||
ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->resource_idx,
|
||||
&chunk->offset);
|
||||
if (ret)
|
||||
mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
@@ -423,11 +364,8 @@ static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
struct mlx5hws_pool_chunk *chunk)
|
||||
{
|
||||
- if (unlikely(!pool->resource[chunk->resource_idx]))
|
||||
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
|
||||
-
|
||||
if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE)
|
||||
- hws_pool_resource_free(pool, chunk->resource_idx);
|
||||
+ hws_pool_resource_free(pool);
|
||||
}
|
||||
|
||||
static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool)
|
||||
@@ -455,7 +393,7 @@ static void
|
||||
hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
|
||||
struct mlx5hws_pool_elements *elem)
|
||||
{
|
||||
- hws_pool_resource_free(pool, 0);
|
||||
+ hws_pool_resource_free(pool);
|
||||
bitmap_free(elem->bitmap);
|
||||
kfree(elem);
|
||||
pool->db.element = NULL;
|
||||
@@ -466,12 +404,9 @@ static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
{
|
||||
struct mlx5hws_pool_elements *elem;
|
||||
|
||||
- if (unlikely(chunk->resource_idx))
|
||||
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
|
||||
-
|
||||
elem = pool->db.element;
|
||||
if (!elem) {
|
||||
- mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx);
|
||||
+ mlx5hws_err(pool->ctx, "Pool element was not allocated\n");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -489,9 +424,7 @@ static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
- /* Go over all memory elements and find/allocate free slot */
|
||||
ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->resource_idx,
|
||||
&chunk->offset);
|
||||
if (ret)
|
||||
mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
@@ -614,13 +547,10 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_
|
||||
|
||||
int mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- int i;
|
||||
-
|
||||
mutex_destroy(&pool->lock);
|
||||
|
||||
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++)
|
||||
- if (pool->resource[i])
|
||||
- hws_pool_resource_free(pool, i);
|
||||
+ if (pool->resource)
|
||||
+ hws_pool_resource_free(pool);
|
||||
|
||||
hws_pool_db_unint(pool);
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
index f4258f83fdbf..112a61cd2997 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
@@ -6,16 +6,12 @@
|
||||
|
||||
#define MLX5HWS_POOL_STC_LOG_SZ 15
|
||||
|
||||
-#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100
|
||||
-
|
||||
enum mlx5hws_pool_type {
|
||||
MLX5HWS_POOL_TYPE_STE,
|
||||
MLX5HWS_POOL_TYPE_STC,
|
||||
};
|
||||
|
||||
struct mlx5hws_pool_chunk {
|
||||
- u32 resource_idx;
|
||||
- /* Internal offset, relative to base index */
|
||||
int offset;
|
||||
int order;
|
||||
};
|
||||
@@ -72,14 +68,10 @@ enum mlx5hws_db_type {
|
||||
MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE,
|
||||
/* One resource only, all the elements are with same one size */
|
||||
MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE,
|
||||
- /* Many resources, the memory allocated with buddy mechanism */
|
||||
+ /* Entries are managed using a buddy mechanism. */
|
||||
MLX5HWS_POOL_DB_TYPE_BUDDY,
|
||||
};
|
||||
|
||||
-struct mlx5hws_buddy_manager {
|
||||
- struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ];
|
||||
-};
|
||||
-
|
||||
struct mlx5hws_pool_elements {
|
||||
u32 num_of_elements;
|
||||
unsigned long *bitmap;
|
||||
@@ -91,7 +83,7 @@ struct mlx5hws_pool_db {
|
||||
enum mlx5hws_db_type type;
|
||||
union {
|
||||
struct mlx5hws_pool_elements *element;
|
||||
- struct mlx5hws_buddy_manager *buddy_manager;
|
||||
+ struct mlx5hws_buddy_mem *buddy;
|
||||
};
|
||||
};
|
||||
|
||||
@@ -109,8 +101,8 @@ struct mlx5hws_pool {
|
||||
size_t alloc_log_sz;
|
||||
enum mlx5hws_table_type tbl_type;
|
||||
enum mlx5hws_pool_optimize opt_type;
|
||||
- struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
|
||||
- struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
|
||||
+ struct mlx5hws_pool_resource *resource;
|
||||
+ struct mlx5hws_pool_resource *mirror_resource;
|
||||
/* DB */
|
||||
struct mlx5hws_pool_db db;
|
||||
/* Functions */
|
||||
@@ -131,17 +123,13 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
|
||||
void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
|
||||
struct mlx5hws_pool_chunk *chunk);
|
||||
|
||||
-static inline u32
|
||||
-mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static inline u32 mlx5hws_pool_get_base_id(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- return pool->resource[chunk->resource_idx]->base_id;
|
||||
+ return pool->resource->base_id;
|
||||
}
|
||||
|
||||
-static inline u32
|
||||
-mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- return pool->mirror_resource[chunk->resource_idx]->base_id;
|
||||
+ return pool->mirror_resource->base_id;
|
||||
}
|
||||
#endif /* MLX5HWS_POOL_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
760
SOURCES/1349-net-mlx5-hws-refactor-pool-implementation.patch
Normal file
760
SOURCES/1349-net-mlx5-hws-refactor-pool-implementation.patch
Normal file
@ -0,0 +1,760 @@
|
||||
From ac067697d1ab53a4dceeec03736f9b8bf2363665 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:59 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Refactor pool implementation
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit d171ce3d988868bed9dc3c9eeb8428f87dd9ac85
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:34 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Refactor pool implementation
|
||||
|
||||
Refactor the pool implementation to remove unused flags and clarify its
|
||||
usage. A pool represents a single range of STEs or STCs which are
|
||||
allocated at pool creation time.
|
||||
|
||||
Pools are used under three patterns:
|
||||
|
||||
1. STCs are allocated one at a time from a global pool using a bitmap
|
||||
based implementation.
|
||||
|
||||
2. Action STEs are allocated in power-of-two blocks using a buddy
|
||||
algorithm.
|
||||
|
||||
3. Match STEs do not use allocation, since insertion into these tables
|
||||
is based on hashes or direct addressing. In such cases we use a pool
|
||||
only to create the STE range.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-5-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index 781ba8c4f733..39904b337b81 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1602,7 +1602,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
|
||||
pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB;
|
||||
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL;
|
||||
pool_attr.alloc_log_sz = 1;
|
||||
table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr);
|
||||
if (!table_ste->pool) {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
index 9cda2774fd64..b7cb736b74d7 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
@@ -34,7 +34,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx)
|
||||
|
||||
/* Create an STC pool per FT type */
|
||||
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC;
|
||||
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL;
|
||||
max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max);
|
||||
pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran);
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 59b14db427b4..95d31fd6c976 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -265,14 +265,6 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_attr.match_definer_0 = ctx->caps->linear_match_definer;
|
||||
}
|
||||
}
|
||||
-
|
||||
- /* Match pool requires implicit allocation */
|
||||
- ret = mlx5hws_pool_chunk_alloc(ste_pool, ste);
|
||||
- if (ret) {
|
||||
- mlx5hws_err(ctx, "Failed to allocate STE for %s RTC",
|
||||
- hws_matcher_rtc_type_to_str(rtc_type));
|
||||
- return ret;
|
||||
- }
|
||||
break;
|
||||
|
||||
case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
|
||||
@@ -357,23 +349,17 @@ static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
|
||||
{
|
||||
struct mlx5hws_matcher_action_ste *action_ste;
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
- struct mlx5hws_pool_chunk *ste;
|
||||
- struct mlx5hws_pool *ste_pool;
|
||||
u32 rtc_0_id, rtc_1_id;
|
||||
|
||||
switch (rtc_type) {
|
||||
case HWS_MATCHER_RTC_TYPE_MATCH:
|
||||
rtc_0_id = matcher->match_ste.rtc_0_id;
|
||||
rtc_1_id = matcher->match_ste.rtc_1_id;
|
||||
- ste_pool = matcher->match_ste.pool;
|
||||
- ste = &matcher->match_ste.ste;
|
||||
break;
|
||||
case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
|
||||
action_ste = &matcher->action_ste;
|
||||
rtc_0_id = action_ste->rtc_0_id;
|
||||
rtc_1_id = action_ste->rtc_1_id;
|
||||
- ste_pool = action_ste->pool;
|
||||
- ste = &action_ste->ste;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
@@ -383,8 +369,6 @@ static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
|
||||
mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id);
|
||||
|
||||
mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id);
|
||||
- if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH)
|
||||
- mlx5hws_pool_chunk_free(ste_pool, ste);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -557,7 +541,7 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
|
||||
/* Allocate action STE mempool */
|
||||
pool_attr.table_type = tbl->type;
|
||||
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL;
|
||||
+ pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY;
|
||||
/* Pool size is similar to action RTC size */
|
||||
pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
|
||||
matcher->attr.table.sz_row_log +
|
||||
@@ -636,7 +620,6 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
|
||||
/* Create an STE pool per matcher*/
|
||||
pool_attr.table_type = matcher->tbl->type;
|
||||
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL;
|
||||
pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log +
|
||||
matcher->attr.table.sz_row_log;
|
||||
hws_matcher_set_pool_attr(&pool_attr, matcher);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
index 0de03e17624c..270b333faab3 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
@@ -60,10 +60,8 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range,
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
- if (ret) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n");
|
||||
+ if (ret)
|
||||
goto free_resource;
|
||||
- }
|
||||
|
||||
resource->pool = pool;
|
||||
resource->range = 1 << log_range;
|
||||
@@ -76,17 +74,17 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static int
|
||||
-hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range)
|
||||
+static int hws_pool_resource_alloc(struct mlx5hws_pool *pool)
|
||||
{
|
||||
struct mlx5hws_pool_resource *resource;
|
||||
u32 fw_ft_type, opt_log_range;
|
||||
|
||||
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false);
|
||||
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range;
|
||||
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ?
|
||||
+ 0 : pool->alloc_log_sz;
|
||||
resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
|
||||
if (!resource) {
|
||||
- mlx5hws_err(pool->ctx, "Failed allocating resource\n");
|
||||
+ mlx5hws_err(pool->ctx, "Failed to allocate resource\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -96,10 +94,11 @@ hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range)
|
||||
struct mlx5hws_pool_resource *mirror_resource;
|
||||
|
||||
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true);
|
||||
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range;
|
||||
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ?
|
||||
+ 0 : pool->alloc_log_sz;
|
||||
mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
|
||||
if (!mirror_resource) {
|
||||
- mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n");
|
||||
+ mlx5hws_err(pool->ctx, "Failed to allocate mirrored resource\n");
|
||||
hws_pool_free_one_resource(resource);
|
||||
pool->resource = NULL;
|
||||
return -EINVAL;
|
||||
@@ -110,92 +109,58 @@ hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range)
|
||||
-{
|
||||
- unsigned long *cur_bmp;
|
||||
-
|
||||
- cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL);
|
||||
- if (!cur_bmp)
|
||||
- return NULL;
|
||||
-
|
||||
- bitmap_fill(cur_bmp, 1 << log_range);
|
||||
-
|
||||
- return cur_bmp;
|
||||
-}
|
||||
-
|
||||
-static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static int hws_pool_buddy_init(struct mlx5hws_pool *pool)
|
||||
{
|
||||
struct mlx5hws_buddy_mem *buddy;
|
||||
|
||||
- buddy = pool->db.buddy;
|
||||
+ buddy = mlx5hws_buddy_create(pool->alloc_log_sz);
|
||||
if (!buddy) {
|
||||
- mlx5hws_err(pool->ctx, "Bad buddy state\n");
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order);
|
||||
-}
|
||||
-
|
||||
-static struct mlx5hws_buddy_mem *
|
||||
-hws_pool_buddy_get_buddy(struct mlx5hws_pool *pool, u32 order)
|
||||
-{
|
||||
- static struct mlx5hws_buddy_mem *buddy;
|
||||
- u32 new_buddy_size;
|
||||
-
|
||||
- buddy = pool->db.buddy;
|
||||
- if (buddy)
|
||||
- return buddy;
|
||||
-
|
||||
- new_buddy_size = max(pool->alloc_log_sz, order);
|
||||
- buddy = mlx5hws_buddy_create(new_buddy_size);
|
||||
- if (!buddy) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create buddy order: %d\n",
|
||||
- new_buddy_size);
|
||||
- return NULL;
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create buddy order: %zu\n",
|
||||
+ pool->alloc_log_sz);
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
- if (hws_pool_resource_alloc(pool, new_buddy_size) != 0) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
- pool->type, new_buddy_size);
|
||||
+ if (hws_pool_resource_alloc(pool) != 0) {
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n",
|
||||
+ pool->type, pool->alloc_log_sz);
|
||||
mlx5hws_buddy_cleanup(buddy);
|
||||
- return NULL;
|
||||
+ return -ENOMEM;
|
||||
}
|
||||
|
||||
pool->db.buddy = buddy;
|
||||
|
||||
- return buddy;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
|
||||
- int order,
|
||||
- int *seg)
|
||||
+static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
+ struct mlx5hws_pool_chunk *chunk)
|
||||
{
|
||||
- struct mlx5hws_buddy_mem *buddy;
|
||||
+ struct mlx5hws_buddy_mem *buddy = pool->db.buddy;
|
||||
|
||||
- buddy = hws_pool_buddy_get_buddy(pool, order);
|
||||
- if (!buddy)
|
||||
- return -ENOMEM;
|
||||
+ if (!buddy) {
|
||||
+ mlx5hws_err(pool->ctx, "Bad buddy state\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
|
||||
- *seg = mlx5hws_buddy_alloc_mem(buddy, order);
|
||||
- if (*seg >= 0)
|
||||
+ chunk->offset = mlx5hws_buddy_alloc_mem(buddy, chunk->order);
|
||||
+ if (chunk->offset >= 0)
|
||||
return 0;
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
-static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
+ struct mlx5hws_pool_chunk *chunk)
|
||||
{
|
||||
- int ret = 0;
|
||||
+ struct mlx5hws_buddy_mem *buddy;
|
||||
|
||||
- ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->offset);
|
||||
- if (ret)
|
||||
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
- chunk->order);
|
||||
+ buddy = pool->db.buddy;
|
||||
+ if (!buddy) {
|
||||
+ mlx5hws_err(pool->ctx, "Bad buddy state\n");
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
- return ret;
|
||||
+ mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order);
|
||||
}
|
||||
|
||||
static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool)
|
||||
@@ -210,15 +175,13 @@ static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool)
|
||||
}
|
||||
}
|
||||
|
||||
-static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
|
||||
+static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) {
|
||||
- if (!hws_pool_buddy_get_buddy(pool, log_range)) {
|
||||
- mlx5hws_err(pool->ctx,
|
||||
- "Failed allocating memory on create log_sz: %d\n", log_range);
|
||||
- return -ENOMEM;
|
||||
- }
|
||||
- }
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = hws_pool_buddy_init(pool);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
|
||||
pool->p_db_uninit = &hws_pool_buddy_db_uninit;
|
||||
pool->p_get_chunk = &hws_pool_buddy_db_get_chunk;
|
||||
@@ -227,234 +190,105 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hws_pool_create_resource(struct mlx5hws_pool *pool, u32 alloc_size)
|
||||
-{
|
||||
- int ret = hws_pool_resource_alloc(pool, alloc_size);
|
||||
-
|
||||
- if (ret) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
- pool->type, alloc_size);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static struct mlx5hws_pool_elements *
|
||||
-hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order)
|
||||
+static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range)
|
||||
{
|
||||
- struct mlx5hws_pool_elements *elem;
|
||||
- u32 alloc_size;
|
||||
-
|
||||
- alloc_size = pool->alloc_log_sz;
|
||||
+ unsigned long *bitmap;
|
||||
|
||||
- elem = kzalloc(sizeof(*elem), GFP_KERNEL);
|
||||
- if (!elem)
|
||||
+ bitmap = bitmap_zalloc(1 << log_range, GFP_KERNEL);
|
||||
+ if (!bitmap)
|
||||
return NULL;
|
||||
|
||||
- /* Sharing the same resource, also means that all the elements are with size 1 */
|
||||
- if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) &&
|
||||
- !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) {
|
||||
- /* Currently all chunks in size 1 */
|
||||
- elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order);
|
||||
- if (!elem->bitmap) {
|
||||
- mlx5hws_err(pool->ctx,
|
||||
- "Failed to create bitmap type: %d: size %d\n",
|
||||
- pool->type, alloc_size);
|
||||
- goto free_elem;
|
||||
- }
|
||||
-
|
||||
- elem->log_size = alloc_size - order;
|
||||
- }
|
||||
-
|
||||
- if (hws_pool_create_resource(pool, alloc_size)) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d\n",
|
||||
- pool->type, alloc_size);
|
||||
- goto free_db;
|
||||
- }
|
||||
-
|
||||
- pool->db.element = elem;
|
||||
+ bitmap_fill(bitmap, 1 << log_range);
|
||||
|
||||
- return elem;
|
||||
-
|
||||
-free_db:
|
||||
- bitmap_free(elem->bitmap);
|
||||
-free_elem:
|
||||
- kfree(elem);
|
||||
- return NULL;
|
||||
+ return bitmap;
|
||||
}
|
||||
|
||||
-static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg)
|
||||
+static int hws_pool_bitmap_init(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- unsigned int segment, size;
|
||||
+ unsigned long *bitmap;
|
||||
|
||||
- size = 1 << elem->log_size;
|
||||
-
|
||||
- segment = find_first_bit(elem->bitmap, size);
|
||||
- if (segment >= size) {
|
||||
- elem->is_full = true;
|
||||
+ bitmap = hws_pool_create_and_init_bitmap(pool->alloc_log_sz);
|
||||
+ if (!bitmap) {
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create bitmap order: %zu\n",
|
||||
+ pool->alloc_log_sz);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- bitmap_clear(elem->bitmap, segment, 1);
|
||||
- *seg = segment;
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static int
|
||||
-hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
|
||||
- int *seg)
|
||||
-{
|
||||
- struct mlx5hws_pool_elements *elem;
|
||||
-
|
||||
- elem = pool->db.element;
|
||||
- if (!elem)
|
||||
- elem = hws_pool_element_create_new_elem(pool, order);
|
||||
- if (!elem)
|
||||
- goto err_no_elem;
|
||||
-
|
||||
- if (hws_pool_element_find_seg(elem, seg) != 0) {
|
||||
- mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
|
||||
+ if (hws_pool_resource_alloc(pool) != 0) {
|
||||
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %zu\n",
|
||||
+ pool->type, pool->alloc_log_sz);
|
||||
+ bitmap_free(bitmap);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
- elem->num_of_elements++;
|
||||
- return 0;
|
||||
+ pool->db.bitmap = bitmap;
|
||||
|
||||
-err_no_elem:
|
||||
- mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
|
||||
- return -ENOMEM;
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
-static int hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool,
|
||||
- u32 order, int *seg)
|
||||
+static int hws_pool_bitmap_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
+ struct mlx5hws_pool_chunk *chunk)
|
||||
{
|
||||
- int ret;
|
||||
+ unsigned long *bitmap, size;
|
||||
|
||||
- if (!pool->resource) {
|
||||
- ret = hws_pool_create_resource(pool, order);
|
||||
- if (ret)
|
||||
- goto err_no_res;
|
||||
- *seg = 0; /* One memory slot in that element */
|
||||
- return 0;
|
||||
+ if (chunk->order != 0) {
|
||||
+ mlx5hws_err(pool->ctx, "Pool only supports order 0 allocs\n");
|
||||
+ return -EINVAL;
|
||||
}
|
||||
|
||||
- mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
|
||||
- return -ENOMEM;
|
||||
-
|
||||
-err_no_res:
|
||||
- mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
|
||||
- return -ENOMEM;
|
||||
-}
|
||||
-
|
||||
-static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
-{
|
||||
- int ret;
|
||||
-
|
||||
- ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->offset);
|
||||
- if (ret)
|
||||
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
- chunk->order);
|
||||
-
|
||||
- return ret;
|
||||
-}
|
||||
+ bitmap = pool->db.bitmap;
|
||||
+ if (!bitmap) {
|
||||
+ mlx5hws_err(pool->ctx, "Bad bitmap state\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
|
||||
-static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
-{
|
||||
- if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE)
|
||||
- hws_pool_resource_free(pool);
|
||||
-}
|
||||
+ size = 1 << pool->alloc_log_sz;
|
||||
|
||||
-static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool)
|
||||
-{
|
||||
- (void)pool;
|
||||
-}
|
||||
+ chunk->offset = find_first_bit(bitmap, size);
|
||||
+ if (chunk->offset >= size)
|
||||
+ return -ENOMEM;
|
||||
|
||||
-/* This memory management works as the following:
|
||||
- * - At start doesn't allocate no mem at all.
|
||||
- * - When new request for chunk arrived:
|
||||
- * allocate resource and give it.
|
||||
- * - When free that chunk:
|
||||
- * the resource is freed.
|
||||
- */
|
||||
-static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool)
|
||||
-{
|
||||
- pool->p_db_uninit = &hws_pool_general_element_db_uninit;
|
||||
- pool->p_get_chunk = &hws_pool_general_element_db_get_chunk;
|
||||
- pool->p_put_chunk = &hws_pool_general_element_db_put_chunk;
|
||||
+ bitmap_clear(bitmap, chunk->offset, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void
|
||||
-hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_elements *elem)
|
||||
-{
|
||||
- hws_pool_resource_free(pool);
|
||||
- bitmap_free(elem->bitmap);
|
||||
- kfree(elem);
|
||||
- pool->db.element = NULL;
|
||||
-}
|
||||
-
|
||||
-static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static void hws_pool_bitmap_db_put_chunk(struct mlx5hws_pool *pool,
|
||||
+ struct mlx5hws_pool_chunk *chunk)
|
||||
{
|
||||
- struct mlx5hws_pool_elements *elem;
|
||||
+ unsigned long *bitmap;
|
||||
|
||||
- elem = pool->db.element;
|
||||
- if (!elem) {
|
||||
- mlx5hws_err(pool->ctx, "Pool element was not allocated\n");
|
||||
+ bitmap = pool->db.bitmap;
|
||||
+ if (!bitmap) {
|
||||
+ mlx5hws_err(pool->ctx, "Bad bitmap state\n");
|
||||
return;
|
||||
}
|
||||
|
||||
- bitmap_set(elem->bitmap, chunk->offset, 1);
|
||||
- elem->is_full = false;
|
||||
- elem->num_of_elements--;
|
||||
-
|
||||
- if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE &&
|
||||
- !elem->num_of_elements)
|
||||
- hws_onesize_element_db_destroy_element(pool, elem);
|
||||
+ bitmap_set(bitmap, chunk->offset, 1);
|
||||
}
|
||||
|
||||
-static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
|
||||
- struct mlx5hws_pool_chunk *chunk)
|
||||
+static void hws_pool_bitmap_db_uninit(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- int ret = 0;
|
||||
-
|
||||
- ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order,
|
||||
- &chunk->offset);
|
||||
- if (ret)
|
||||
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
|
||||
- chunk->order);
|
||||
+ unsigned long *bitmap;
|
||||
|
||||
- return ret;
|
||||
+ bitmap = pool->db.bitmap;
|
||||
+ if (bitmap) {
|
||||
+ bitmap_free(bitmap);
|
||||
+ pool->db.bitmap = NULL;
|
||||
+ }
|
||||
}
|
||||
|
||||
-static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool)
|
||||
+static int hws_pool_bitmap_db_init(struct mlx5hws_pool *pool)
|
||||
{
|
||||
- struct mlx5hws_pool_elements *elem = pool->db.element;
|
||||
+ int ret;
|
||||
|
||||
- if (elem) {
|
||||
- bitmap_free(elem->bitmap);
|
||||
- kfree(elem);
|
||||
- pool->db.element = NULL;
|
||||
- }
|
||||
-}
|
||||
+ ret = hws_pool_bitmap_init(pool);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
|
||||
-/* This memory management works as the following:
|
||||
- * - At start doesn't allocate no mem at all.
|
||||
- * - When new request for chunk arrived:
|
||||
- * aloocate the first and only slot of memory/resource
|
||||
- * when it ended return error.
|
||||
- */
|
||||
-static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool)
|
||||
-{
|
||||
- pool->p_db_uninit = &hws_onesize_element_db_uninit;
|
||||
- pool->p_get_chunk = &hws_onesize_element_db_get_chunk;
|
||||
- pool->p_put_chunk = &hws_onesize_element_db_put_chunk;
|
||||
+ pool->p_db_uninit = &hws_pool_bitmap_db_uninit;
|
||||
+ pool->p_get_chunk = &hws_pool_bitmap_db_get_chunk;
|
||||
+ pool->p_put_chunk = &hws_pool_bitmap_db_put_chunk;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -464,15 +298,14 @@ static int hws_pool_db_init(struct mlx5hws_pool *pool,
|
||||
{
|
||||
int ret;
|
||||
|
||||
- if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE)
|
||||
- ret = hws_pool_general_element_db_init(pool);
|
||||
- else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE)
|
||||
- ret = hws_pool_onesize_element_db_init(pool);
|
||||
+ if (db_type == MLX5HWS_POOL_DB_TYPE_BITMAP)
|
||||
+ ret = hws_pool_bitmap_db_init(pool);
|
||||
else
|
||||
- ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz);
|
||||
+ ret = hws_pool_buddy_db_init(pool);
|
||||
|
||||
if (ret) {
|
||||
- mlx5hws_err(pool->ctx, "Failed to init general db : %d (ret: %d)\n", db_type, ret);
|
||||
+ mlx5hws_err(pool->ctx, "Failed to init pool type: %d (ret: %d)\n",
|
||||
+ db_type, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -521,15 +354,10 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_
|
||||
pool->tbl_type = pool_attr->table_type;
|
||||
pool->opt_type = pool_attr->opt_type;
|
||||
|
||||
- /* Support general db */
|
||||
- if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
|
||||
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK))
|
||||
- res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE;
|
||||
- else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
|
||||
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS))
|
||||
- res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE;
|
||||
- else
|
||||
+ if (pool->flags & MLX5HWS_POOL_FLAG_BUDDY)
|
||||
res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY;
|
||||
+ else
|
||||
+ res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP;
|
||||
|
||||
pool->alloc_log_sz = pool_attr->alloc_log_sz;
|
||||
|
||||
@@ -545,7 +373,7 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-int mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
|
||||
+void mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
|
||||
{
|
||||
mutex_destroy(&pool->lock);
|
||||
|
||||
@@ -555,5 +383,4 @@ int mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
|
||||
hws_pool_db_unint(pool);
|
||||
|
||||
kfree(pool);
|
||||
- return 0;
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
index 112a61cd2997..9a781a87f097 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
@@ -23,29 +23,10 @@ struct mlx5hws_pool_resource {
|
||||
};
|
||||
|
||||
enum mlx5hws_pool_flags {
|
||||
- /* Only a one resource in that pool */
|
||||
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0,
|
||||
- MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1,
|
||||
- /* No sharing resources between chunks */
|
||||
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2,
|
||||
- /* All objects are in the same size */
|
||||
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3,
|
||||
- /* Managed by buddy allocator */
|
||||
- MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4,
|
||||
- /* Allocate pool_type memory on pool creation */
|
||||
- MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5,
|
||||
-
|
||||
- /* These values should be used by the caller */
|
||||
- MLX5HWS_POOL_FLAGS_FOR_STC_POOL =
|
||||
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
|
||||
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS,
|
||||
- MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL =
|
||||
- MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
|
||||
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK,
|
||||
- MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL =
|
||||
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
|
||||
- MLX5HWS_POOL_FLAGS_BUDDY_MANAGED |
|
||||
- MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE,
|
||||
+ /* Managed by a buddy allocator. If this is not set only allocations of
|
||||
+ * order 0 are supported.
|
||||
+ */
|
||||
+ MLX5HWS_POOL_FLAG_BUDDY = BIT(0),
|
||||
};
|
||||
|
||||
enum mlx5hws_pool_optimize {
|
||||
@@ -64,25 +45,16 @@ struct mlx5hws_pool_attr {
|
||||
};
|
||||
|
||||
enum mlx5hws_db_type {
|
||||
- /* Uses for allocating chunk of big memory, each element has its own resource in the FW*/
|
||||
- MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE,
|
||||
- /* One resource only, all the elements are with same one size */
|
||||
- MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE,
|
||||
+ /* Uses a bitmap, supports only allocations of order 0. */
|
||||
+ MLX5HWS_POOL_DB_TYPE_BITMAP,
|
||||
/* Entries are managed using a buddy mechanism. */
|
||||
MLX5HWS_POOL_DB_TYPE_BUDDY,
|
||||
};
|
||||
|
||||
-struct mlx5hws_pool_elements {
|
||||
- u32 num_of_elements;
|
||||
- unsigned long *bitmap;
|
||||
- u32 log_size;
|
||||
- bool is_full;
|
||||
-};
|
||||
-
|
||||
struct mlx5hws_pool_db {
|
||||
enum mlx5hws_db_type type;
|
||||
union {
|
||||
- struct mlx5hws_pool_elements *element;
|
||||
+ unsigned long *bitmap;
|
||||
struct mlx5hws_buddy_mem *buddy;
|
||||
};
|
||||
};
|
||||
@@ -103,7 +75,6 @@ struct mlx5hws_pool {
|
||||
enum mlx5hws_pool_optimize opt_type;
|
||||
struct mlx5hws_pool_resource *resource;
|
||||
struct mlx5hws_pool_resource *mirror_resource;
|
||||
- /* DB */
|
||||
struct mlx5hws_pool_db db;
|
||||
/* Functions */
|
||||
mlx5hws_pool_unint_db p_db_uninit;
|
||||
@@ -115,7 +86,7 @@ struct mlx5hws_pool *
|
||||
mlx5hws_pool_create(struct mlx5hws_context *ctx,
|
||||
struct mlx5hws_pool_attr *pool_attr);
|
||||
|
||||
-int mlx5hws_pool_destroy(struct mlx5hws_pool *pool);
|
||||
+void mlx5hws_pool_destroy(struct mlx5hws_pool *pool);
|
||||
|
||||
int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
|
||||
struct mlx5hws_pool_chunk *chunk);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
265
SOURCES/1350-net-mlx5-hws-cleanup-after-pool-refactoring.patch
Normal file
265
SOURCES/1350-net-mlx5-hws-cleanup-after-pool-refactoring.patch
Normal file
@ -0,0 +1,265 @@
|
||||
From 333144760a660c248b241cf555a88ed2447c29b1 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:59 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Cleanup after pool refactoring
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 43a2038c6d8a810e8e70f0e7fcb965f431c92bfb
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:35 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Cleanup after pool refactoring
|
||||
|
||||
Remove members which are now no longer used. In fact, many of the
|
||||
`struct mlx5hws_pool_chunk` were not even written to beyond being
|
||||
initialized, but they were used in various internals.
|
||||
|
||||
Also cleanup some local variables which made more sense when the API was
|
||||
thicker.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-6-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index 39904b337b81..161ad720b339 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1583,7 +1583,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
struct mlx5hws_matcher_action_ste *table_ste;
|
||||
struct mlx5hws_pool_attr pool_attr = {0};
|
||||
struct mlx5hws_pool *ste_pool, *stc_pool;
|
||||
- struct mlx5hws_pool_chunk *ste;
|
||||
u32 *rtc_0_id, *rtc_1_id;
|
||||
u32 obj_id;
|
||||
int ret;
|
||||
@@ -1613,8 +1612,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
rtc_0_id = &table_ste->rtc_0_id;
|
||||
rtc_1_id = &table_ste->rtc_1_id;
|
||||
ste_pool = table_ste->pool;
|
||||
- ste = &table_ste->ste;
|
||||
- ste->order = 1;
|
||||
|
||||
rtc_attr.log_size = 0;
|
||||
rtc_attr.log_depth = 0;
|
||||
@@ -1630,7 +1627,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
|
||||
rtc_attr.pd = ctx->pd_num;
|
||||
rtc_attr.ste_base = obj_id;
|
||||
- rtc_attr.ste_offset = ste->offset;
|
||||
rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false);
|
||||
|
||||
@@ -1833,7 +1829,6 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx,
|
||||
stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
|
||||
stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
|
||||
stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
|
||||
- stc_attr.ste_table.ste = table_ste->ste;
|
||||
stc_attr.ste_table.ste_pool = table_ste->pool;
|
||||
stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
|
||||
index e8f98c109b99..9c83753e4592 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
|
||||
@@ -406,7 +406,6 @@ int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev,
|
||||
MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1);
|
||||
MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base);
|
||||
MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base);
|
||||
- MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset);
|
||||
MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id);
|
||||
MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode);
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
|
||||
index 51d9e0291ac1..fa6bff210266 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
|
||||
@@ -70,7 +70,6 @@ struct mlx5hws_cmd_rtc_create_attr {
|
||||
u32 pd;
|
||||
u32 stc_base;
|
||||
u32 ste_base;
|
||||
- u32 ste_offset;
|
||||
u32 miss_ft_id;
|
||||
bool fw_gen_wqe;
|
||||
u8 update_index_mode;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 95d31fd6c976..3028e0387e3f 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -197,22 +197,15 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
|
||||
|
||||
static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
|
||||
struct mlx5hws_cmd_rtc_create_attr *rtc_attr,
|
||||
- enum mlx5hws_matcher_rtc_type rtc_type,
|
||||
bool is_mirror)
|
||||
{
|
||||
- struct mlx5hws_pool_chunk *ste = &matcher->action_ste.ste;
|
||||
enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src;
|
||||
- bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH;
|
||||
|
||||
if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) ||
|
||||
(flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE && is_mirror)) {
|
||||
/* Optimize FDB RTC */
|
||||
rtc_attr->log_size = 0;
|
||||
rtc_attr->log_depth = 0;
|
||||
- } else {
|
||||
- /* Keep original values */
|
||||
- rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order;
|
||||
- rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,8 +218,7 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
struct mlx5hws_matcher_action_ste *action_ste;
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
- struct mlx5hws_pool *ste_pool, *stc_pool;
|
||||
- struct mlx5hws_pool_chunk *ste;
|
||||
+ struct mlx5hws_pool *ste_pool;
|
||||
u32 *rtc_0_id, *rtc_1_id;
|
||||
u32 obj_id;
|
||||
int ret;
|
||||
@@ -236,8 +228,6 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_0_id = &matcher->match_ste.rtc_0_id;
|
||||
rtc_1_id = &matcher->match_ste.rtc_1_id;
|
||||
ste_pool = matcher->match_ste.pool;
|
||||
- ste = &matcher->match_ste.ste;
|
||||
- ste->order = attr->table.sz_col_log + attr->table.sz_row_log;
|
||||
|
||||
rtc_attr.log_size = attr->table.sz_row_log;
|
||||
rtc_attr.log_depth = attr->table.sz_col_log;
|
||||
@@ -273,16 +263,15 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_0_id = &action_ste->rtc_0_id;
|
||||
rtc_1_id = &action_ste->rtc_1_id;
|
||||
ste_pool = action_ste->pool;
|
||||
- ste = &action_ste->ste;
|
||||
/* Action RTC size calculation:
|
||||
* log((max number of rules in matcher) *
|
||||
* (max number of action STEs per rule) *
|
||||
* (2 to support writing new STEs for update rule))
|
||||
*/
|
||||
- ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
|
||||
- attr->table.sz_row_log +
|
||||
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
|
||||
- rtc_attr.log_size = ste->order;
|
||||
+ rtc_attr.log_size =
|
||||
+ ilog2(roundup_pow_of_two(action_ste->max_stes)) +
|
||||
+ attr->table.sz_row_log +
|
||||
+ MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
|
||||
rtc_attr.log_depth = 0;
|
||||
rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
|
||||
/* The action STEs use the default always hit definer */
|
||||
@@ -300,21 +289,19 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
|
||||
rtc_attr.pd = ctx->pd_num;
|
||||
rtc_attr.ste_base = obj_id;
|
||||
- rtc_attr.ste_offset = ste->offset;
|
||||
rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false);
|
||||
- hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false);
|
||||
+ hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false);
|
||||
|
||||
/* STC is a single resource (obj_id), use any STC for the ID */
|
||||
- stc_pool = ctx->stc_pool;
|
||||
- obj_id = mlx5hws_pool_get_base_id(stc_pool);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
|
||||
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
|
||||
if (ret) {
|
||||
mlx5hws_err(ctx, "Failed to create matcher RTC of type %s",
|
||||
hws_matcher_rtc_type_to_str(rtc_type));
|
||||
- goto free_ste;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
@@ -322,9 +309,9 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_attr.ste_base = obj_id;
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true);
|
||||
|
||||
- obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
- hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true);
|
||||
+ hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true);
|
||||
|
||||
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id);
|
||||
if (ret) {
|
||||
@@ -338,16 +325,12 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
|
||||
destroy_rtc_0:
|
||||
mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id);
|
||||
-free_ste:
|
||||
- if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH)
|
||||
- mlx5hws_pool_chunk_free(ste_pool, ste);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
|
||||
enum mlx5hws_matcher_rtc_type rtc_type)
|
||||
{
|
||||
- struct mlx5hws_matcher_action_ste *action_ste;
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
u32 rtc_0_id, rtc_1_id;
|
||||
|
||||
@@ -357,18 +340,17 @@ static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_1_id = matcher->match_ste.rtc_1_id;
|
||||
break;
|
||||
case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
|
||||
- action_ste = &matcher->action_ste;
|
||||
- rtc_0_id = action_ste->rtc_0_id;
|
||||
- rtc_1_id = action_ste->rtc_1_id;
|
||||
+ rtc_0_id = matcher->action_ste.rtc_0_id;
|
||||
+ rtc_1_id = matcher->action_ste.rtc_1_id;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
if (tbl->type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id);
|
||||
+ mlx5hws_cmd_rtc_destroy(tbl->ctx->mdev, rtc_1_id);
|
||||
|
||||
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id);
|
||||
+ mlx5hws_cmd_rtc_destroy(tbl->ctx->mdev, rtc_0_id);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -564,7 +546,6 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
|
||||
stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
|
||||
stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
|
||||
stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
|
||||
- stc_attr.ste_table.ste = action_ste->ste;
|
||||
stc_attr.ste_table.ste_pool = action_ste->pool;
|
||||
stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
index 20b32012c418..0450b6175ac9 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
@@ -45,14 +45,12 @@ struct mlx5hws_match_template {
|
||||
};
|
||||
|
||||
struct mlx5hws_matcher_match_ste {
|
||||
- struct mlx5hws_pool_chunk ste;
|
||||
u32 rtc_0_id;
|
||||
u32 rtc_1_id;
|
||||
struct mlx5hws_pool *pool;
|
||||
};
|
||||
|
||||
struct mlx5hws_matcher_action_ste {
|
||||
- struct mlx5hws_pool_chunk ste;
|
||||
struct mlx5hws_pool_chunk stc;
|
||||
u32 rtc_0_id;
|
||||
u32 rtc_1_id;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
108
SOURCES/1351-net-mlx5-hws-add-fullness-tracking-to-pool.patch
Normal file
108
SOURCES/1351-net-mlx5-hws-add-fullness-tracking-to-pool.patch
Normal file
@ -0,0 +1,108 @@
|
||||
From 2cd06eab502130ff9491b0f14378269d658826c8 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:59 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Add fullness tracking to pool
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 04562694766514f00e7086d3d4884db5f3a22d4e
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:36 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Add fullness tracking to pool
|
||||
|
||||
Future users will need to query whether a pool is empty.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-7-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
index 270b333faab3..26d85fe3c417 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
@@ -324,6 +324,8 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
|
||||
|
||||
mutex_lock(&pool->lock);
|
||||
ret = pool->p_get_chunk(pool, chunk);
|
||||
+ if (ret == 0)
|
||||
+ pool->available_elems -= 1 << chunk->order;
|
||||
mutex_unlock(&pool->lock);
|
||||
|
||||
return ret;
|
||||
@@ -334,6 +336,7 @@ void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
|
||||
{
|
||||
mutex_lock(&pool->lock);
|
||||
pool->p_put_chunk(pool, chunk);
|
||||
+ pool->available_elems += 1 << chunk->order;
|
||||
mutex_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
@@ -360,6 +363,7 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_
|
||||
res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP;
|
||||
|
||||
pool->alloc_log_sz = pool_attr->alloc_log_sz;
|
||||
+ pool->available_elems = 1 << pool_attr->alloc_log_sz;
|
||||
|
||||
if (hws_pool_db_init(pool, res_db_type))
|
||||
goto free_pool;
|
||||
@@ -377,6 +381,9 @@ void mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
|
||||
{
|
||||
mutex_destroy(&pool->lock);
|
||||
|
||||
+ if (pool->available_elems != 1 << pool->alloc_log_sz)
|
||||
+ mlx5hws_err(pool->ctx, "Attempting to destroy non-empty pool\n");
|
||||
+
|
||||
if (pool->resource)
|
||||
hws_pool_resource_free(pool);
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
index 9a781a87f097..c82760d53e1a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
@@ -71,6 +71,7 @@ struct mlx5hws_pool {
|
||||
enum mlx5hws_pool_flags flags;
|
||||
struct mutex lock; /* protect the pool */
|
||||
size_t alloc_log_sz;
|
||||
+ size_t available_elems;
|
||||
enum mlx5hws_table_type tbl_type;
|
||||
enum mlx5hws_pool_optimize opt_type;
|
||||
struct mlx5hws_pool_resource *resource;
|
||||
@@ -103,4 +104,28 @@ static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool)
|
||||
{
|
||||
return pool->mirror_resource->base_id;
|
||||
}
|
||||
+
|
||||
+static inline bool
|
||||
+mlx5hws_pool_empty(struct mlx5hws_pool *pool)
|
||||
+{
|
||||
+ bool ret;
|
||||
+
|
||||
+ mutex_lock(&pool->lock);
|
||||
+ ret = pool->available_elems == 0;
|
||||
+ mutex_unlock(&pool->lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static inline bool
|
||||
+mlx5hws_pool_full(struct mlx5hws_pool *pool)
|
||||
+{
|
||||
+ bool ret;
|
||||
+
|
||||
+ mutex_lock(&pool->lock);
|
||||
+ ret = pool->available_elems == (1 << pool->alloc_log_sz);
|
||||
+ mutex_unlock(&pool->lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
#endif /* MLX5HWS_POOL_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
53
SOURCES/1352-net-mlx5-hws-fix-pool-size-optimization.patch
Normal file
53
SOURCES/1352-net-mlx5-hws-fix-pool-size-optimization.patch
Normal file
@ -0,0 +1,53 @@
|
||||
From 83591e87d75f1fbe1bad278c3b590cc83e85c276 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:41:59 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Fix pool size optimization
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit a68334f9750f41fc36990840090ef9dbee1e2c7e
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:37 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Fix pool size optimization
|
||||
|
||||
The optimization to create a size-one STE range for the unused direction
|
||||
was broken. The hardware prevents us from creating RTCs over unallocated
|
||||
STE space, so the only reason this has worked so far is because the
|
||||
optimization was never used.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-8-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
index 26d85fe3c417..7e37d6e9eb83 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
|
||||
@@ -80,7 +80,7 @@ static int hws_pool_resource_alloc(struct mlx5hws_pool *pool)
|
||||
u32 fw_ft_type, opt_log_range;
|
||||
|
||||
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false);
|
||||
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ?
|
||||
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ?
|
||||
0 : pool->alloc_log_sz;
|
||||
resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
|
||||
if (!resource) {
|
||||
@@ -94,7 +94,7 @@ static int hws_pool_resource_alloc(struct mlx5hws_pool *pool)
|
||||
struct mlx5hws_pool_resource *mirror_resource;
|
||||
|
||||
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true);
|
||||
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ?
|
||||
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ?
|
||||
0 : pool->alloc_log_sz;
|
||||
mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
|
||||
if (!mirror_resource) {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
585
SOURCES/1353-net-mlx5-hws-implement-action-ste-pool.patch
Normal file
585
SOURCES/1353-net-mlx5-hws-implement-action-ste-pool.patch
Normal file
@ -0,0 +1,585 @@
|
||||
From be613df0e750dc94c718ad4c944fa5542870a95c Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:00 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Implement action STE pool
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 983d01b2ce0ac688bb42489f33a29a02274366d5
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:38 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Implement action STE pool
|
||||
|
||||
Implement a per-queue pool of action STEs that match STEs can link to,
|
||||
regardless of matcher.
|
||||
|
||||
The code relies on hints to optimize whether a given rule is added to
|
||||
rx-only, tx-only or both. Correspondingly, action STEs need to be added
|
||||
to different RTC for ingress or egress paths. For rx-and-tx rules, the
|
||||
current rule implementation dictates that the offsets for a given rule
|
||||
must be the same in both RTCs.
|
||||
|
||||
To avoid wasting STEs, each action STE pool element holds 3 pools:
|
||||
rx-only, tx-only, and rx-and-tx, corresponding to the possible values of
|
||||
the pool optimization enum. The implementation then chooses at rule
|
||||
creation / update which of these elements to allocate from.
|
||||
|
||||
Each element holds multiple action STE tables, which wrap an RTC, an STE
|
||||
range, the logic to buddy-allocate offsets from the range, and an STC
|
||||
that allows match STEs to point to this table. When allocating offsets
|
||||
from an element, we iterate through available action STE tables and, if
|
||||
needed, create a new table.
|
||||
|
||||
Similar to the previous implementation, this iteration does not free any
|
||||
resources. This is implemented in a subsequent patch.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-9-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
|
||||
index 568bbe5f83f5..d292e6a9e22c 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
|
||||
@@ -154,7 +154,8 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \
|
||||
steering/hws/vport.o \
|
||||
steering/hws/bwc_complex.o \
|
||||
steering/hws/fs_hws_pools.o \
|
||||
- steering/hws/fs_hws.o
|
||||
+ steering/hws/fs_hws.o \
|
||||
+ steering/hws/action_ste_pool.o
|
||||
|
||||
#
|
||||
# SF device
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
|
||||
new file mode 100644
|
||||
index 000000000000..cb6ad8411631
|
||||
--- /dev/null
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
|
||||
@@ -0,0 +1,387 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
|
||||
+
|
||||
+#include "internal.h"
|
||||
+
|
||||
+static const char *
|
||||
+hws_pool_opt_to_str(enum mlx5hws_pool_optimize opt)
|
||||
+{
|
||||
+ switch (opt) {
|
||||
+ case MLX5HWS_POOL_OPTIMIZE_NONE:
|
||||
+ return "rx-and-tx";
|
||||
+ case MLX5HWS_POOL_OPTIMIZE_ORIG:
|
||||
+ return "rx-only";
|
||||
+ case MLX5HWS_POOL_OPTIMIZE_MIRROR:
|
||||
+ return "tx-only";
|
||||
+ default:
|
||||
+ return "unknown";
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+hws_action_ste_table_create_pool(struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_table *action_tbl,
|
||||
+ enum mlx5hws_pool_optimize opt, size_t log_sz)
|
||||
+{
|
||||
+ struct mlx5hws_pool_attr pool_attr = { 0 };
|
||||
+
|
||||
+ pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
+ pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB;
|
||||
+ pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY;
|
||||
+ pool_attr.opt_type = opt;
|
||||
+ pool_attr.alloc_log_sz = log_sz;
|
||||
+
|
||||
+ action_tbl->pool = mlx5hws_pool_create(ctx, &pool_attr);
|
||||
+ if (!action_tbl->pool) {
|
||||
+ mlx5hws_err(ctx, "Failed to allocate STE pool\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hws_action_ste_table_create_single_rtc(
|
||||
+ struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_table *action_tbl,
|
||||
+ enum mlx5hws_pool_optimize opt, size_t log_sz, bool tx)
|
||||
+{
|
||||
+ struct mlx5hws_cmd_rtc_create_attr rtc_attr = { 0 };
|
||||
+ u32 *rtc_id;
|
||||
+
|
||||
+ rtc_attr.log_depth = 0;
|
||||
+ rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
|
||||
+ /* Action STEs use the default always hit definer. */
|
||||
+ rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer;
|
||||
+ rtc_attr.is_frst_jumbo = false;
|
||||
+ rtc_attr.miss_ft_id = 0;
|
||||
+ rtc_attr.pd = ctx->pd_num;
|
||||
+ rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
|
||||
+
|
||||
+ if (tx) {
|
||||
+ rtc_attr.table_type = FS_FT_FDB_TX;
|
||||
+ rtc_attr.ste_base =
|
||||
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool);
|
||||
+ rtc_attr.stc_base =
|
||||
+ mlx5hws_pool_get_base_mirror_id(ctx->stc_pool);
|
||||
+ rtc_attr.log_size =
|
||||
+ opt == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_sz;
|
||||
+ rtc_id = &action_tbl->rtc_1_id;
|
||||
+ } else {
|
||||
+ rtc_attr.table_type = FS_FT_FDB_RX;
|
||||
+ rtc_attr.ste_base = mlx5hws_pool_get_base_id(action_tbl->pool);
|
||||
+ rtc_attr.stc_base = mlx5hws_pool_get_base_id(ctx->stc_pool);
|
||||
+ rtc_attr.log_size =
|
||||
+ opt == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_sz;
|
||||
+ rtc_id = &action_tbl->rtc_0_id;
|
||||
+ }
|
||||
+
|
||||
+ return mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_id);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+hws_action_ste_table_create_rtcs(struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_table *action_tbl,
|
||||
+ enum mlx5hws_pool_optimize opt, size_t log_sz)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt,
|
||||
+ log_sz, false);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt,
|
||||
+ log_sz, true);
|
||||
+ if (err) {
|
||||
+ mlx5hws_cmd_rtc_destroy(ctx->mdev, action_tbl->rtc_0_id);
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+hws_action_ste_table_destroy_rtcs(struct mlx5hws_action_ste_table *action_tbl)
|
||||
+{
|
||||
+ mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev,
|
||||
+ action_tbl->rtc_1_id);
|
||||
+ mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev,
|
||||
+ action_tbl->rtc_0_id);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+hws_action_ste_table_create_stc(struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_table *action_tbl)
|
||||
+{
|
||||
+ struct mlx5hws_cmd_stc_modify_attr stc_attr = { 0 };
|
||||
+
|
||||
+ stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
|
||||
+ stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
|
||||
+ stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
|
||||
+ stc_attr.ste_table.ste_pool = action_tbl->pool;
|
||||
+ stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
|
||||
+
|
||||
+ return mlx5hws_action_alloc_single_stc(ctx, &stc_attr,
|
||||
+ MLX5HWS_TABLE_TYPE_FDB,
|
||||
+ &action_tbl->stc);
|
||||
+}
|
||||
+
|
||||
+static struct mlx5hws_action_ste_table *
|
||||
+hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem)
|
||||
+{
|
||||
+ enum mlx5hws_pool_optimize opt = parent_elem->opt;
|
||||
+ struct mlx5hws_context *ctx = parent_elem->ctx;
|
||||
+ struct mlx5hws_action_ste_table *action_tbl;
|
||||
+ size_t log_sz;
|
||||
+ int err;
|
||||
+
|
||||
+ log_sz = min(parent_elem->log_sz ?
|
||||
+ parent_elem->log_sz +
|
||||
+ MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ :
|
||||
+ MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ,
|
||||
+ MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ);
|
||||
+
|
||||
+ action_tbl = kzalloc(sizeof(*action_tbl), GFP_KERNEL);
|
||||
+ if (!action_tbl)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ err = hws_action_ste_table_create_pool(ctx, action_tbl, opt, log_sz);
|
||||
+ if (err)
|
||||
+ goto free_tbl;
|
||||
+
|
||||
+ err = hws_action_ste_table_create_rtcs(ctx, action_tbl, opt, log_sz);
|
||||
+ if (err)
|
||||
+ goto destroy_pool;
|
||||
+
|
||||
+ err = hws_action_ste_table_create_stc(ctx, action_tbl);
|
||||
+ if (err)
|
||||
+ goto destroy_rtcs;
|
||||
+
|
||||
+ action_tbl->parent_elem = parent_elem;
|
||||
+ INIT_LIST_HEAD(&action_tbl->list_node);
|
||||
+ list_add(&action_tbl->list_node, &parent_elem->available);
|
||||
+ parent_elem->log_sz = log_sz;
|
||||
+
|
||||
+ mlx5hws_dbg(ctx,
|
||||
+ "Allocated %s action STE table log_sz %zu; STEs (%d, %d); RTCs (%d, %d); STC %d\n",
|
||||
+ hws_pool_opt_to_str(opt), log_sz,
|
||||
+ mlx5hws_pool_get_base_id(action_tbl->pool),
|
||||
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool),
|
||||
+ action_tbl->rtc_0_id, action_tbl->rtc_1_id,
|
||||
+ action_tbl->stc.offset);
|
||||
+
|
||||
+ return action_tbl;
|
||||
+
|
||||
+destroy_rtcs:
|
||||
+ hws_action_ste_table_destroy_rtcs(action_tbl);
|
||||
+destroy_pool:
|
||||
+ mlx5hws_pool_destroy(action_tbl->pool);
|
||||
+free_tbl:
|
||||
+ kfree(action_tbl);
|
||||
+
|
||||
+ return ERR_PTR(err);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+hws_action_ste_table_destroy(struct mlx5hws_action_ste_table *action_tbl)
|
||||
+{
|
||||
+ struct mlx5hws_context *ctx = action_tbl->parent_elem->ctx;
|
||||
+
|
||||
+ mlx5hws_dbg(ctx,
|
||||
+ "Destroying %s action STE table: STEs (%d, %d); RTCs (%d, %d); STC %d\n",
|
||||
+ hws_pool_opt_to_str(action_tbl->parent_elem->opt),
|
||||
+ mlx5hws_pool_get_base_id(action_tbl->pool),
|
||||
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool),
|
||||
+ action_tbl->rtc_0_id, action_tbl->rtc_1_id,
|
||||
+ action_tbl->stc.offset);
|
||||
+
|
||||
+ mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB,
|
||||
+ &action_tbl->stc);
|
||||
+ hws_action_ste_table_destroy_rtcs(action_tbl);
|
||||
+ mlx5hws_pool_destroy(action_tbl->pool);
|
||||
+
|
||||
+ list_del(&action_tbl->list_node);
|
||||
+ kfree(action_tbl);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+hws_action_ste_pool_element_init(struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_pool_element *elem,
|
||||
+ enum mlx5hws_pool_optimize opt)
|
||||
+{
|
||||
+ elem->ctx = ctx;
|
||||
+ elem->opt = opt;
|
||||
+ INIT_LIST_HEAD(&elem->available);
|
||||
+ INIT_LIST_HEAD(&elem->full);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void hws_action_ste_pool_element_destroy(
|
||||
+ struct mlx5hws_action_ste_pool_element *elem)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_table *action_tbl, *p;
|
||||
+
|
||||
+ /* This should be empty, but attempt to free its elements anyway. */
|
||||
+ list_for_each_entry_safe(action_tbl, p, &elem->full, list_node)
|
||||
+ hws_action_ste_table_destroy(action_tbl);
|
||||
+
|
||||
+ list_for_each_entry_safe(action_tbl, p, &elem->available, list_node)
|
||||
+ hws_action_ste_table_destroy(action_tbl);
|
||||
+}
|
||||
+
|
||||
+static int hws_action_ste_pool_init(struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_action_ste_pool *pool)
|
||||
+{
|
||||
+ enum mlx5hws_pool_optimize opt;
|
||||
+ int err;
|
||||
+
|
||||
+ /* Rules which are added for both RX and TX must use the same action STE
|
||||
+ * indices for both. If we were to use a single table, then RX-only and
|
||||
+ * TX-only rules would waste the unused entries. Thus, we use separate
|
||||
+ * table sets for the three cases.
|
||||
+ */
|
||||
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX;
|
||||
+ opt++) {
|
||||
+ err = hws_action_ste_pool_element_init(ctx, &pool->elems[opt],
|
||||
+ opt);
|
||||
+ if (err)
|
||||
+ goto destroy_elems;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+destroy_elems:
|
||||
+ while (opt-- > MLX5HWS_POOL_OPTIMIZE_NONE)
|
||||
+ hws_action_ste_pool_element_destroy(&pool->elems[opt]);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool)
|
||||
+{
|
||||
+ int opt;
|
||||
+
|
||||
+ for (opt = MLX5HWS_POOL_OPTIMIZE_MAX - 1;
|
||||
+ opt >= MLX5HWS_POOL_OPTIMIZE_NONE; opt--)
|
||||
+ hws_action_ste_pool_element_destroy(&pool->elems[opt]);
|
||||
+}
|
||||
+
|
||||
+int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_pool *pool;
|
||||
+ size_t queues = ctx->queues;
|
||||
+ int i, err;
|
||||
+
|
||||
+ pool = kcalloc(queues, sizeof(*pool), GFP_KERNEL);
|
||||
+ if (!pool)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ for (i = 0; i < queues; i++) {
|
||||
+ err = hws_action_ste_pool_init(ctx, &pool[i]);
|
||||
+ if (err)
|
||||
+ goto free_pool;
|
||||
+ }
|
||||
+
|
||||
+ ctx->action_ste_pool = pool;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+free_pool:
|
||||
+ while (i--)
|
||||
+ hws_action_ste_pool_destroy(&pool[i]);
|
||||
+ kfree(pool);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx)
|
||||
+{
|
||||
+ size_t queues = ctx->queues;
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < queues; i++)
|
||||
+ hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]);
|
||||
+
|
||||
+ kfree(ctx->action_ste_pool);
|
||||
+}
|
||||
+
|
||||
+static struct mlx5hws_action_ste_pool_element *
|
||||
+hws_action_ste_choose_elem(struct mlx5hws_action_ste_pool *pool,
|
||||
+ bool skip_rx, bool skip_tx)
|
||||
+{
|
||||
+ if (skip_rx)
|
||||
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_MIRROR];
|
||||
+
|
||||
+ if (skip_tx)
|
||||
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_ORIG];
|
||||
+
|
||||
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_NONE];
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+hws_action_ste_table_chunk_alloc(struct mlx5hws_action_ste_table *action_tbl,
|
||||
+ struct mlx5hws_action_ste_chunk *chunk)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = mlx5hws_pool_chunk_alloc(action_tbl->pool, &chunk->ste);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ chunk->action_tbl = action_tbl;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
|
||||
+ bool skip_rx, bool skip_tx,
|
||||
+ struct mlx5hws_action_ste_chunk *chunk)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_pool_element *elem;
|
||||
+ struct mlx5hws_action_ste_table *action_tbl;
|
||||
+ bool found;
|
||||
+ int err;
|
||||
+
|
||||
+ if (skip_rx && skip_tx)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx);
|
||||
+
|
||||
+ mlx5hws_dbg(elem->ctx,
|
||||
+ "Allocating action STEs skip_rx %d skip_tx %d order %d\n",
|
||||
+ skip_rx, skip_tx, chunk->ste.order);
|
||||
+
|
||||
+ found = false;
|
||||
+ list_for_each_entry(action_tbl, &elem->available, list_node) {
|
||||
+ if (!hws_action_ste_table_chunk_alloc(action_tbl, chunk)) {
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (!found) {
|
||||
+ action_tbl = hws_action_ste_table_alloc(elem);
|
||||
+ if (IS_ERR(action_tbl))
|
||||
+ return PTR_ERR(action_tbl);
|
||||
+
|
||||
+ err = hws_action_ste_table_chunk_alloc(action_tbl, chunk);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+ }
|
||||
+
|
||||
+ if (mlx5hws_pool_empty(action_tbl->pool))
|
||||
+ list_move(&action_tbl->list_node, &elem->full);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk)
|
||||
+{
|
||||
+ mlx5hws_dbg(chunk->action_tbl->pool->ctx,
|
||||
+ "Freeing action STEs offset %d order %d\n",
|
||||
+ chunk->ste.offset, chunk->ste.order);
|
||||
+ mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste);
|
||||
+ list_move(&chunk->action_tbl->list_node,
|
||||
+ &chunk->action_tbl->parent_elem->available);
|
||||
+}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
|
||||
new file mode 100644
|
||||
index 000000000000..2de660a63223
|
||||
--- /dev/null
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
|
||||
@@ -0,0 +1,58 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
|
||||
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
|
||||
+
|
||||
+#ifndef ACTION_STE_POOL_H_
|
||||
+#define ACTION_STE_POOL_H_
|
||||
+
|
||||
+#define MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ 10
|
||||
+#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1
|
||||
+#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20
|
||||
+
|
||||
+struct mlx5hws_action_ste_pool_element;
|
||||
+
|
||||
+struct mlx5hws_action_ste_table {
|
||||
+ struct mlx5hws_action_ste_pool_element *parent_elem;
|
||||
+ /* Wraps the RTC and STE range for this given action. */
|
||||
+ struct mlx5hws_pool *pool;
|
||||
+ /* Match STEs use this STC to jump to this pool's RTC. */
|
||||
+ struct mlx5hws_pool_chunk stc;
|
||||
+ u32 rtc_0_id;
|
||||
+ u32 rtc_1_id;
|
||||
+ struct list_head list_node;
|
||||
+};
|
||||
+
|
||||
+struct mlx5hws_action_ste_pool_element {
|
||||
+ struct mlx5hws_context *ctx;
|
||||
+ size_t log_sz; /* Size of the largest table so far. */
|
||||
+ enum mlx5hws_pool_optimize opt;
|
||||
+ struct list_head available;
|
||||
+ struct list_head full;
|
||||
+};
|
||||
+
|
||||
+/* Central repository of action STEs. The context contains one of these pools
|
||||
+ * per queue.
|
||||
+ */
|
||||
+struct mlx5hws_action_ste_pool {
|
||||
+ struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX];
|
||||
+};
|
||||
+
|
||||
+/* A chunk of STEs and the table it was allocated from. Used by rules. */
|
||||
+struct mlx5hws_action_ste_chunk {
|
||||
+ struct mlx5hws_action_ste_table *action_tbl;
|
||||
+ struct mlx5hws_pool_chunk ste;
|
||||
+};
|
||||
+
|
||||
+int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx);
|
||||
+
|
||||
+void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx);
|
||||
+
|
||||
+/* Callers are expected to fill chunk->ste.order. On success, this function
|
||||
+ * populates chunk->action_tbl and chunk->ste.offset.
|
||||
+ */
|
||||
+int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
|
||||
+ bool skip_rx, bool skip_tx,
|
||||
+ struct mlx5hws_action_ste_chunk *chunk);
|
||||
+
|
||||
+void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk);
|
||||
+
|
||||
+#endif /* ACTION_STE_POOL_H_ */
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
index b7cb736b74d7..428dae869706 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
|
||||
@@ -158,10 +158,16 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx,
|
||||
if (ret)
|
||||
goto pools_uninit;
|
||||
|
||||
+ ret = mlx5hws_action_ste_pool_init(ctx);
|
||||
+ if (ret)
|
||||
+ goto close_queues;
|
||||
+
|
||||
INIT_LIST_HEAD(&ctx->tbl_list);
|
||||
|
||||
return 0;
|
||||
|
||||
+close_queues:
|
||||
+ mlx5hws_send_queues_close(ctx);
|
||||
pools_uninit:
|
||||
hws_context_pools_uninit(ctx);
|
||||
uninit_pd:
|
||||
@@ -174,6 +180,7 @@ static void hws_context_uninit_hws(struct mlx5hws_context *ctx)
|
||||
if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT))
|
||||
return;
|
||||
|
||||
+ mlx5hws_action_ste_pool_uninit(ctx);
|
||||
mlx5hws_send_queues_close(ctx);
|
||||
hws_context_pools_uninit(ctx);
|
||||
hws_context_uninit_pd(ctx);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
index 38c3647444ad..e987e93bbc6e 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
@@ -39,6 +39,7 @@ struct mlx5hws_context {
|
||||
struct mlx5hws_cmd_query_caps *caps;
|
||||
u32 pd_num;
|
||||
struct mlx5hws_pool *stc_pool;
|
||||
+ struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */
|
||||
struct mlx5hws_context_common_res common_res;
|
||||
struct mlx5hws_pattern_cache *pattern_cache;
|
||||
struct mlx5hws_definer_cache *definer_cache;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
|
||||
index 30ccd635b505..21279d503117 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "context.h"
|
||||
#include "table.h"
|
||||
#include "send.h"
|
||||
+#include "action_ste_pool.h"
|
||||
#include "rule.h"
|
||||
#include "cmd.h"
|
||||
#include "action.h"
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
index c82760d53e1a..33e33d5f1fb3 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
|
||||
@@ -33,6 +33,7 @@ enum mlx5hws_pool_optimize {
|
||||
MLX5HWS_POOL_OPTIMIZE_NONE = 0x0,
|
||||
MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1,
|
||||
MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2,
|
||||
+ MLX5HWS_POOL_OPTIMIZE_MAX = 0x3,
|
||||
};
|
||||
|
||||
struct mlx5hws_pool_attr {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
190
SOURCES/1354-net-mlx5-hws-use-the-new-action-ste-pool.patch
Normal file
190
SOURCES/1354-net-mlx5-hws-use-the-new-action-ste-pool.patch
Normal file
@ -0,0 +1,190 @@
|
||||
From 95ecc26ff257b1b713163c5a13570543b03678f2 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:00 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Use the new action STE pool
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 593a9470a8565a59a07b577d6bcb3c199f232d4a
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:39 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Use the new action STE pool
|
||||
|
||||
Use the central action STE pool when creating / updating rules.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-10-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
index a27a2d5ffc7b..5b758467ed03 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
@@ -195,44 +195,30 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule,
|
||||
}
|
||||
}
|
||||
|
||||
-static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule)
|
||||
+static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule,
|
||||
+ u16 queue_id, bool skip_rx,
|
||||
+ bool skip_tx)
|
||||
{
|
||||
struct mlx5hws_matcher *matcher = rule->matcher;
|
||||
- struct mlx5hws_matcher_action_ste *action_ste;
|
||||
- struct mlx5hws_pool_chunk ste = {0};
|
||||
- int ret;
|
||||
-
|
||||
- action_ste = &matcher->action_ste;
|
||||
- ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes));
|
||||
- ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste);
|
||||
- if (unlikely(ret)) {
|
||||
- mlx5hws_err(matcher->tbl->ctx,
|
||||
- "Failed to allocate STE for rule actions");
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- rule->action_ste.pool = matcher->action_ste.pool;
|
||||
- rule->action_ste.num_stes = matcher->action_ste.max_stes;
|
||||
- rule->action_ste.index = ste.offset;
|
||||
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
|
||||
- return 0;
|
||||
+ rule->action_ste.ste.order =
|
||||
+ ilog2(roundup_pow_of_two(matcher->action_ste.max_stes));
|
||||
+ return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id],
|
||||
+ skip_rx, skip_tx,
|
||||
+ &rule->action_ste);
|
||||
}
|
||||
|
||||
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste)
|
||||
+void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste)
|
||||
{
|
||||
- struct mlx5hws_pool_chunk ste = {0};
|
||||
-
|
||||
- if (!action_ste->num_stes)
|
||||
+ if (!action_ste->action_tbl)
|
||||
return;
|
||||
|
||||
- ste.order = ilog2(roundup_pow_of_two(action_ste->num_stes));
|
||||
- ste.offset = action_ste->index;
|
||||
-
|
||||
/* This release is safe only when the rule match STE was deleted
|
||||
* (when the rule is being deleted) or replaced with the new STE that
|
||||
* isn't pointing to old action STEs (when the rule is being updated).
|
||||
*/
|
||||
- mlx5hws_pool_chunk_free(action_ste->pool, &ste);
|
||||
+ mlx5hws_action_ste_chunk_free(action_ste);
|
||||
}
|
||||
|
||||
static void hws_rule_create_init(struct mlx5hws_rule *rule,
|
||||
@@ -250,22 +236,15 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
|
||||
rule->rtc_0 = 0;
|
||||
rule->rtc_1 = 0;
|
||||
|
||||
- rule->action_ste.pool = NULL;
|
||||
- rule->action_ste.num_stes = 0;
|
||||
- rule->action_ste.index = -1;
|
||||
-
|
||||
rule->status = MLX5HWS_RULE_STATUS_CREATING;
|
||||
} else {
|
||||
rule->status = MLX5HWS_RULE_STATUS_UPDATING;
|
||||
+ /* Save the old action STE info so we can free it after writing
|
||||
+ * new action STEs and a corresponding match STE.
|
||||
+ */
|
||||
+ rule->old_action_ste = rule->action_ste;
|
||||
}
|
||||
|
||||
- /* Initialize the old action STE info - shallow-copy action_ste.
|
||||
- * In create flow this will set old_action_ste fields to initial values.
|
||||
- * In update flow this will save the existing action STE info,
|
||||
- * so that we will later use it to free old STEs.
|
||||
- */
|
||||
- rule->old_action_ste = rule->action_ste;
|
||||
-
|
||||
rule->pending_wqes = 0;
|
||||
|
||||
/* Init default send STE attributes */
|
||||
@@ -277,7 +256,6 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
|
||||
/* Init default action apply */
|
||||
apply->tbl_type = tbl->type;
|
||||
apply->common_res = &ctx->common_res;
|
||||
- apply->jump_to_action_stc = matcher->action_ste.stc.offset;
|
||||
apply->require_dep = 0;
|
||||
}
|
||||
|
||||
@@ -353,17 +331,24 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule,
|
||||
|
||||
if (action_stes) {
|
||||
/* Allocate action STEs for rules that need more than match STE */
|
||||
- ret = hws_rule_alloc_action_ste(rule);
|
||||
+ ret = mlx5hws_rule_alloc_action_ste(rule, attr->queue_id,
|
||||
+ !!ste_attr.rtc_0,
|
||||
+ !!ste_attr.rtc_1);
|
||||
if (ret) {
|
||||
mlx5hws_err(ctx, "Failed to allocate action memory %d", ret);
|
||||
mlx5hws_send_abort_new_dep_wqe(queue);
|
||||
return ret;
|
||||
}
|
||||
+ apply.jump_to_action_stc =
|
||||
+ rule->action_ste.action_tbl->stc.offset;
|
||||
/* Skip RX/TX based on the dep_wqe init */
|
||||
- ste_attr.rtc_0 = dep_wqe->rtc_0 ? matcher->action_ste.rtc_0_id : 0;
|
||||
- ste_attr.rtc_1 = dep_wqe->rtc_1 ? matcher->action_ste.rtc_1_id : 0;
|
||||
+ ste_attr.rtc_0 = dep_wqe->rtc_0 ?
|
||||
+ rule->action_ste.action_tbl->rtc_0_id : 0;
|
||||
+ ste_attr.rtc_1 = dep_wqe->rtc_1 ?
|
||||
+ rule->action_ste.action_tbl->rtc_1_id : 0;
|
||||
/* Action STEs are written to a specific index last to first */
|
||||
- ste_attr.direct_index = rule->action_ste.index + action_stes;
|
||||
+ ste_attr.direct_index =
|
||||
+ rule->action_ste.ste.offset + action_stes;
|
||||
apply.next_direct_idx = ste_attr.direct_index;
|
||||
} else {
|
||||
apply.next_direct_idx = 0;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
|
||||
index b5ee94ac449b..1c47a9c11572 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
|
||||
@@ -43,12 +43,6 @@ struct mlx5hws_rule_match_tag {
|
||||
};
|
||||
};
|
||||
|
||||
-struct mlx5hws_rule_action_ste_info {
|
||||
- struct mlx5hws_pool *pool;
|
||||
- int index; /* STE array index */
|
||||
- u8 num_stes;
|
||||
-};
|
||||
-
|
||||
struct mlx5hws_rule_resize_info {
|
||||
u32 rtc_0;
|
||||
u32 rtc_1;
|
||||
@@ -64,8 +58,8 @@ struct mlx5hws_rule {
|
||||
struct mlx5hws_rule_match_tag tag;
|
||||
struct mlx5hws_rule_resize_info *resize_info;
|
||||
};
|
||||
- struct mlx5hws_rule_action_ste_info action_ste;
|
||||
- struct mlx5hws_rule_action_ste_info old_action_ste;
|
||||
+ struct mlx5hws_action_ste_chunk action_ste;
|
||||
+ struct mlx5hws_action_ste_chunk old_action_ste;
|
||||
u32 rtc_0; /* The RTC into which the STE was inserted */
|
||||
u32 rtc_1; /* The RTC into which the STE was inserted */
|
||||
u8 status; /* enum mlx5hws_rule_status */
|
||||
@@ -75,7 +69,7 @@ struct mlx5hws_rule {
|
||||
*/
|
||||
};
|
||||
|
||||
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste);
|
||||
+void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste);
|
||||
|
||||
int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule,
|
||||
void *queue, void *user_data);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
875
SOURCES/1355-net-mlx5-hws-cleanup-matcher-action-ste-table.patch
Normal file
875
SOURCES/1355-net-mlx5-hws-cleanup-matcher-action-ste-table.patch
Normal file
@ -0,0 +1,875 @@
|
||||
From 0e63b341ab3882d9bf6aacf824f70c2b41ef65e7 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:00 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Cleanup matcher action STE table
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 22174f16f1218fc98e374b3653decae54aa481f8
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:40 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Cleanup matcher action STE table
|
||||
|
||||
Remove the matcher action STE implementation now that the code uses
|
||||
per-queue action STE pools. This also allows simplifying matcher code
|
||||
because it is now only handling a single type of RTC/STE.
|
||||
|
||||
The matcher resize data is also going away. Matchers were saving old
|
||||
action STE data because the rules still used it, but now that data lives
|
||||
in the action STE pool and is no longer coupled to a matcher.
|
||||
|
||||
Furthermore, matchers no longer need to rehash due to action template
|
||||
addition. If a new action template needs more action STEs, we simply
|
||||
update the matcher's num_of_action_stes and future rules will allocate
|
||||
the correct number. Existing rules are unaffected by such an operation
|
||||
and can continue to use their existing action STEs.
|
||||
|
||||
The range action was using the matcher action STE implementation, but
|
||||
there was no reason to do this other than the container fitting the
|
||||
purpose. Extract that information to a separate structure.
|
||||
|
||||
Finally, stop dumping per-matcher information about action RTCs,
|
||||
because they no longer exist. A later patch in this series will add
|
||||
support for dumping action STE pools.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-11-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index 161ad720b339..bef4d25c1a2a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1574,13 +1574,13 @@ hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx)
|
||||
return definer;
|
||||
}
|
||||
|
||||
-static struct mlx5hws_matcher_action_ste *
|
||||
+static struct mlx5hws_range_action_table *
|
||||
hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
struct mlx5hws_definer *definer,
|
||||
u32 miss_ft_id)
|
||||
{
|
||||
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
|
||||
- struct mlx5hws_matcher_action_ste *table_ste;
|
||||
+ struct mlx5hws_range_action_table *table_ste;
|
||||
struct mlx5hws_pool_attr pool_attr = {0};
|
||||
struct mlx5hws_pool *ste_pool, *stc_pool;
|
||||
u32 *rtc_0_id, *rtc_1_id;
|
||||
@@ -1669,9 +1669,9 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static void
|
||||
-hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
- struct mlx5hws_matcher_action_ste *table_ste)
|
||||
+static void hws_action_destroy_dest_match_range_table(
|
||||
+ struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_range_action_table *table_ste)
|
||||
{
|
||||
mutex_lock(&ctx->ctrl_lock);
|
||||
|
||||
@@ -1683,12 +1683,11 @@ hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx,
|
||||
mutex_unlock(&ctx->ctrl_lock);
|
||||
}
|
||||
|
||||
-static int
|
||||
-hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx,
|
||||
- struct mlx5hws_matcher_action_ste *table_ste,
|
||||
- struct mlx5hws_action *hit_ft_action,
|
||||
- struct mlx5hws_definer *range_definer,
|
||||
- u32 min, u32 max)
|
||||
+static int hws_action_create_dest_match_range_fill_table(
|
||||
+ struct mlx5hws_context *ctx,
|
||||
+ struct mlx5hws_range_action_table *table_ste,
|
||||
+ struct mlx5hws_action *hit_ft_action,
|
||||
+ struct mlx5hws_definer *range_definer, u32 min, u32 max)
|
||||
{
|
||||
struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0};
|
||||
struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0};
|
||||
@@ -1784,7 +1783,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx,
|
||||
u32 min, u32 max, u32 flags)
|
||||
{
|
||||
struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
|
||||
- struct mlx5hws_matcher_action_ste *table_ste;
|
||||
+ struct mlx5hws_range_action_table *table_ste;
|
||||
struct mlx5hws_action *hit_ft_action;
|
||||
struct mlx5hws_definer *definer;
|
||||
struct mlx5hws_action *action;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
index 64b76075f7f8..25fa0d4c9221 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
@@ -118,6 +118,12 @@ struct mlx5hws_action_template {
|
||||
u8 only_term;
|
||||
};
|
||||
|
||||
+struct mlx5hws_range_action_table {
|
||||
+ struct mlx5hws_pool *pool;
|
||||
+ u32 rtc_0_id;
|
||||
+ u32 rtc_1_id;
|
||||
+};
|
||||
+
|
||||
struct mlx5hws_action {
|
||||
u8 type;
|
||||
u8 flags;
|
||||
@@ -186,7 +192,7 @@ struct mlx5hws_action {
|
||||
size_t size;
|
||||
} remove_header;
|
||||
struct {
|
||||
- struct mlx5hws_matcher_action_ste *table_ste;
|
||||
+ struct mlx5hws_range_action_table *table_ste;
|
||||
struct mlx5hws_action *hit_ft_action;
|
||||
struct mlx5hws_definer *definer;
|
||||
} range;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index 32de8bfc7644..510bfbbe5991 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -478,21 +478,9 @@ hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps;
|
||||
|
||||
/* check the match RTC size */
|
||||
- if ((bwc_matcher->size_log +
|
||||
- MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH +
|
||||
- MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) >
|
||||
- (caps->ste_alloc_log_max - 1))
|
||||
- return true;
|
||||
-
|
||||
- /* check the action RTC size */
|
||||
- if ((bwc_matcher->size_log +
|
||||
- MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP +
|
||||
- ilog2(roundup_pow_of_two(bwc_matcher->matcher->action_ste.max_stes)) +
|
||||
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT) >
|
||||
- (caps->ste_alloc_log_max - 1))
|
||||
- return true;
|
||||
-
|
||||
- return false;
|
||||
+ return (bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH +
|
||||
+ MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) >
|
||||
+ (caps->ste_alloc_log_max - 1);
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -779,19 +767,6 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
return hws_bwc_matcher_move(bwc_matcher);
|
||||
}
|
||||
|
||||
-static int
|
||||
-hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
-{
|
||||
- /* Rehash by action template doesn't require any additional checking.
|
||||
- * The bwc_matcher already contains the new action template.
|
||||
- * Just do the usual rehash:
|
||||
- * - create new matcher
|
||||
- * - move all the rules to the new matcher
|
||||
- * - destroy the old matcher
|
||||
- */
|
||||
- return hws_bwc_matcher_move(bwc_matcher);
|
||||
-}
|
||||
-
|
||||
int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
u32 *match_param,
|
||||
struct mlx5hws_rule_action rule_actions[],
|
||||
@@ -803,7 +778,6 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
struct mlx5hws_rule_attr rule_attr;
|
||||
struct mutex *queue_lock; /* Protect the queue */
|
||||
u32 num_of_rules;
|
||||
- bool need_rehash;
|
||||
int ret = 0;
|
||||
int at_idx;
|
||||
|
||||
@@ -830,30 +804,11 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
at_idx = bwc_matcher->num_of_at - 1;
|
||||
|
||||
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx],
|
||||
- &need_rehash);
|
||||
+ bwc_matcher->at[at_idx]);
|
||||
if (unlikely(ret)) {
|
||||
hws_bwc_unlock_all_queues(ctx);
|
||||
return ret;
|
||||
}
|
||||
- if (unlikely(need_rehash)) {
|
||||
- /* The new action template requires more action STEs.
|
||||
- * Need to attempt creating new matcher with all
|
||||
- * the action templates, including the new one.
|
||||
- */
|
||||
- ret = hws_bwc_matcher_rehash_at(bwc_matcher);
|
||||
- if (unlikely(ret)) {
|
||||
- mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
|
||||
- bwc_matcher->at[at_idx] = NULL;
|
||||
- bwc_matcher->num_of_at--;
|
||||
-
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
-
|
||||
- mlx5hws_err(ctx,
|
||||
- "BWC rule insertion: rehash AT failed (%d)\n", ret);
|
||||
- return ret;
|
||||
- }
|
||||
- }
|
||||
|
||||
hws_bwc_unlock_all_queues(ctx);
|
||||
mutex_lock(queue_lock);
|
||||
@@ -973,7 +928,6 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
|
||||
struct mlx5hws_rule_attr rule_attr;
|
||||
struct mutex *queue_lock; /* Protect the queue */
|
||||
- bool need_rehash;
|
||||
int at_idx, ret;
|
||||
u16 idx;
|
||||
|
||||
@@ -1005,32 +959,11 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
at_idx = bwc_matcher->num_of_at - 1;
|
||||
|
||||
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx],
|
||||
- &need_rehash);
|
||||
+ bwc_matcher->at[at_idx]);
|
||||
if (unlikely(ret)) {
|
||||
hws_bwc_unlock_all_queues(ctx);
|
||||
return ret;
|
||||
}
|
||||
- if (unlikely(need_rehash)) {
|
||||
- /* The new action template requires more action
|
||||
- * STEs. Need to attempt creating new matcher
|
||||
- * with all the action templates, including the
|
||||
- * new one.
|
||||
- */
|
||||
- ret = hws_bwc_matcher_rehash_at(bwc_matcher);
|
||||
- if (unlikely(ret)) {
|
||||
- mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
|
||||
- bwc_matcher->at[at_idx] = NULL;
|
||||
- bwc_matcher->num_of_at--;
|
||||
-
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
-
|
||||
- mlx5hws_err(ctx,
|
||||
- "BWC rule update: rehash AT failed (%d)\n",
|
||||
- ret);
|
||||
- return ret;
|
||||
- }
|
||||
- }
|
||||
}
|
||||
|
||||
hws_bwc_unlock_all_queues(ctx);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
index 3491408c5d84..38f75dec9cfc 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
@@ -146,18 +146,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
|
||||
matcher->match_ste.rtc_1_id,
|
||||
(int)ste_1_id);
|
||||
|
||||
- ste_pool = matcher->action_ste.pool;
|
||||
- if (ste_pool) {
|
||||
- ste_0_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
- if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
- else
|
||||
- ste_1_id = -1;
|
||||
- } else {
|
||||
- ste_0_id = -1;
|
||||
- ste_1_id = -1;
|
||||
- }
|
||||
-
|
||||
ft_attr.type = matcher->tbl->fw_ft_type;
|
||||
ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev,
|
||||
matcher->end_ft_id,
|
||||
@@ -167,10 +155,7 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n",
|
||||
- matcher->action_ste.rtc_0_id, (int)ste_0_id,
|
||||
- matcher->action_ste.rtc_1_id, (int)ste_1_id,
|
||||
- 0,
|
||||
+ seq_printf(f, ",-1,-1,-1,-1,0,0x%llx,0x%llx\n",
|
||||
mlx5hws_debug_icm_to_idx(icm_addr_0),
|
||||
mlx5hws_debug_icm_to_idx(icm_addr_1));
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 3028e0387e3f..716502732d3d 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -3,25 +3,6 @@
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
-enum mlx5hws_matcher_rtc_type {
|
||||
- HWS_MATCHER_RTC_TYPE_MATCH,
|
||||
- HWS_MATCHER_RTC_TYPE_STE_ARRAY,
|
||||
- HWS_MATCHER_RTC_TYPE_MAX,
|
||||
-};
|
||||
-
|
||||
-static const char * const mlx5hws_matcher_rtc_type_str[] = {
|
||||
- [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH",
|
||||
- [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY",
|
||||
- [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN",
|
||||
-};
|
||||
-
|
||||
-static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type)
|
||||
-{
|
||||
- if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX)
|
||||
- rtc_type = HWS_MATCHER_RTC_TYPE_MAX;
|
||||
- return mlx5hws_matcher_rtc_type_str[rtc_type];
|
||||
-}
|
||||
-
|
||||
static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules)
|
||||
{
|
||||
/* Collision table concatenation is done only for large rule tables */
|
||||
@@ -209,83 +190,52 @@ static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
|
||||
}
|
||||
}
|
||||
|
||||
-static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
- enum mlx5hws_matcher_rtc_type rtc_type)
|
||||
+static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
struct mlx5hws_matcher_attr *attr = &matcher->attr;
|
||||
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
|
||||
struct mlx5hws_match_template *mt = matcher->mt;
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
- struct mlx5hws_matcher_action_ste *action_ste;
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
- struct mlx5hws_pool *ste_pool;
|
||||
- u32 *rtc_0_id, *rtc_1_id;
|
||||
u32 obj_id;
|
||||
int ret;
|
||||
|
||||
- switch (rtc_type) {
|
||||
- case HWS_MATCHER_RTC_TYPE_MATCH:
|
||||
- rtc_0_id = &matcher->match_ste.rtc_0_id;
|
||||
- rtc_1_id = &matcher->match_ste.rtc_1_id;
|
||||
- ste_pool = matcher->match_ste.pool;
|
||||
-
|
||||
- rtc_attr.log_size = attr->table.sz_row_log;
|
||||
- rtc_attr.log_depth = attr->table.sz_col_log;
|
||||
- rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt);
|
||||
- rtc_attr.is_scnd_range = 0;
|
||||
- rtc_attr.miss_ft_id = matcher->end_ft_id;
|
||||
-
|
||||
- if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) {
|
||||
- /* The usual Hash Table */
|
||||
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH;
|
||||
-
|
||||
- /* The first mt is used since all share the same definer */
|
||||
- rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
|
||||
- } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) {
|
||||
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
|
||||
- rtc_attr.num_hash_definer = 1;
|
||||
-
|
||||
- if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) {
|
||||
- /* Hash Split Table */
|
||||
- rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH;
|
||||
- rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
|
||||
- } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) {
|
||||
- /* Linear Lookup Table */
|
||||
- rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR;
|
||||
- rtc_attr.match_definer_0 = ctx->caps->linear_match_definer;
|
||||
- }
|
||||
+ rtc_attr.log_size = attr->table.sz_row_log;
|
||||
+ rtc_attr.log_depth = attr->table.sz_col_log;
|
||||
+ rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt);
|
||||
+ rtc_attr.is_scnd_range = 0;
|
||||
+ rtc_attr.miss_ft_id = matcher->end_ft_id;
|
||||
+
|
||||
+ if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) {
|
||||
+ /* The usual Hash Table */
|
||||
+ rtc_attr.update_index_mode =
|
||||
+ MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH;
|
||||
+
|
||||
+ /* The first mt is used since all share the same definer */
|
||||
+ rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
|
||||
+ } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) {
|
||||
+ rtc_attr.update_index_mode =
|
||||
+ MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
|
||||
+ rtc_attr.num_hash_definer = 1;
|
||||
+
|
||||
+ if (attr->distribute_mode ==
|
||||
+ MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) {
|
||||
+ /* Hash Split Table */
|
||||
+ rtc_attr.access_index_mode =
|
||||
+ MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH;
|
||||
+ rtc_attr.match_definer_0 =
|
||||
+ mlx5hws_definer_get_id(mt->definer);
|
||||
+ } else if (attr->distribute_mode ==
|
||||
+ MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) {
|
||||
+ /* Linear Lookup Table */
|
||||
+ rtc_attr.access_index_mode =
|
||||
+ MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR;
|
||||
+ rtc_attr.match_definer_0 =
|
||||
+ ctx->caps->linear_match_definer;
|
||||
}
|
||||
- break;
|
||||
-
|
||||
- case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
|
||||
- action_ste = &matcher->action_ste;
|
||||
-
|
||||
- rtc_0_id = &action_ste->rtc_0_id;
|
||||
- rtc_1_id = &action_ste->rtc_1_id;
|
||||
- ste_pool = action_ste->pool;
|
||||
- /* Action RTC size calculation:
|
||||
- * log((max number of rules in matcher) *
|
||||
- * (max number of action STEs per rule) *
|
||||
- * (2 to support writing new STEs for update rule))
|
||||
- */
|
||||
- rtc_attr.log_size =
|
||||
- ilog2(roundup_pow_of_two(action_ste->max_stes)) +
|
||||
- attr->table.sz_row_log +
|
||||
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
|
||||
- rtc_attr.log_depth = 0;
|
||||
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
|
||||
- /* The action STEs use the default always hit definer */
|
||||
- rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer;
|
||||
- rtc_attr.is_frst_jumbo = false;
|
||||
- rtc_attr.miss_ft_id = 0;
|
||||
- break;
|
||||
-
|
||||
- default:
|
||||
- mlx5hws_err(ctx, "HWS Invalid RTC type\n");
|
||||
- return -EINVAL;
|
||||
}
|
||||
|
||||
- obj_id = mlx5hws_pool_get_base_id(ste_pool);
|
||||
+ obj_id = mlx5hws_pool_get_base_id(matcher->match_ste.pool);
|
||||
|
||||
rtc_attr.pd = ctx->pd_num;
|
||||
rtc_attr.ste_base = obj_id;
|
||||
@@ -297,15 +247,16 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool);
|
||||
rtc_attr.stc_base = obj_id;
|
||||
|
||||
- ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
|
||||
+ ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr,
|
||||
+ &matcher->match_ste.rtc_0_id);
|
||||
if (ret) {
|
||||
- mlx5hws_err(ctx, "Failed to create matcher RTC of type %s",
|
||||
- hws_matcher_rtc_type_to_str(rtc_type));
|
||||
+ mlx5hws_err(ctx, "Failed to create matcher RTC\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) {
|
||||
- obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
|
||||
+ obj_id = mlx5hws_pool_get_base_mirror_id(
|
||||
+ matcher->match_ste.pool);
|
||||
rtc_attr.ste_base = obj_id;
|
||||
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true);
|
||||
|
||||
@@ -313,10 +264,10 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
rtc_attr.stc_base = obj_id;
|
||||
hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true);
|
||||
|
||||
- ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id);
|
||||
+ ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr,
|
||||
+ &matcher->match_ste.rtc_1_id);
|
||||
if (ret) {
|
||||
- mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s",
|
||||
- hws_matcher_rtc_type_to_str(rtc_type));
|
||||
+ mlx5hws_err(ctx, "Failed to create mirror matcher RTC\n");
|
||||
goto destroy_rtc_0;
|
||||
}
|
||||
}
|
||||
@@ -324,33 +275,18 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
|
||||
return 0;
|
||||
|
||||
destroy_rtc_0:
|
||||
- mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id);
|
||||
+ mlx5hws_cmd_rtc_destroy(ctx->mdev, matcher->match_ste.rtc_0_id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
|
||||
- enum mlx5hws_matcher_rtc_type rtc_type)
|
||||
+static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
- struct mlx5hws_table *tbl = matcher->tbl;
|
||||
- u32 rtc_0_id, rtc_1_id;
|
||||
-
|
||||
- switch (rtc_type) {
|
||||
- case HWS_MATCHER_RTC_TYPE_MATCH:
|
||||
- rtc_0_id = matcher->match_ste.rtc_0_id;
|
||||
- rtc_1_id = matcher->match_ste.rtc_1_id;
|
||||
- break;
|
||||
- case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
|
||||
- rtc_0_id = matcher->action_ste.rtc_0_id;
|
||||
- rtc_1_id = matcher->action_ste.rtc_1_id;
|
||||
- break;
|
||||
- default:
|
||||
- return;
|
||||
- }
|
||||
+ struct mlx5_core_dev *mdev = matcher->tbl->ctx->mdev;
|
||||
|
||||
- if (tbl->type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- mlx5hws_cmd_rtc_destroy(tbl->ctx->mdev, rtc_1_id);
|
||||
+ if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
+ mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_1_id);
|
||||
|
||||
- mlx5hws_cmd_rtc_destroy(tbl->ctx->mdev, rtc_0_id);
|
||||
+ mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_0_id);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -418,85 +354,17 @@ static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher)
|
||||
-{
|
||||
- struct mlx5hws_matcher_resize_data *resize_data;
|
||||
-
|
||||
- resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL);
|
||||
- if (!resize_data)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- resize_data->max_stes = src_matcher->action_ste.max_stes;
|
||||
-
|
||||
- resize_data->stc = src_matcher->action_ste.stc;
|
||||
- resize_data->rtc_0_id = src_matcher->action_ste.rtc_0_id;
|
||||
- resize_data->rtc_1_id = src_matcher->action_ste.rtc_1_id;
|
||||
- resize_data->pool = src_matcher->action_ste.max_stes ?
|
||||
- src_matcher->action_ste.pool : NULL;
|
||||
-
|
||||
- /* Place the new resized matcher on the dst matcher's list */
|
||||
- list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data);
|
||||
-
|
||||
- /* Move all the previous resized matchers to the dst matcher's list */
|
||||
- while (!list_empty(&src_matcher->resize_data)) {
|
||||
- resize_data = list_first_entry(&src_matcher->resize_data,
|
||||
- struct mlx5hws_matcher_resize_data,
|
||||
- list_node);
|
||||
- list_del_init(&resize_data->list_node);
|
||||
- list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data);
|
||||
- }
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher)
|
||||
-{
|
||||
- struct mlx5hws_matcher_resize_data *resize_data;
|
||||
-
|
||||
- if (!mlx5hws_matcher_is_resizable(matcher))
|
||||
- return;
|
||||
-
|
||||
- while (!list_empty(&matcher->resize_data)) {
|
||||
- resize_data = list_first_entry(&matcher->resize_data,
|
||||
- struct mlx5hws_matcher_resize_data,
|
||||
- list_node);
|
||||
- list_del_init(&resize_data->list_node);
|
||||
-
|
||||
- if (resize_data->max_stes) {
|
||||
- mlx5hws_action_free_single_stc(matcher->tbl->ctx,
|
||||
- matcher->tbl->type,
|
||||
- &resize_data->stc);
|
||||
-
|
||||
- if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB)
|
||||
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
|
||||
- resize_data->rtc_1_id);
|
||||
-
|
||||
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
|
||||
- resize_data->rtc_0_id);
|
||||
-
|
||||
- if (resize_data->pool)
|
||||
- mlx5hws_pool_destroy(resize_data->pool);
|
||||
- }
|
||||
-
|
||||
- kfree(resize_data);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
|
||||
- struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
|
||||
- struct mlx5hws_matcher_action_ste *action_ste;
|
||||
- struct mlx5hws_table *tbl = matcher->tbl;
|
||||
- struct mlx5hws_pool_attr pool_attr = {0};
|
||||
- struct mlx5hws_context *ctx = tbl->ctx;
|
||||
- u32 required_stes;
|
||||
- u8 max_stes = 0;
|
||||
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
+ u8 required_stes, max_stes;
|
||||
int i, ret;
|
||||
|
||||
if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)
|
||||
return 0;
|
||||
|
||||
+ max_stes = 0;
|
||||
for (i = 0; i < matcher->num_of_at; i++) {
|
||||
struct mlx5hws_action_template *at = &matcher->at[i];
|
||||
|
||||
@@ -512,74 +380,9 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
|
||||
/* Future: Optimize reparse */
|
||||
}
|
||||
|
||||
- /* There are no additional STEs required for matcher */
|
||||
- if (!max_stes)
|
||||
- return 0;
|
||||
-
|
||||
- matcher->action_ste.max_stes = max_stes;
|
||||
-
|
||||
- action_ste = &matcher->action_ste;
|
||||
-
|
||||
- /* Allocate action STE mempool */
|
||||
- pool_attr.table_type = tbl->type;
|
||||
- pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
- pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY;
|
||||
- /* Pool size is similar to action RTC size */
|
||||
- pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
|
||||
- matcher->attr.table.sz_row_log +
|
||||
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
|
||||
- hws_matcher_set_pool_attr(&pool_attr, matcher);
|
||||
- action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr);
|
||||
- if (!action_ste->pool) {
|
||||
- mlx5hws_err(ctx, "Failed to create action ste pool\n");
|
||||
- return -EINVAL;
|
||||
- }
|
||||
-
|
||||
- /* Allocate action RTC */
|
||||
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
|
||||
- if (ret) {
|
||||
- mlx5hws_err(ctx, "Failed to create action RTC\n");
|
||||
- goto free_ste_pool;
|
||||
- }
|
||||
-
|
||||
- /* Allocate STC for jumps to STE */
|
||||
- stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
|
||||
- stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
|
||||
- stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
|
||||
- stc_attr.ste_table.ste_pool = action_ste->pool;
|
||||
- stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
|
||||
-
|
||||
- ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type,
|
||||
- &action_ste->stc);
|
||||
- if (ret) {
|
||||
- mlx5hws_err(ctx, "Failed to create action jump to table STC\n");
|
||||
- goto free_rtc;
|
||||
- }
|
||||
+ matcher->num_of_action_stes = max_stes;
|
||||
|
||||
return 0;
|
||||
-
|
||||
-free_rtc:
|
||||
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
|
||||
-free_ste_pool:
|
||||
- mlx5hws_pool_destroy(action_ste->pool);
|
||||
- return ret;
|
||||
-}
|
||||
-
|
||||
-static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher)
|
||||
-{
|
||||
- struct mlx5hws_matcher_action_ste *action_ste;
|
||||
- struct mlx5hws_table *tbl = matcher->tbl;
|
||||
-
|
||||
- action_ste = &matcher->action_ste;
|
||||
-
|
||||
- if (!action_ste->max_stes ||
|
||||
- matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION ||
|
||||
- mlx5hws_matcher_is_in_resize(matcher))
|
||||
- return;
|
||||
-
|
||||
- mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc);
|
||||
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
|
||||
- mlx5hws_pool_destroy(action_ste->pool);
|
||||
}
|
||||
|
||||
static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
|
||||
@@ -723,10 +526,10 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
|
||||
/* Create matcher end flow table anchor */
|
||||
ret = hws_matcher_create_end_ft(matcher);
|
||||
if (ret)
|
||||
- goto unbind_at;
|
||||
+ goto unbind_mt;
|
||||
|
||||
/* Allocate the RTC for the new matcher */
|
||||
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
|
||||
+ ret = hws_matcher_create_rtc(matcher);
|
||||
if (ret)
|
||||
goto destroy_end_ft;
|
||||
|
||||
@@ -738,11 +541,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
|
||||
return 0;
|
||||
|
||||
destroy_rtc:
|
||||
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
|
||||
+ hws_matcher_destroy_rtc(matcher);
|
||||
destroy_end_ft:
|
||||
hws_matcher_destroy_end_ft(matcher);
|
||||
-unbind_at:
|
||||
- hws_matcher_unbind_at(matcher);
|
||||
unbind_mt:
|
||||
hws_matcher_unbind_mt(matcher);
|
||||
return ret;
|
||||
@@ -750,11 +551,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
|
||||
|
||||
static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
- hws_matcher_resize_uninit(matcher);
|
||||
hws_matcher_disconnect(matcher);
|
||||
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
|
||||
+ hws_matcher_destroy_rtc(matcher);
|
||||
hws_matcher_destroy_end_ft(matcher);
|
||||
- hws_matcher_unbind_at(matcher);
|
||||
hws_matcher_unbind_mt(matcher);
|
||||
}
|
||||
|
||||
@@ -776,8 +575,6 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher)
|
||||
if (!col_matcher)
|
||||
return -ENOMEM;
|
||||
|
||||
- INIT_LIST_HEAD(&col_matcher->resize_data);
|
||||
-
|
||||
col_matcher->tbl = matcher->tbl;
|
||||
col_matcher->mt = matcher->mt;
|
||||
col_matcher->at = matcher->at;
|
||||
@@ -831,8 +628,6 @@ static int hws_matcher_init(struct mlx5hws_matcher *matcher)
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
int ret;
|
||||
|
||||
- INIT_LIST_HEAD(&matcher->resize_data);
|
||||
-
|
||||
mutex_lock(&ctx->ctrl_lock);
|
||||
|
||||
/* Allocate matcher resource and connect to the packet pipe */
|
||||
@@ -889,16 +684,12 @@ static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher)
|
||||
}
|
||||
|
||||
int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
- struct mlx5hws_action_template *at,
|
||||
- bool *need_rehash)
|
||||
+ struct mlx5hws_action_template *at)
|
||||
{
|
||||
bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
|
||||
- struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
u32 required_stes;
|
||||
int ret;
|
||||
|
||||
- *need_rehash = false;
|
||||
-
|
||||
if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) {
|
||||
ret = hws_matcher_grow_at_array(matcher);
|
||||
if (ret)
|
||||
@@ -916,11 +707,8 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
return ret;
|
||||
|
||||
required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term);
|
||||
- if (matcher->action_ste.max_stes < required_stes) {
|
||||
- mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n",
|
||||
- required_stes, matcher->action_ste.max_stes);
|
||||
- *need_rehash = true;
|
||||
- }
|
||||
+ if (matcher->num_of_action_stes < required_stes)
|
||||
+ matcher->num_of_action_stes = required_stes;
|
||||
|
||||
matcher->at[matcher->num_of_at] = *at;
|
||||
matcher->num_of_at += 1;
|
||||
@@ -1102,7 +890,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- if (src_matcher->action_ste.max_stes > dst_matcher->action_ste.max_stes) {
|
||||
+ if (src_matcher->num_of_action_stes > dst_matcher->num_of_action_stes) {
|
||||
mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1131,10 +919,6 @@ int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher,
|
||||
|
||||
src_matcher->resize_dst = dst_matcher;
|
||||
|
||||
- ret = hws_matcher_resize_init(src_matcher);
|
||||
- if (ret)
|
||||
- src_matcher->resize_dst = NULL;
|
||||
-
|
||||
out:
|
||||
mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock);
|
||||
return ret;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
index 0450b6175ac9..bad1fa8f77fd 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
@@ -50,23 +50,6 @@ struct mlx5hws_matcher_match_ste {
|
||||
struct mlx5hws_pool *pool;
|
||||
};
|
||||
|
||||
-struct mlx5hws_matcher_action_ste {
|
||||
- struct mlx5hws_pool_chunk stc;
|
||||
- u32 rtc_0_id;
|
||||
- u32 rtc_1_id;
|
||||
- struct mlx5hws_pool *pool;
|
||||
- u8 max_stes;
|
||||
-};
|
||||
-
|
||||
-struct mlx5hws_matcher_resize_data {
|
||||
- struct mlx5hws_pool_chunk stc;
|
||||
- u32 rtc_0_id;
|
||||
- u32 rtc_1_id;
|
||||
- struct mlx5hws_pool *pool;
|
||||
- u8 max_stes;
|
||||
- struct list_head list_node;
|
||||
-};
|
||||
-
|
||||
struct mlx5hws_matcher {
|
||||
struct mlx5hws_table *tbl;
|
||||
struct mlx5hws_matcher_attr attr;
|
||||
@@ -75,15 +58,14 @@ struct mlx5hws_matcher {
|
||||
u8 num_of_at;
|
||||
u8 size_of_at_array;
|
||||
u8 num_of_mt;
|
||||
+ u8 num_of_action_stes;
|
||||
/* enum mlx5hws_matcher_flags */
|
||||
u8 flags;
|
||||
u32 end_ft_id;
|
||||
struct mlx5hws_matcher *col_matcher;
|
||||
struct mlx5hws_matcher *resize_dst;
|
||||
struct mlx5hws_matcher_match_ste match_ste;
|
||||
- struct mlx5hws_matcher_action_ste action_ste;
|
||||
struct list_head list_node;
|
||||
- struct list_head resize_data;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
index 8ed8a715a2eb..5121951f2778 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
@@ -399,14 +399,11 @@ int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher);
|
||||
*
|
||||
* @matcher: Matcher to attach the action template to.
|
||||
* @at: Action template to be attached to the matcher.
|
||||
- * @need_rehash: Output parameter that tells callers if the matcher needs to be
|
||||
- * rehashed.
|
||||
*
|
||||
* Return: Zero on success, non-zero otherwise.
|
||||
*/
|
||||
int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
|
||||
- struct mlx5hws_action_template *at,
|
||||
- bool *need_rehash);
|
||||
+ struct mlx5hws_action_template *at);
|
||||
|
||||
/**
|
||||
* mlx5hws_matcher_resize_set_target - Link two matchers and enable moving rules.
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
index 5b758467ed03..9e6f35d68445 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
@@ -203,7 +203,7 @@ static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule,
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
|
||||
rule->action_ste.ste.order =
|
||||
- ilog2(roundup_pow_of_two(matcher->action_ste.max_stes));
|
||||
+ ilog2(roundup_pow_of_two(matcher->num_of_action_stes));
|
||||
return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id],
|
||||
skip_rx, skip_tx,
|
||||
&rule->action_ste);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
254
SOURCES/1356-net-mlx5-hws-free-unused-action-ste-tables.patch
Normal file
254
SOURCES/1356-net-mlx5-hws-free-unused-action-ste-tables.patch
Normal file
@ -0,0 +1,254 @@
|
||||
From b2936ce02b8545dd8b6b4bc1a135ba7d19d63488 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:00 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Free unused action STE tables
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 864531ca2072c55ff00ba9dfd8c15cf0f576051b
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:41 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Free unused action STE tables
|
||||
|
||||
Periodically check for unused action STE tables and free their
|
||||
associated resources. In order to do this safely, add a per-queue lock
|
||||
to synchronize the garbage collect work with regular operations on
|
||||
steering rules.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-12-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
|
||||
index cb6ad8411631..5766a9c82f96 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
|
||||
@@ -159,6 +159,7 @@ hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem)
|
||||
|
||||
action_tbl->parent_elem = parent_elem;
|
||||
INIT_LIST_HEAD(&action_tbl->list_node);
|
||||
+ action_tbl->last_used = jiffies;
|
||||
list_add(&action_tbl->list_node, &parent_elem->available);
|
||||
parent_elem->log_sz = log_sz;
|
||||
|
||||
@@ -236,6 +237,8 @@ static int hws_action_ste_pool_init(struct mlx5hws_context *ctx,
|
||||
enum mlx5hws_pool_optimize opt;
|
||||
int err;
|
||||
|
||||
+ mutex_init(&pool->lock);
|
||||
+
|
||||
/* Rules which are added for both RX and TX must use the same action STE
|
||||
* indices for both. If we were to use a single table, then RX-only and
|
||||
* TX-only rules would waste the unused entries. Thus, we use separate
|
||||
@@ -247,6 +250,7 @@ static int hws_action_ste_pool_init(struct mlx5hws_context *ctx,
|
||||
opt);
|
||||
if (err)
|
||||
goto destroy_elems;
|
||||
+ pool->elems[opt].parent_pool = pool;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -267,6 +271,58 @@ static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool)
|
||||
hws_action_ste_pool_element_destroy(&pool->elems[opt]);
|
||||
}
|
||||
|
||||
+static void hws_action_ste_pool_element_collect_stale(
|
||||
+ struct mlx5hws_action_ste_pool_element *elem, struct list_head *cleanup)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_table *action_tbl, *p;
|
||||
+ unsigned long expire_time, now;
|
||||
+
|
||||
+ expire_time = secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS);
|
||||
+ now = jiffies;
|
||||
+
|
||||
+ list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) {
|
||||
+ if (mlx5hws_pool_full(action_tbl->pool) &&
|
||||
+ time_before(action_tbl->last_used + expire_time, now))
|
||||
+ list_move(&action_tbl->list_node, cleanup);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void hws_action_ste_table_cleanup_list(struct list_head *cleanup)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_table *action_tbl, *p;
|
||||
+
|
||||
+ list_for_each_entry_safe(action_tbl, p, cleanup, list_node)
|
||||
+ hws_action_ste_table_destroy(action_tbl);
|
||||
+}
|
||||
+
|
||||
+static void hws_action_ste_pool_cleanup(struct work_struct *work)
|
||||
+{
|
||||
+ enum mlx5hws_pool_optimize opt;
|
||||
+ struct mlx5hws_context *ctx;
|
||||
+ LIST_HEAD(cleanup);
|
||||
+ int i;
|
||||
+
|
||||
+ ctx = container_of(work, struct mlx5hws_context,
|
||||
+ action_ste_cleanup.work);
|
||||
+
|
||||
+ for (i = 0; i < ctx->queues; i++) {
|
||||
+ struct mlx5hws_action_ste_pool *p = &ctx->action_ste_pool[i];
|
||||
+
|
||||
+ mutex_lock(&p->lock);
|
||||
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE;
|
||||
+ opt < MLX5HWS_POOL_OPTIMIZE_MAX; opt++)
|
||||
+ hws_action_ste_pool_element_collect_stale(
|
||||
+ &p->elems[opt], &cleanup);
|
||||
+ mutex_unlock(&p->lock);
|
||||
+ }
|
||||
+
|
||||
+ hws_action_ste_table_cleanup_list(&cleanup);
|
||||
+
|
||||
+ schedule_delayed_work(&ctx->action_ste_cleanup,
|
||||
+ secs_to_jiffies(
|
||||
+ MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS));
|
||||
+}
|
||||
+
|
||||
int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx)
|
||||
{
|
||||
struct mlx5hws_action_ste_pool *pool;
|
||||
@@ -285,6 +341,12 @@ int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx)
|
||||
|
||||
ctx->action_ste_pool = pool;
|
||||
|
||||
+ INIT_DELAYED_WORK(&ctx->action_ste_cleanup,
|
||||
+ hws_action_ste_pool_cleanup);
|
||||
+ schedule_delayed_work(
|
||||
+ &ctx->action_ste_cleanup,
|
||||
+ secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS));
|
||||
+
|
||||
return 0;
|
||||
|
||||
free_pool:
|
||||
@@ -300,6 +362,8 @@ void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx)
|
||||
size_t queues = ctx->queues;
|
||||
int i;
|
||||
|
||||
+ cancel_delayed_work_sync(&ctx->action_ste_cleanup);
|
||||
+
|
||||
for (i = 0; i < queues; i++)
|
||||
hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]);
|
||||
|
||||
@@ -330,6 +394,7 @@ hws_action_ste_table_chunk_alloc(struct mlx5hws_action_ste_table *action_tbl,
|
||||
return err;
|
||||
|
||||
chunk->action_tbl = action_tbl;
|
||||
+ action_tbl->last_used = jiffies;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -346,6 +411,8 @@ int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
|
||||
if (skip_rx && skip_tx)
|
||||
return -EINVAL;
|
||||
|
||||
+ mutex_lock(&pool->lock);
|
||||
+
|
||||
elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx);
|
||||
|
||||
mlx5hws_dbg(elem->ctx,
|
||||
@@ -362,26 +429,39 @@ int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
|
||||
|
||||
if (!found) {
|
||||
action_tbl = hws_action_ste_table_alloc(elem);
|
||||
- if (IS_ERR(action_tbl))
|
||||
- return PTR_ERR(action_tbl);
|
||||
+ if (IS_ERR(action_tbl)) {
|
||||
+ err = PTR_ERR(action_tbl);
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
err = hws_action_ste_table_chunk_alloc(action_tbl, chunk);
|
||||
if (err)
|
||||
- return err;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
if (mlx5hws_pool_empty(action_tbl->pool))
|
||||
list_move(&action_tbl->list_node, &elem->full);
|
||||
|
||||
- return 0;
|
||||
+ err = 0;
|
||||
+
|
||||
+out:
|
||||
+ mutex_unlock(&pool->lock);
|
||||
+
|
||||
+ return err;
|
||||
}
|
||||
|
||||
void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk)
|
||||
{
|
||||
+ struct mutex *lock = &chunk->action_tbl->parent_elem->parent_pool->lock;
|
||||
+
|
||||
mlx5hws_dbg(chunk->action_tbl->pool->ctx,
|
||||
"Freeing action STEs offset %d order %d\n",
|
||||
chunk->ste.offset, chunk->ste.order);
|
||||
+
|
||||
+ mutex_lock(lock);
|
||||
mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste);
|
||||
+ chunk->action_tbl->last_used = jiffies;
|
||||
list_move(&chunk->action_tbl->list_node,
|
||||
&chunk->action_tbl->parent_elem->available);
|
||||
+ mutex_unlock(lock);
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
|
||||
index 2de660a63223..a8ba97359e31 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
|
||||
@@ -8,6 +8,9 @@
|
||||
#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1
|
||||
#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20
|
||||
|
||||
+#define MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS 300
|
||||
+#define MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS 300
|
||||
+
|
||||
struct mlx5hws_action_ste_pool_element;
|
||||
|
||||
struct mlx5hws_action_ste_table {
|
||||
@@ -19,10 +22,12 @@ struct mlx5hws_action_ste_table {
|
||||
u32 rtc_0_id;
|
||||
u32 rtc_1_id;
|
||||
struct list_head list_node;
|
||||
+ unsigned long last_used;
|
||||
};
|
||||
|
||||
struct mlx5hws_action_ste_pool_element {
|
||||
struct mlx5hws_context *ctx;
|
||||
+ struct mlx5hws_action_ste_pool *parent_pool;
|
||||
size_t log_sz; /* Size of the largest table so far. */
|
||||
enum mlx5hws_pool_optimize opt;
|
||||
struct list_head available;
|
||||
@@ -33,6 +38,12 @@ struct mlx5hws_action_ste_pool_element {
|
||||
* per queue.
|
||||
*/
|
||||
struct mlx5hws_action_ste_pool {
|
||||
+ /* Protects the entire pool. We have one pool per queue and only one
|
||||
+ * operation can be active per rule at a given time. Thus this lock
|
||||
+ * protects solely against concurrent garbage collection and we expect
|
||||
+ * very little contention.
|
||||
+ */
|
||||
+ struct mutex lock;
|
||||
struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX];
|
||||
};
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
index e987e93bbc6e..3f8938c73dc0 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
|
||||
@@ -40,6 +40,7 @@ struct mlx5hws_context {
|
||||
u32 pd_num;
|
||||
struct mlx5hws_pool *stc_pool;
|
||||
struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */
|
||||
+ struct delayed_work action_ste_cleanup;
|
||||
struct mlx5hws_context_common_res common_res;
|
||||
struct mlx5hws_pattern_cache *pattern_cache;
|
||||
struct mlx5hws_definer_cache *definer_cache;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,99 @@
|
||||
From d1985c5a5885ee6fa478036997488534d181983c Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:01 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Export action STE tables to debugfs
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 3db55f8cc8d329a97e06fb44347b64a0ca44e780
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Thu Apr 10 22:17:42 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Export action STE tables to debugfs
|
||||
|
||||
Introduce a new type of dump object and dump all action STE tables,
|
||||
along with information on their RTCs and STEs.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Hamdan Agbariya <hamdani@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
|
||||
Link: https://patch.msgid.link/1744312662-356571-13-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
index 38f75dec9cfc..91568d6c1dac 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
|
||||
@@ -387,10 +387,41 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void
|
||||
+hws_debug_dump_action_ste_table(struct seq_file *f,
|
||||
+ struct mlx5hws_action_ste_table *action_tbl)
|
||||
+{
|
||||
+ int ste_0_id = mlx5hws_pool_get_base_id(action_tbl->pool);
|
||||
+ int ste_1_id = mlx5hws_pool_get_base_mirror_id(action_tbl->pool);
|
||||
+
|
||||
+ seq_printf(f, "%d,0x%llx,%d,%d,%d,%d\n",
|
||||
+ MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE,
|
||||
+ HWS_PTR_TO_ID(action_tbl),
|
||||
+ action_tbl->rtc_0_id, ste_0_id,
|
||||
+ action_tbl->rtc_1_id, ste_1_id);
|
||||
+}
|
||||
+
|
||||
+static void hws_debug_dump_action_ste_pool(struct seq_file *f,
|
||||
+ struct mlx5hws_action_ste_pool *pool)
|
||||
+{
|
||||
+ struct mlx5hws_action_ste_table *action_tbl;
|
||||
+ enum mlx5hws_pool_optimize opt;
|
||||
+
|
||||
+ mutex_lock(&pool->lock);
|
||||
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX;
|
||||
+ opt++) {
|
||||
+ list_for_each_entry(action_tbl, &pool->elems[opt].available,
|
||||
+ list_node) {
|
||||
+ hws_debug_dump_action_ste_table(f, action_tbl);
|
||||
+ }
|
||||
+ }
|
||||
+ mutex_unlock(&pool->lock);
|
||||
+}
|
||||
+
|
||||
static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx)
|
||||
{
|
||||
struct mlx5hws_table *tbl;
|
||||
- int ret;
|
||||
+ int ret, i;
|
||||
|
||||
ret = hws_debug_dump_context_info(f, ctx);
|
||||
if (ret)
|
||||
@@ -410,6 +441,9 @@ static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ct
|
||||
return ret;
|
||||
}
|
||||
|
||||
+ for (i = 0; i < ctx->queues; i++)
|
||||
+ hws_debug_dump_action_ste_pool(f, &ctx->action_ste_pool[i]);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
|
||||
index e44e7ae28f93..89c396f9f266 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
|
||||
@@ -26,6 +26,8 @@ enum mlx5hws_debug_res_type {
|
||||
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205,
|
||||
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206,
|
||||
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207,
|
||||
+
|
||||
+ MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE = 4300,
|
||||
};
|
||||
|
||||
static inline u64
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,100 @@
|
||||
From e0fb28731ba130ec0f45f724aa5c27a9d49f363d Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:01 -0400
|
||||
Subject: [PATCH] net/mlx5e: ethtool: Fix formatting of
|
||||
ptp_rq0_csum_complete_tail_slow
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit cfba1d1b61ae3f32e4bc06e9860711a4488d98b7
|
||||
Author: Kees Cook <kees@kernel.org>
|
||||
Date: Tue Apr 15 19:01:14 2025 -0700
|
||||
|
||||
net/mlx5e: ethtool: Fix formatting of ptp_rq0_csum_complete_tail_slow
|
||||
|
||||
The new GCC 15 warning -Wunterminated-string-initialization reports:
|
||||
|
||||
In file included from drivers/net/ethernet/mellanox/mlx5/core/en.h:55,
|
||||
from drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:34:
|
||||
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h:57:46: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization]
|
||||
57 | #define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
|
||||
| ^~~~~~~~~~~
|
||||
drivers/net/ethernet/mellanox/mlx5/core/en_stats.c:2279:11: note: in expansion of macro 'MLX5E_DECLARE_PTP_RQ_STAT'
|
||||
2279 | { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
|
||||
| ^~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This stat string is being used in ethtool_sprintf(), so it must be a
|
||||
valid NUL-terminated string. Currently the string lacks the final NUL
|
||||
byte (as GCC warns), but by absolute luck, the next byte in memory is a
|
||||
space (decimal 32) followed by a NUL. "format" is immediately followed
|
||||
by little-endian size_t:
|
||||
|
||||
struct counter_desc {
|
||||
char format[32]; /* 0 32 */
|
||||
size_t offset; /* 32 8 */
|
||||
};
|
||||
|
||||
The "offset" member is populated by the stats member offset:
|
||||
|
||||
#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
|
||||
|
||||
which for this struct mlx5e_rq_stats member, csum_complete_tail_slow, is
|
||||
32, or space, and then the rest of the "offset" bytes are NULs.
|
||||
|
||||
struct mlx5e_rq_stats {
|
||||
...
|
||||
u64 csum_complete_tail_slow; /* 32 8 */
|
||||
|
||||
The use of vsnprintf(), within ethtool_sprintf(), reads past the end of
|
||||
"format" and sees the format string as "ptp_rq%d_csum_complete_tail_slow ",
|
||||
with %d getting resolved by MLX5E_PTP_CHANNEL_IX (value 0):
|
||||
|
||||
ethtool_sprintf(data, ptp_rq_stats_desc[i].format,
|
||||
MLX5E_PTP_CHANNEL_IX);
|
||||
|
||||
With an output result of "ptp_rq0_csum_complete_tail_slow", which gets
|
||||
precisely truncated to 31 characters with a trailing NUL.
|
||||
|
||||
So, instead of accidentally getting this correct due to the NUL bytes
|
||||
at the end of the size_t that happens to follow the format string, just
|
||||
make the string initializer 1 byte shorter by replacing "%d" with "0",
|
||||
since MLX5E_PTP_CHANNEL_IX is already hard-coded. This results in no
|
||||
initializer truncation and no need to call sprintf().
|
||||
|
||||
Signed-off-by: Kees Cook <kees@kernel.org>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250416020109.work.297-kees@kernel.org
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
|
||||
index 1c121b435016..19664fa7f217 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
|
||||
@@ -2424,8 +2424,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
|
||||
}
|
||||
if (priv->rx_ptp_opened) {
|
||||
for (i = 0; i < NUM_PTP_RQ_STATS; i++)
|
||||
- ethtool_sprintf(data, ptp_rq_stats_desc[i].format,
|
||||
- MLX5E_PTP_CHANNEL_IX);
|
||||
+ ethtool_puts(data, ptp_rq_stats_desc[i].format);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
|
||||
index 8de6fcbd3a03..def5dea1463d 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
|
||||
@@ -54,7 +54,7 @@
|
||||
#define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld)
|
||||
#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
|
||||
#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
|
||||
-#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
|
||||
+#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq0_"#fld, offsetof(type, fld)
|
||||
|
||||
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
From 3a54d4daa0cfe0a44c2f8cdc68d5b0c8b277a990 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:01 -0400
|
||||
Subject: [PATCH] net/mlx5: Fix spelling mistakes in mlx5_core_dbg message and
|
||||
comments
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 1e36473215297708dbe144c65b9f242c6e604520
|
||||
Author: Colin Ian King <colin.i.king@gmail.com>
|
||||
Date: Fri Apr 18 14:57:03 2025 +0100
|
||||
|
||||
net/mlx5: Fix spelling mistakes in mlx5_core_dbg message and comments
|
||||
|
||||
There is a spelling mistake in a mlx5_core_dbg and two spelling mistakes
|
||||
in comment blocks. Fix them.
|
||||
|
||||
Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
|
||||
Acked-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250418135703.542722-1-colin.i.king@gmail.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
|
||||
index 2c5f850c31f6..40024cfa3099 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
|
||||
@@ -148,7 +148,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
|
||||
* Free the IRQ and other resources such as rmap from the system.
|
||||
* BUT doesn't free or remove reference from mlx5.
|
||||
* This function is very important for the shutdown flow, where we need to
|
||||
- * cleanup system resoruces but keep mlx5 objects alive,
|
||||
+ * cleanup system resources but keep mlx5 objects alive,
|
||||
* see mlx5_irq_table_free_irqs().
|
||||
*/
|
||||
static void mlx5_system_free_irq(struct mlx5_irq *irq)
|
||||
@@ -588,7 +588,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
|
||||
struct mlx5_irq *irq;
|
||||
unsigned long index;
|
||||
|
||||
- /* There are cases in which we are destrying the irq_table before
|
||||
+ /* There are cases in which we are destroying the irq_table before
|
||||
* freeing all the IRQs, fast teardown for example. Hence, free the irqs
|
||||
* which might not have been freed.
|
||||
*/
|
||||
@@ -617,7 +617,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec,
|
||||
if (!mlx5_sf_max_functions(dev))
|
||||
return 0;
|
||||
if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
|
||||
- mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
|
||||
+ mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
136
SOURCES/1360-net-mlx5-hws-fix-ip-version-decision.patch
Normal file
136
SOURCES/1360-net-mlx5-hws-fix-ip-version-decision.patch
Normal file
@ -0,0 +1,136 @@
|
||||
From 988625f598a4722c53b34743d5ddef5d48a46a20 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:02 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Fix IP version decision
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 5f2f8d8b6800e4fc760c2eccec9b2bd2cacf80cf
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue Apr 22 12:25:38 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Fix IP version decision
|
||||
|
||||
Unify the check for IP version when creating a definer. A given matcher
|
||||
is deemed to match on IPv6 if any of the higher order (>31) bits of
|
||||
source or destination address mask are set.
|
||||
|
||||
A single packet cannot mix IP versions between source and destination
|
||||
addresses, so it makes no sense that they would be decided on
|
||||
independently.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250422092540.182091-2-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
index c8cc0c8115f5..5257e706dde2 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
@@ -509,9 +509,9 @@ static int
|
||||
hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
u32 *match_param)
|
||||
{
|
||||
- bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set;
|
||||
struct mlx5hws_definer_fc *fc = cd->fc;
|
||||
struct mlx5hws_definer_fc *curr_fc;
|
||||
+ bool is_ipv6, smac_set, dmac_set;
|
||||
u32 *s_ipv6, *d_ipv6;
|
||||
|
||||
if (HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type, 0x2) ||
|
||||
@@ -570,10 +570,10 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout);
|
||||
|
||||
/* Assume IPv6 is used if ipv6 bits are set */
|
||||
- is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2];
|
||||
- is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
+ is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] ||
|
||||
+ d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
|
||||
- if (is_s_ipv6) {
|
||||
+ if (is_ipv6) {
|
||||
/* Handle IPv6 source address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O,
|
||||
outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96,
|
||||
@@ -587,13 +587,6 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_O,
|
||||
outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
ipv6_src_outer.ipv6_address_31_0);
|
||||
- } else {
|
||||
- /* Handle IPv4 source address */
|
||||
- HWS_SET_HDR(fc, match_param, IPV4_SRC_O,
|
||||
- outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
- ipv4_src_dest_outer.source_address);
|
||||
- }
|
||||
- if (is_d_ipv6) {
|
||||
/* Handle IPv6 destination address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_O,
|
||||
outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96,
|
||||
@@ -608,6 +601,10 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
ipv6_dst_outer.ipv6_address_31_0);
|
||||
} else {
|
||||
+ /* Handle IPv4 source address */
|
||||
+ HWS_SET_HDR(fc, match_param, IPV4_SRC_O,
|
||||
+ outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
+ ipv4_src_dest_outer.source_address);
|
||||
/* Handle IPv4 destination address */
|
||||
HWS_SET_HDR(fc, match_param, IPV4_DST_O,
|
||||
outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
@@ -665,9 +662,9 @@ static int
|
||||
hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
u32 *match_param)
|
||||
{
|
||||
- bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set;
|
||||
struct mlx5hws_definer_fc *fc = cd->fc;
|
||||
struct mlx5hws_definer_fc *curr_fc;
|
||||
+ bool is_ipv6, smac_set, dmac_set;
|
||||
u32 *s_ipv6, *d_ipv6;
|
||||
|
||||
if (HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type, 0x2) ||
|
||||
@@ -728,10 +725,10 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
inner_headers.dst_ipv4_dst_ipv6.ipv6_layout);
|
||||
|
||||
/* Assume IPv6 is used if ipv6 bits are set */
|
||||
- is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2];
|
||||
- is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
+ is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] ||
|
||||
+ d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
|
||||
- if (is_s_ipv6) {
|
||||
+ if (is_ipv6) {
|
||||
/* Handle IPv6 source address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I,
|
||||
inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96,
|
||||
@@ -745,13 +742,6 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_I,
|
||||
inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
ipv6_src_inner.ipv6_address_31_0);
|
||||
- } else {
|
||||
- /* Handle IPv4 source address */
|
||||
- HWS_SET_HDR(fc, match_param, IPV4_SRC_I,
|
||||
- inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
- ipv4_src_dest_inner.source_address);
|
||||
- }
|
||||
- if (is_d_ipv6) {
|
||||
/* Handle IPv6 destination address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_I,
|
||||
inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96,
|
||||
@@ -766,6 +756,10 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
ipv6_dst_inner.ipv6_address_31_0);
|
||||
} else {
|
||||
+ /* Handle IPv4 source address */
|
||||
+ HWS_SET_HDR(fc, match_param, IPV4_SRC_I,
|
||||
+ inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
+ ipv4_src_dest_inner.source_address);
|
||||
/* Handle IPv4 destination address */
|
||||
HWS_SET_HDR(fc, match_param, IPV4_DST_I,
|
||||
inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
127
SOURCES/1361-net-mlx5-hws-harden-ip-version-definer-checks.patch
Normal file
127
SOURCES/1361-net-mlx5-hws-harden-ip-version-definer-checks.patch
Normal file
@ -0,0 +1,127 @@
|
||||
From 024b08ee6e9f4a7d00dcdbde0e76f34bebc32c27 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:02 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Harden IP version definer checks
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 6991a975e416154576b0f5f06256aec13e23b0a7
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue Apr 22 12:25:39 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Harden IP version definer checks
|
||||
|
||||
Replicate some sanity checks that firmware does, since hardware steering
|
||||
does not go through firmware.
|
||||
|
||||
When creating a definer, disallow matching on IP addresses without also
|
||||
matching on IP version. The latter can be satisfied by matching either
|
||||
on the version field in the IP header, or on the ethertype field.
|
||||
|
||||
Also refuse to match IPv4 IHL alongside IPv6.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250422092540.182091-3-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
index 5257e706dde2..1061a46811ac 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
@@ -509,9 +509,9 @@ static int
|
||||
hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
u32 *match_param)
|
||||
{
|
||||
+ bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set;
|
||||
struct mlx5hws_definer_fc *fc = cd->fc;
|
||||
struct mlx5hws_definer_fc *curr_fc;
|
||||
- bool is_ipv6, smac_set, dmac_set;
|
||||
u32 *s_ipv6, *d_ipv6;
|
||||
|
||||
if (HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type, 0x2) ||
|
||||
@@ -521,6 +521,20 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ip_addr_set = HWS_IS_FLD_SET_SZ(match_param,
|
||||
+ outer_headers.src_ipv4_src_ipv6,
|
||||
+ 0x80) ||
|
||||
+ HWS_IS_FLD_SET_SZ(match_param,
|
||||
+ outer_headers.dst_ipv4_dst_ipv6, 0x80);
|
||||
+ ip_ver_set = HWS_IS_FLD_SET(match_param, outer_headers.ip_version) ||
|
||||
+ HWS_IS_FLD_SET(match_param, outer_headers.ethertype);
|
||||
+
|
||||
+ if (ip_addr_set && !ip_ver_set) {
|
||||
+ mlx5hws_err(cd->ctx,
|
||||
+ "Unsupported match on IP address without version or ethertype\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
/* L2 Check ethertype */
|
||||
HWS_SET_HDR(fc, match_param, ETH_TYPE_O,
|
||||
outer_headers.ethertype,
|
||||
@@ -573,6 +587,12 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] ||
|
||||
d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
|
||||
+ /* IHL is an IPv4-specific field. */
|
||||
+ if (is_ipv6 && HWS_IS_FLD_SET(match_param, outer_headers.ipv4_ihl)) {
|
||||
+ mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
if (is_ipv6) {
|
||||
/* Handle IPv6 source address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O,
|
||||
@@ -662,9 +682,9 @@ static int
|
||||
hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
u32 *match_param)
|
||||
{
|
||||
+ bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set;
|
||||
struct mlx5hws_definer_fc *fc = cd->fc;
|
||||
struct mlx5hws_definer_fc *curr_fc;
|
||||
- bool is_ipv6, smac_set, dmac_set;
|
||||
u32 *s_ipv6, *d_ipv6;
|
||||
|
||||
if (HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type, 0x2) ||
|
||||
@@ -674,6 +694,20 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
+ ip_addr_set = HWS_IS_FLD_SET_SZ(match_param,
|
||||
+ inner_headers.src_ipv4_src_ipv6,
|
||||
+ 0x80) ||
|
||||
+ HWS_IS_FLD_SET_SZ(match_param,
|
||||
+ inner_headers.dst_ipv4_dst_ipv6, 0x80);
|
||||
+ ip_ver_set = HWS_IS_FLD_SET(match_param, inner_headers.ip_version) ||
|
||||
+ HWS_IS_FLD_SET(match_param, inner_headers.ethertype);
|
||||
+
|
||||
+ if (ip_addr_set && !ip_ver_set) {
|
||||
+ mlx5hws_err(cd->ctx,
|
||||
+ "Unsupported match on IP address without version or ethertype\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
/* L2 Check ethertype */
|
||||
HWS_SET_HDR(fc, match_param, ETH_TYPE_I,
|
||||
inner_headers.ethertype,
|
||||
@@ -728,6 +762,12 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd,
|
||||
is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] ||
|
||||
d_ipv6[0] || d_ipv6[1] || d_ipv6[2];
|
||||
|
||||
+ /* IHL is an IPv4-specific field. */
|
||||
+ if (is_ipv6 && HWS_IS_FLD_SET(match_param, inner_headers.ipv4_ihl)) {
|
||||
+ mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
if (is_ipv6) {
|
||||
/* Handle IPv6 source address */
|
||||
HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,256 @@
|
||||
From c0c4826b9a633587cd14595358b62c40a3672204 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:02 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Disallow matcher IP version mixing
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit f41f3edf0b15d7ce0b0f71c00a6125e8d7ca735f
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue Apr 22 12:25:40 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Disallow matcher IP version mixing
|
||||
|
||||
Signal clearly to the user, via an error, that mixing IPv4 and IPv6
|
||||
rules in the same matcher is not supported. Previously such cases
|
||||
silently failed by adding a rule that did not work correctly.
|
||||
|
||||
Rules can specify an IP version by one of two fields: IP version or
|
||||
ethertype. At matcher creation, store whether the template matches on
|
||||
any of these two fields. If yes, inspect each rule for its corresponding
|
||||
match value and store the IP version inside the matcher to guard against
|
||||
inconsistencies with subsequent rules.
|
||||
|
||||
Furthermore, also check rules for internal consistency, i.e. verify that
|
||||
the ethertype and IP version match values do not contradict each other.
|
||||
|
||||
The logic applies to inner and outer headers independently, to account
|
||||
for tunneling.
|
||||
|
||||
Rules that do not match on IP addresses are not affected.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250422092540.182091-4-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 716502732d3d..5b0c1623499b 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -385,6 +385,30 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void hws_matcher_set_ip_version_match(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < matcher->mt->fc_sz; i++) {
|
||||
+ switch (matcher->mt->fc[i].fname) {
|
||||
+ case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O:
|
||||
+ matcher->matches_outer_ethertype = 1;
|
||||
+ break;
|
||||
+ case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O:
|
||||
+ matcher->matches_outer_ip_version = 1;
|
||||
+ break;
|
||||
+ case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I:
|
||||
+ matcher->matches_inner_ethertype = 1;
|
||||
+ break;
|
||||
+ case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I:
|
||||
+ matcher->matches_inner_ip_version = 1;
|
||||
+ break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
@@ -401,6 +425,8 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
|
||||
}
|
||||
}
|
||||
|
||||
+ hws_matcher_set_ip_version_match(matcher);
|
||||
+
|
||||
/* Create an STE pool per matcher*/
|
||||
pool_attr.table_type = matcher->tbl->type;
|
||||
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
index bad1fa8f77fd..8e95158a66b5 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
@@ -50,6 +50,12 @@ struct mlx5hws_matcher_match_ste {
|
||||
struct mlx5hws_pool *pool;
|
||||
};
|
||||
|
||||
+enum {
|
||||
+ MLX5HWS_MATCHER_IPV_UNSET = 0,
|
||||
+ MLX5HWS_MATCHER_IPV_4 = 1,
|
||||
+ MLX5HWS_MATCHER_IPV_6 = 2,
|
||||
+};
|
||||
+
|
||||
struct mlx5hws_matcher {
|
||||
struct mlx5hws_table *tbl;
|
||||
struct mlx5hws_matcher_attr attr;
|
||||
@@ -61,6 +67,12 @@ struct mlx5hws_matcher {
|
||||
u8 num_of_action_stes;
|
||||
/* enum mlx5hws_matcher_flags */
|
||||
u8 flags;
|
||||
+ u8 matches_outer_ethertype:1;
|
||||
+ u8 matches_outer_ip_version:1;
|
||||
+ u8 matches_inner_ethertype:1;
|
||||
+ u8 matches_inner_ip_version:1;
|
||||
+ u8 outer_ip_version:2;
|
||||
+ u8 inner_ip_version:2;
|
||||
u32 end_ft_id;
|
||||
struct mlx5hws_matcher *col_matcher;
|
||||
struct mlx5hws_matcher *resize_dst;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
index 9e6f35d68445..5342a4cc7194 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
|
||||
@@ -655,6 +655,124 @@ int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static u8 hws_rule_ethertype_to_matcher_ipv(u32 ethertype)
|
||||
+{
|
||||
+ switch (ethertype) {
|
||||
+ case ETH_P_IP:
|
||||
+ return MLX5HWS_MATCHER_IPV_4;
|
||||
+ case ETH_P_IPV6:
|
||||
+ return MLX5HWS_MATCHER_IPV_6;
|
||||
+ default:
|
||||
+ return MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static u8 hws_rule_ip_version_to_matcher_ipv(u32 ip_version)
|
||||
+{
|
||||
+ switch (ip_version) {
|
||||
+ case 4:
|
||||
+ return MLX5HWS_MATCHER_IPV_4;
|
||||
+ case 6:
|
||||
+ return MLX5HWS_MATCHER_IPV_6;
|
||||
+ default:
|
||||
+ return MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int hws_rule_check_outer_ip_version(struct mlx5hws_matcher *matcher,
|
||||
+ u32 *match_param)
|
||||
+{
|
||||
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
+ u8 outer_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ u8 outer_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ u8 outer_ipv, ver;
|
||||
+
|
||||
+ if (matcher->matches_outer_ethertype) {
|
||||
+ ver = MLX5_GET(fte_match_param, match_param,
|
||||
+ outer_headers.ethertype);
|
||||
+ outer_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver);
|
||||
+ }
|
||||
+ if (matcher->matches_outer_ip_version) {
|
||||
+ ver = MLX5_GET(fte_match_param, match_param,
|
||||
+ outer_headers.ip_version);
|
||||
+ outer_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver);
|
||||
+ }
|
||||
+
|
||||
+ if (outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ outer_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ outer_ipv_ether != outer_ipv_ip) {
|
||||
+ mlx5hws_err(ctx, "Rule matches on inconsistent outer ethertype and ip version\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ outer_ipv = outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ?
|
||||
+ outer_ipv_ether : outer_ipv_ip;
|
||||
+ if (outer_ipv != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ matcher->outer_ip_version != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ outer_ipv != matcher->outer_ip_version) {
|
||||
+ mlx5hws_err(ctx, "Matcher and rule disagree on outer IP version\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ matcher->outer_ip_version = outer_ipv;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hws_rule_check_inner_ip_version(struct mlx5hws_matcher *matcher,
|
||||
+ u32 *match_param)
|
||||
+{
|
||||
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
|
||||
+ u8 inner_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ u8 inner_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET;
|
||||
+ u8 inner_ipv, ver;
|
||||
+
|
||||
+ if (matcher->matches_inner_ethertype) {
|
||||
+ ver = MLX5_GET(fte_match_param, match_param,
|
||||
+ inner_headers.ethertype);
|
||||
+ inner_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver);
|
||||
+ }
|
||||
+ if (matcher->matches_inner_ip_version) {
|
||||
+ ver = MLX5_GET(fte_match_param, match_param,
|
||||
+ inner_headers.ip_version);
|
||||
+ inner_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver);
|
||||
+ }
|
||||
+
|
||||
+ if (inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ inner_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ inner_ipv_ether != inner_ipv_ip) {
|
||||
+ mlx5hws_err(ctx, "Rule matches on inconsistent inner ethertype and ip version\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+
|
||||
+ inner_ipv = inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ?
|
||||
+ inner_ipv_ether : inner_ipv_ip;
|
||||
+ if (inner_ipv != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ matcher->inner_ip_version != MLX5HWS_MATCHER_IPV_UNSET &&
|
||||
+ inner_ipv != matcher->inner_ip_version) {
|
||||
+ mlx5hws_err(ctx, "Matcher and rule disagree on inner IP version\n");
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ matcher->inner_ip_version = inner_ipv;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hws_rule_check_ip_version(struct mlx5hws_matcher *matcher,
|
||||
+ u32 *match_param)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = hws_rule_check_outer_ip_version(matcher, match_param);
|
||||
+ if (unlikely(ret))
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = hws_rule_check_inner_ip_version(matcher, match_param);
|
||||
+ if (unlikely(ret))
|
||||
+ return ret;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
int mlx5hws_rule_create(struct mlx5hws_matcher *matcher,
|
||||
u8 mt_idx,
|
||||
u32 *match_param,
|
||||
@@ -665,6 +783,10 @@ int mlx5hws_rule_create(struct mlx5hws_matcher *matcher,
|
||||
{
|
||||
int ret;
|
||||
|
||||
+ ret = hws_rule_check_ip_version(matcher, match_param);
|
||||
+ if (unlikely(ret))
|
||||
+ return ret;
|
||||
+
|
||||
rule_handle->matcher = matcher;
|
||||
|
||||
ret = hws_rule_enqueue_precheck_create(rule_handle, attr);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,142 @@
|
||||
From 85e0a9a7588dbdecc6ff8e2facde4a75b8ff4299 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:02 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Fix error flow upon firmware failure for RQ
|
||||
destruction
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 5d2ea5aebbb2f3ebde4403f9c55b2b057e5dd2d6
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Mon Apr 28 14:34:07 2025 +0300
|
||||
|
||||
RDMA/mlx5: Fix error flow upon firmware failure for RQ destruction
|
||||
|
||||
Upon RQ destruction, if the firmware command (which destroys the last
|
||||
resource) fails, some SW resources have already been cleaned up
|
||||
regardless of the failure.
|
||||
|
||||
Now properly rollback the object to its original state upon such failure.
|
||||
|
||||
This avoids a use-after-free in case someone tries to destroy the
|
||||
object again, which results in the following kernel trace:
|
||||
refcount_t: underflow; use-after-free.
|
||||
WARNING: CPU: 0 PID: 37589 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148
|
||||
Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) rfkill mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) psample mlxfw(OE) mlx_compat(OE) macsec tls pci_hyperv_intf sunrpc vfat fat virtio_net net_failover failover fuse loop nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_console virtio_gpu virtio_blk virtio_dma_buf virtio_mmio dm_mirror dm_region_hash dm_log dm_mod xpmem(OE)
|
||||
CPU: 0 UID: 0 PID: 37589 Comm: python3 Kdump: loaded Tainted: G OE ------- --- 6.12.0-54.el10.aarch64 #1
|
||||
Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
|
||||
Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
|
||||
pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
|
||||
pc : refcount_warn_saturate+0xf4/0x148
|
||||
lr : refcount_warn_saturate+0xf4/0x148
|
||||
sp : ffff80008b81b7e0
|
||||
x29: ffff80008b81b7e0 x28: ffff000133d51600 x27: 0000000000000001
|
||||
x26: 0000000000000000 x25: 00000000ffffffea x24: ffff00010ae80f00
|
||||
x23: ffff00010ae80f80 x22: ffff0000c66e5d08 x21: 0000000000000000
|
||||
x20: ffff0000c66e0000 x19: ffff00010ae80340 x18: 0000000000000006
|
||||
x17: 0000000000000000 x16: 0000000000000020 x15: ffff80008b81b37f
|
||||
x14: 0000000000000000 x13: 2e656572662d7265 x12: ffff80008283ef78
|
||||
x11: ffff80008257efd0 x10: ffff80008283efd0 x9 : ffff80008021ed90
|
||||
x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff
|
||||
x5 : ffff0001fb8e3408 x4 : 0000000000000000 x3 : ffff800179993000
|
||||
x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000133d51600
|
||||
Call trace:
|
||||
refcount_warn_saturate+0xf4/0x148
|
||||
mlx5_core_put_rsc+0x88/0xa0 [mlx5_ib]
|
||||
mlx5_core_destroy_rq_tracked+0x64/0x98 [mlx5_ib]
|
||||
mlx5_ib_destroy_wq+0x34/0x80 [mlx5_ib]
|
||||
ib_destroy_wq_user+0x30/0xc0 [ib_core]
|
||||
uverbs_free_wq+0x28/0x58 [ib_uverbs]
|
||||
destroy_hw_idr_uobject+0x34/0x78 [ib_uverbs]
|
||||
uverbs_destroy_uobject+0x48/0x240 [ib_uverbs]
|
||||
__uverbs_cleanup_ufile+0xd4/0x1a8 [ib_uverbs]
|
||||
uverbs_destroy_ufile_hw+0x48/0x120 [ib_uverbs]
|
||||
ib_uverbs_close+0x2c/0x100 [ib_uverbs]
|
||||
__fput+0xd8/0x2f0
|
||||
__fput_sync+0x50/0x70
|
||||
__arm64_sys_close+0x40/0x90
|
||||
invoke_syscall.constprop.0+0x74/0xd0
|
||||
do_el0_svc+0x48/0xe8
|
||||
el0_svc+0x44/0x1d0
|
||||
el0t_64_sync_handler+0x120/0x130
|
||||
el0t_64_sync+0x1a4/0x1a8
|
||||
|
||||
Fixes: e2013b212f9f ("net/mlx5_core: Add RQ and SQ event handling")
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Link: https://patch.msgid.link/3181433ccdd695c63560eeeb3f0c990961732101.1745839855.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
|
||||
index d3dcc272200a..146d03ae40bd 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/qpc.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/qpc.c
|
||||
@@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
|
||||
spin_lock_irqsave(&table->lock, flags);
|
||||
|
||||
common = radix_tree_lookup(&table->tree, rsn);
|
||||
- if (common)
|
||||
+ if (common && !common->invalid)
|
||||
refcount_inc(&common->refcount);
|
||||
+ else
|
||||
+ common = NULL;
|
||||
|
||||
spin_unlock_irqrestore(&table->lock, flags);
|
||||
|
||||
@@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
|
||||
+ struct mlx5_core_qp *qp,
|
||||
+ bool invalid)
|
||||
+{
|
||||
+ struct mlx5_qp_table *table = &dev->qp_table;
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ spin_lock_irqsave(&table->lock, flags);
|
||||
+ qp->common.invalid = invalid;
|
||||
+ spin_unlock_irqrestore(&table->lock, flags);
|
||||
+}
|
||||
+
|
||||
static void destroy_resource_common(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_core_qp *qp)
|
||||
{
|
||||
@@ -609,8 +623,20 @@ int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
|
||||
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_core_qp *rq)
|
||||
{
|
||||
+ int ret;
|
||||
+
|
||||
+ /* The rq destruction can be called again in case it fails, hence we
|
||||
+ * mark the common resource as invalid and only once FW destruction
|
||||
+ * is completed successfully we actually destroy the resources.
|
||||
+ */
|
||||
+ modify_resource_common_state(dev, rq, true);
|
||||
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
|
||||
+ if (ret) {
|
||||
+ modify_resource_common_state(dev, rq, false);
|
||||
+ return ret;
|
||||
+ }
|
||||
destroy_resource_common(dev, rq);
|
||||
- return destroy_rq_tracked(dev, rq->qpn, rq->uid);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
|
||||
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
|
||||
index 04705078dfab..df76aece6be9 100644
|
||||
--- a/include/linux/mlx5/driver.h
|
||||
+++ b/include/linux/mlx5/driver.h
|
||||
@@ -398,6 +398,7 @@ struct mlx5_core_rsc_common {
|
||||
enum mlx5_res_type res;
|
||||
refcount_t refcount;
|
||||
struct completion free;
|
||||
+ bool invalid;
|
||||
};
|
||||
|
||||
struct mlx5_uars_page {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
78
SOURCES/1364-net-mlx5-support-software-tx-timestamp.patch
Normal file
78
SOURCES/1364-net-mlx5-support-software-tx-timestamp.patch
Normal file
@ -0,0 +1,78 @@
|
||||
From caaa5c0c5b3a539eefe31c4fe578b881ba6512bf Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:03 -0400
|
||||
Subject: [PATCH] net/mlx5: support software TX timestamp
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 2451d3fb388f29d87d1abd3d2952d5ce36109816
|
||||
Author: Stanislav Fomichev <sdf@fomichev.me>
|
||||
Date: Thu May 8 16:51:09 2025 -0700
|
||||
|
||||
net/mlx5: support software TX timestamp
|
||||
|
||||
Having a software timestamp (along with the existing hardware one) is
|
||||
useful to trace how the packets flow through the stack.
|
||||
mlx5e_tx_skb_update_hwts_flags is called from the TX paths
|
||||
to set up the HW timestamp; extend it to add a software one as well.
|
||||
|
||||
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
|
||||
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
|
||||
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
|
||||
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250508235109.585096-1-stfomichev@gmail.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
index 8578f03783bc..e6c9338ddae8 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
@@ -1686,6 +1686,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
|
||||
return 0;
|
||||
|
||||
info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
|
||||
+ SOF_TIMESTAMPING_TX_SOFTWARE |
|
||||
SOF_TIMESTAMPING_RX_HARDWARE |
|
||||
SOF_TIMESTAMPING_RAW_HARDWARE;
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
|
||||
index 4fd853d19e31..55a8629f0792 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
|
||||
@@ -337,10 +337,11 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
|
||||
};
|
||||
}
|
||||
|
||||
-static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
|
||||
+static void mlx5e_tx_skb_update_ts_flags(struct sk_buff *skb)
|
||||
{
|
||||
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
|
||||
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
|
||||
+ skb_tx_timestamp(skb);
|
||||
}
|
||||
|
||||
static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
|
||||
@@ -392,7 +393,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
|
||||
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
|
||||
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
|
||||
|
||||
- mlx5e_tx_skb_update_hwts_flags(skb);
|
||||
+ mlx5e_tx_skb_update_ts_flags(skb);
|
||||
|
||||
sq->pc += wi->num_wqebbs;
|
||||
|
||||
@@ -625,7 +626,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
|
||||
mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
|
||||
mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
|
||||
mlx5e_tx_mpwqe_add_dseg(sq, &txd);
|
||||
- mlx5e_tx_skb_update_hwts_flags(skb);
|
||||
+ mlx5e_tx_skb_update_ts_flags(skb);
|
||||
|
||||
if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
|
||||
/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,77 @@
|
||||
From 61618987cfeb590d9694abd9fbcdb68f8845d29b Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:03 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, expose function mlx5hws_table_ft_set_next_ft
|
||||
in header
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit d2338a27fcee9158d0378d759152b8e0a5933c88
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:01 2025 +0300
|
||||
|
||||
net/mlx5: HWS, expose function mlx5hws_table_ft_set_next_ft in header
|
||||
|
||||
In preparation for complex matcher support, make function
|
||||
mlx5hws_table_ft_set_next_ft() non-static and expose it in header.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-2-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
|
||||
index ab1297531232..568f691733f3 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
|
||||
@@ -342,10 +342,10 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx,
|
||||
return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id);
|
||||
}
|
||||
|
||||
-static int hws_table_ft_set_next_ft(struct mlx5hws_context *ctx,
|
||||
- u32 ft_id,
|
||||
- u32 fw_ft_type,
|
||||
- u32 next_ft_id)
|
||||
+int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx,
|
||||
+ u32 ft_id,
|
||||
+ u32 fw_ft_type,
|
||||
+ u32 next_ft_id)
|
||||
{
|
||||
struct mlx5hws_cmd_ft_modify_attr ft_attr = {0};
|
||||
|
||||
@@ -389,10 +389,10 @@ int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl,
|
||||
if (dst_tbl) {
|
||||
if (list_empty(&dst_tbl->matchers_list)) {
|
||||
/* Connect src_tbl last_ft to dst_tbl start anchor */
|
||||
- ret = hws_table_ft_set_next_ft(src_tbl->ctx,
|
||||
- last_ft_id,
|
||||
- src_tbl->fw_ft_type,
|
||||
- dst_tbl->ft_id);
|
||||
+ ret = mlx5hws_table_ft_set_next_ft(src_tbl->ctx,
|
||||
+ last_ft_id,
|
||||
+ src_tbl->fw_ft_type,
|
||||
+ dst_tbl->ft_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h
|
||||
index dd50420eec9e..0400cce0c317 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h
|
||||
@@ -65,4 +65,9 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx,
|
||||
u32 rtc_0_id,
|
||||
u32 rtc_1_id);
|
||||
|
||||
+int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx,
|
||||
+ u32 ft_id,
|
||||
+ u32 fw_ft_type,
|
||||
+ u32 next_ft_id);
|
||||
+
|
||||
#endif /* MLX5HWS_TABLE_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,263 @@
|
||||
From f8db8d6e3362f5fac65193f3ece0fffb4ad20588 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:03 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, add definer function to get field name str
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit fed5f4831281593a4bda2f8ef6912fdbcad6e670
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:02 2025 +0300
|
||||
|
||||
net/mlx5: HWS, add definer function to get field name str
|
||||
|
||||
In preparation for complex matcher support, add function for
|
||||
converting definer fname to str, which will be used in following
|
||||
patches.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-3-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
index 1061a46811ac..5cc0dc002ac1 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
@@ -158,6 +158,218 @@ struct mlx5hws_definer_conv_data {
|
||||
u32 match_flags;
|
||||
};
|
||||
|
||||
+#define HWS_DEFINER_ENTRY(name)[MLX5HWS_DEFINER_FNAME_##name] = #name
|
||||
+
|
||||
+static const char * const hws_definer_fname_to_str[] = {
|
||||
+ HWS_DEFINER_ENTRY(ETH_SMAC_47_16_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_SMAC_47_16_I),
|
||||
+ HWS_DEFINER_ENTRY(ETH_SMAC_15_0_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_SMAC_15_0_I),
|
||||
+ HWS_DEFINER_ENTRY(ETH_DMAC_47_16_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_DMAC_47_16_I),
|
||||
+ HWS_DEFINER_ENTRY(ETH_DMAC_15_0_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_DMAC_15_0_I),
|
||||
+ HWS_DEFINER_ENTRY(ETH_TYPE_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_TYPE_I),
|
||||
+ HWS_DEFINER_ENTRY(ETH_L3_TYPE_O),
|
||||
+ HWS_DEFINER_ENTRY(ETH_L3_TYPE_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_TYPE_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_TYPE_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_CFI_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_CFI_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_ID_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_ID_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_I),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_ID_O),
|
||||
+ HWS_DEFINER_ENTRY(VLAN_SECOND_ID_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_IHL_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_IHL_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_DSCP_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_DSCP_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_ECN_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_ECN_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_TTL_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_TTL_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_DST_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_DST_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_SRC_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV4_SRC_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_VERSION_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_VERSION_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_FRAG_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_FRAG_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_LEN_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_LEN_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_TOS_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_TOS_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_127_96_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_95_64_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_63_32_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_31_0_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_127_96_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_95_64_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_63_32_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_DST_31_0_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_127_96_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_95_64_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_63_32_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_31_0_O),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_127_96_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_95_64_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_63_32_I),
|
||||
+ HWS_DEFINER_ENTRY(IPV6_SRC_31_0_I),
|
||||
+ HWS_DEFINER_ENTRY(IP_PROTOCOL_O),
|
||||
+ HWS_DEFINER_ENTRY(IP_PROTOCOL_I),
|
||||
+ HWS_DEFINER_ENTRY(L4_SPORT_O),
|
||||
+ HWS_DEFINER_ENTRY(L4_SPORT_I),
|
||||
+ HWS_DEFINER_ENTRY(L4_DPORT_O),
|
||||
+ HWS_DEFINER_ENTRY(L4_DPORT_I),
|
||||
+ HWS_DEFINER_ENTRY(TCP_FLAGS_I),
|
||||
+ HWS_DEFINER_ENTRY(TCP_FLAGS_O),
|
||||
+ HWS_DEFINER_ENTRY(TCP_SEQ_NUM),
|
||||
+ HWS_DEFINER_ENTRY(TCP_ACK_NUM),
|
||||
+ HWS_DEFINER_ENTRY(GTP_TEID),
|
||||
+ HWS_DEFINER_ENTRY(GTP_MSG_TYPE),
|
||||
+ HWS_DEFINER_ENTRY(GTP_EXT_FLAG),
|
||||
+ HWS_DEFINER_ENTRY(GTP_NEXT_EXT_HDR),
|
||||
+ HWS_DEFINER_ENTRY(GTP_EXT_HDR_PDU),
|
||||
+ HWS_DEFINER_ENTRY(GTP_EXT_HDR_QFI),
|
||||
+ HWS_DEFINER_ENTRY(GTPU_DW0),
|
||||
+ HWS_DEFINER_ENTRY(GTPU_FIRST_EXT_DW0),
|
||||
+ HWS_DEFINER_ENTRY(GTPU_DW2),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_0),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_1),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_2),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_3),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_4),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_5),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_6),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER_7),
|
||||
+ HWS_DEFINER_ENTRY(VPORT_REG_C_0),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_FLAGS),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_VNI),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_GPE_FLAGS),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD0),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_GPE_PROTO),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_GPE_VNI),
|
||||
+ HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD1),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_LEN),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OAM),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_PROTO),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_VNI),
|
||||
+ HWS_DEFINER_ENTRY(SOURCE_QP),
|
||||
+ HWS_DEFINER_ENTRY(SOURCE_GVMI),
|
||||
+ HWS_DEFINER_ENTRY(REG_0),
|
||||
+ HWS_DEFINER_ENTRY(REG_1),
|
||||
+ HWS_DEFINER_ENTRY(REG_2),
|
||||
+ HWS_DEFINER_ENTRY(REG_3),
|
||||
+ HWS_DEFINER_ENTRY(REG_4),
|
||||
+ HWS_DEFINER_ENTRY(REG_5),
|
||||
+ HWS_DEFINER_ENTRY(REG_6),
|
||||
+ HWS_DEFINER_ENTRY(REG_7),
|
||||
+ HWS_DEFINER_ENTRY(REG_8),
|
||||
+ HWS_DEFINER_ENTRY(REG_9),
|
||||
+ HWS_DEFINER_ENTRY(REG_10),
|
||||
+ HWS_DEFINER_ENTRY(REG_11),
|
||||
+ HWS_DEFINER_ENTRY(REG_A),
|
||||
+ HWS_DEFINER_ENTRY(REG_B),
|
||||
+ HWS_DEFINER_ENTRY(GRE_KEY_PRESENT),
|
||||
+ HWS_DEFINER_ENTRY(GRE_C),
|
||||
+ HWS_DEFINER_ENTRY(GRE_K),
|
||||
+ HWS_DEFINER_ENTRY(GRE_S),
|
||||
+ HWS_DEFINER_ENTRY(GRE_PROTOCOL),
|
||||
+ HWS_DEFINER_ENTRY(GRE_OPT_KEY),
|
||||
+ HWS_DEFINER_ENTRY(GRE_OPT_SEQ),
|
||||
+ HWS_DEFINER_ENTRY(GRE_OPT_CHECKSUM),
|
||||
+ HWS_DEFINER_ENTRY(INTEGRITY_O),
|
||||
+ HWS_DEFINER_ENTRY(INTEGRITY_I),
|
||||
+ HWS_DEFINER_ENTRY(ICMP_DW1),
|
||||
+ HWS_DEFINER_ENTRY(ICMP_DW2),
|
||||
+ HWS_DEFINER_ENTRY(ICMP_DW3),
|
||||
+ HWS_DEFINER_ENTRY(IPSEC_SPI),
|
||||
+ HWS_DEFINER_ENTRY(IPSEC_SEQUENCE_NUMBER),
|
||||
+ HWS_DEFINER_ENTRY(IPSEC_SYNDROME),
|
||||
+ HWS_DEFINER_ENTRY(MPLS0_O),
|
||||
+ HWS_DEFINER_ENTRY(MPLS1_O),
|
||||
+ HWS_DEFINER_ENTRY(MPLS2_O),
|
||||
+ HWS_DEFINER_ENTRY(MPLS3_O),
|
||||
+ HWS_DEFINER_ENTRY(MPLS4_O),
|
||||
+ HWS_DEFINER_ENTRY(MPLS0_I),
|
||||
+ HWS_DEFINER_ENTRY(MPLS1_I),
|
||||
+ HWS_DEFINER_ENTRY(MPLS2_I),
|
||||
+ HWS_DEFINER_ENTRY(MPLS3_I),
|
||||
+ HWS_DEFINER_ENTRY(MPLS4_I),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER0_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER1_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER2_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER3_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER4_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER5_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER6_OK),
|
||||
+ HWS_DEFINER_ENTRY(FLEX_PARSER7_OK),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS0_O),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS1_O),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS2_O),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS3_O),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS4_O),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS0_I),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS1_I),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS2_I),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS3_I),
|
||||
+ HWS_DEFINER_ENTRY(OKS2_MPLS4_I),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_0),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_1),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_2),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_3),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_4),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_5),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_6),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_OK_7),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_0),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_1),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_2),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_3),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_4),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_5),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_6),
|
||||
+ HWS_DEFINER_ENTRY(GENEVE_OPT_DW_7),
|
||||
+ HWS_DEFINER_ENTRY(IB_L4_OPCODE),
|
||||
+ HWS_DEFINER_ENTRY(IB_L4_QPN),
|
||||
+ HWS_DEFINER_ENTRY(IB_L4_A),
|
||||
+ HWS_DEFINER_ENTRY(RANDOM_NUM),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L2_O),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L2_I),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L3_O),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L3_I),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L4_O),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L4_I),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L4_EXT_O),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_L4_EXT_I),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_FRAG_O),
|
||||
+ HWS_DEFINER_ENTRY(PTYPE_FRAG_I),
|
||||
+ HWS_DEFINER_ENTRY(TNL_HDR_0),
|
||||
+ HWS_DEFINER_ENTRY(TNL_HDR_1),
|
||||
+ HWS_DEFINER_ENTRY(TNL_HDR_2),
|
||||
+ HWS_DEFINER_ENTRY(TNL_HDR_3),
|
||||
+ [MLX5HWS_DEFINER_FNAME_MAX] = "DEFINER_FNAME_UNKNOWN",
|
||||
+};
|
||||
+
|
||||
+const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname)
|
||||
+{
|
||||
+ if (fname > MLX5HWS_DEFINER_FNAME_MAX)
|
||||
+ fname = MLX5HWS_DEFINER_FNAME_MAX;
|
||||
+ return hws_definer_fname_to_str[fname];
|
||||
+}
|
||||
+
|
||||
static void
|
||||
hws_definer_ones_set(struct mlx5hws_definer_fc *fc,
|
||||
void *match_param,
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
|
||||
index 5c1a2086efba..62da55389331 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
|
||||
@@ -831,4 +831,6 @@ mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx,
|
||||
u32 *match_param,
|
||||
int *fc_sz);
|
||||
|
||||
+const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname);
|
||||
+
|
||||
#endif /* HWS_DEFINER_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,120 @@
|
||||
From 7287dfe961f39c52d8e7fbc0a719036457df2b41 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:04 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, expose polling function in header file
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 3c739d1624e3c3186a0a0248e91851a085f6e45b
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:03 2025 +0300
|
||||
|
||||
net/mlx5: HWS, expose polling function in header file
|
||||
|
||||
In preparation for complex matcher, expose the function that is
|
||||
polling queue for completion (mlx5hws_bwc_queue_poll) in header
|
||||
file, so that it will be used by complex matcher code.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-4-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index 510bfbbe5991..27b6420678d8 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -223,10 +223,10 @@ int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static int hws_bwc_queue_poll(struct mlx5hws_context *ctx,
|
||||
- u16 queue_id,
|
||||
- u32 *pending_rules,
|
||||
- bool drain)
|
||||
+int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx,
|
||||
+ u16 queue_id,
|
||||
+ u32 *pending_rules,
|
||||
+ bool drain)
|
||||
{
|
||||
unsigned long timeout = jiffies +
|
||||
secs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT);
|
||||
@@ -361,7 +361,8 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
- ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true);
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id,
|
||||
+ &expected_completions, true);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
@@ -442,9 +443,8 @@ hws_bwc_rule_create_sync(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
- ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true);
|
||||
-
|
||||
- return ret;
|
||||
+ return mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id,
|
||||
+ &expected_completions, true);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -465,7 +465,8 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
- ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true);
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id,
|
||||
+ &expected_completions, true);
|
||||
if (unlikely(ret))
|
||||
mlx5hws_err(ctx, "Failed updating BWC rule (%d)\n", ret);
|
||||
|
||||
@@ -651,8 +652,10 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match
|
||||
&bwc_matcher->rules[i]) ?
|
||||
NULL : list_next_entry(bwc_rules[i], list_node);
|
||||
|
||||
- ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id,
|
||||
- &pending_rules[i], false);
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx,
|
||||
+ rule_attr.queue_id,
|
||||
+ &pending_rules[i],
|
||||
+ false);
|
||||
if (unlikely(ret)) {
|
||||
mlx5hws_err(ctx,
|
||||
"Moving BWC rule failed during rehash (%d)\n",
|
||||
@@ -669,8 +672,8 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match
|
||||
u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i);
|
||||
|
||||
mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]);
|
||||
- ret = hws_bwc_queue_poll(ctx, queue_id,
|
||||
- &pending_rules[i], true);
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx, queue_id,
|
||||
+ &pending_rules[i], true);
|
||||
if (unlikely(ret)) {
|
||||
mlx5hws_err(ctx,
|
||||
"Moving BWC rule failed during rehash (%d)\n", ret);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
index bb0cf4b922ce..a2aa2d5da694 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
@@ -64,6 +64,11 @@ void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher,
|
||||
u32 flow_source,
|
||||
struct mlx5hws_rule_attr *rule_attr);
|
||||
|
||||
+int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx,
|
||||
+ u16 queue_id,
|
||||
+ u32 *pending_rules,
|
||||
+ bool drain);
|
||||
+
|
||||
static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx)
|
||||
{
|
||||
/* Besides the control queue, half of the queues are
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
414
SOURCES/1368-net-mlx5-hws-introduce-isolated-matchers.patch
Normal file
414
SOURCES/1368-net-mlx5-hws-introduce-isolated-matchers.patch
Normal file
@ -0,0 +1,414 @@
|
||||
From e52f06951f1709e7bd3b78b3c4932d5fc69d10eb Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:04 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, introduce isolated matchers
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b816743a182f532faaeaa9aaed147ff09513e375
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:04 2025 +0300
|
||||
|
||||
net/mlx5: HWS, introduce isolated matchers
|
||||
|
||||
In preparation for complex matcher support, introduce the isolated
|
||||
matcher.
|
||||
|
||||
Isolated matcher is a matcher that has its own isolated table.
|
||||
It is used as the second half of the complex matcher: when the rule
|
||||
is split into two parts (complex rule), then matching on the first
|
||||
part will send the packet to the isolated matcher that will try to
|
||||
match on the second part. In case of miss, the packet goes back to
|
||||
the matcher's end flow table.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-5-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
index 5b0c1623499b..ce28ee1c0e41 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
|
||||
@@ -23,19 +23,199 @@ static void hws_matcher_destroy_end_ft(struct mlx5hws_matcher *matcher)
|
||||
mlx5hws_table_destroy_default_ft(matcher->tbl, matcher->end_ft_id);
|
||||
}
|
||||
|
||||
+int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl,
|
||||
+ u32 miss_ft_id)
|
||||
+{
|
||||
+ struct mlx5hws_matcher *tmp_matcher;
|
||||
+
|
||||
+ if (list_empty(&tbl->matchers_list))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ /* Update isolated_matcher_end_ft_id attribute for all
|
||||
+ * the matchers in isolated table.
|
||||
+ */
|
||||
+ list_for_each_entry(tmp_matcher, &tbl->matchers_list, list_node)
|
||||
+ tmp_matcher->attr.isolated_matcher_end_ft_id = miss_ft_id;
|
||||
+
|
||||
+ tmp_matcher = list_last_entry(&tbl->matchers_list,
|
||||
+ struct mlx5hws_matcher,
|
||||
+ list_node);
|
||||
+
|
||||
+ return mlx5hws_table_ft_set_next_ft(tbl->ctx,
|
||||
+ tmp_matcher->end_ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ miss_ft_id);
|
||||
+}
|
||||
+
|
||||
+static int hws_matcher_connect_end_ft_isolated(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ struct mlx5hws_table *tbl = matcher->tbl;
|
||||
+ u32 end_ft_id;
|
||||
+ int ret;
|
||||
+
|
||||
+ /* Reset end_ft next RTCs */
|
||||
+ ret = mlx5hws_table_ft_set_next_rtc(tbl->ctx,
|
||||
+ matcher->end_ft_id,
|
||||
+ matcher->tbl->fw_ft_type,
|
||||
+ 0, 0);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ /* Connect isolated matcher's end_ft to the complex matcher's end FT */
|
||||
+ end_ft_id = matcher->attr.isolated_matcher_end_ft_id;
|
||||
+ ret = mlx5hws_table_ft_set_next_ft(tbl->ctx,
|
||||
+ matcher->end_ft_id,
|
||||
+ matcher->tbl->fw_ft_type,
|
||||
+ end_ft_id);
|
||||
+
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ struct mlx5hws_table *tbl = matcher->tbl;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev,
|
||||
+ tbl,
|
||||
+ &matcher->end_ft_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ ret = hws_matcher_connect_end_ft_isolated(matcher);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to connect end FT\n");
|
||||
+ goto destroy_default_ft;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+destroy_default_ft:
|
||||
+ mlx5hws_table_destroy_default_ft(tbl, matcher->end_ft_id);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
int ret;
|
||||
|
||||
- ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &matcher->end_ft_id);
|
||||
+ if (mlx5hws_matcher_is_isolated(matcher))
|
||||
+ ret = hws_matcher_create_end_ft_isolated(matcher);
|
||||
+ else
|
||||
+ ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl,
|
||||
+ &matcher->end_ft_id);
|
||||
+
|
||||
if (ret) {
|
||||
mlx5hws_err(tbl->ctx, "Failed to create matcher end flow table\n");
|
||||
return ret;
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int hws_matcher_connect_isolated_first(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ struct mlx5hws_table *tbl = matcher->tbl;
|
||||
+ struct mlx5hws_context *ctx = tbl->ctx;
|
||||
+ int ret;
|
||||
+
|
||||
+ /* Isolated matcher's end_ft is already pointing to the end_ft
|
||||
+ * of the complex matcher - it was set at creation of end_ft,
|
||||
+ * so no need to connect it.
|
||||
+ * We still need to connect the isolated table's start FT to
|
||||
+ * this matcher's RTC.
|
||||
+ */
|
||||
+ ret = mlx5hws_table_ft_set_next_rtc(ctx,
|
||||
+ tbl->ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ matcher->match_ste.rtc_0_id,
|
||||
+ matcher->match_ste.rtc_1_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(ctx, "Isolated matcher: failed to connect start FT to match RTC\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ /* Reset table's FT default miss (drop refcount) */
|
||||
+ ret = mlx5hws_table_ft_set_default_next_ft(tbl, tbl->ft_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(ctx, "Isolated matcher: failed to reset table ft default miss\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ list_add(&matcher->list_node, &tbl->matchers_list);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static int hws_matcher_connect_isolated_last(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ struct mlx5hws_table *tbl = matcher->tbl;
|
||||
+ struct mlx5hws_context *ctx = tbl->ctx;
|
||||
+ struct mlx5hws_matcher *last;
|
||||
+ int ret;
|
||||
+
|
||||
+ last = list_last_entry(&tbl->matchers_list,
|
||||
+ struct mlx5hws_matcher,
|
||||
+ list_node);
|
||||
+
|
||||
+ /* New matcher's end_ft is already pointing to the end_ft of
|
||||
+ * the complex matcher.
|
||||
+ * Connect previous matcher's end_ft to this new matcher RTC.
|
||||
+ */
|
||||
+ ret = mlx5hws_table_ft_set_next_rtc(ctx,
|
||||
+ last->end_ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ matcher->match_ste.rtc_0_id,
|
||||
+ matcher->match_ste.rtc_1_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(ctx,
|
||||
+ "Isolated matcher: failed to connect matcher end_ft to new match RTC\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ /* Reset prev matcher FT default miss (drop refcount) */
|
||||
+ ret = mlx5hws_table_ft_set_default_next_ft(tbl, last->end_ft_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(ctx, "Isolated matcher: failed to reset matcher ft default miss\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ /* Insert after the last matcher */
|
||||
+ list_add(&matcher->list_node, &last->list_node);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int hws_matcher_connect_isolated(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ /* Isolated matcher is expected to be the only one in its table.
|
||||
+ * However, it can have a collision matcher, and it can go through
|
||||
+ * rehash process, in which case we will temporary have both old and
|
||||
+ * new matchers in the isolated table.
|
||||
+ * Check if this is the first matcher in the isolated table.
|
||||
+ */
|
||||
+ if (list_empty(&matcher->tbl->matchers_list))
|
||||
+ return hws_matcher_connect_isolated_first(matcher);
|
||||
+
|
||||
+ /* If this wasn't the first matcher, then we have 3 possible cases:
|
||||
+ * - this is a collision matcher for the first matcher
|
||||
+ * - this is a new rehash dest matcher
|
||||
+ * - this is a collision matcher for the new rehash dest matcher
|
||||
+ * The logic to add new matcher is the same for all these cases.
|
||||
+ */
|
||||
+ return hws_matcher_connect_isolated_last(matcher);
|
||||
+}
|
||||
+
|
||||
static int hws_matcher_connect(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
struct mlx5hws_table *tbl = matcher->tbl;
|
||||
@@ -45,6 +225,9 @@ static int hws_matcher_connect(struct mlx5hws_matcher *matcher)
|
||||
struct mlx5hws_matcher *tmp_matcher;
|
||||
int ret;
|
||||
|
||||
+ if (mlx5hws_matcher_is_isolated(matcher))
|
||||
+ return hws_matcher_connect_isolated(matcher);
|
||||
+
|
||||
/* Find location in matcher list */
|
||||
if (list_empty(&tbl->matchers_list)) {
|
||||
list_add(&matcher->list_node, &tbl->matchers_list);
|
||||
@@ -121,6 +304,92 @@ static int hws_matcher_connect(struct mlx5hws_matcher *matcher)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int hws_matcher_disconnect_isolated(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ struct mlx5hws_matcher *first, *last, *prev, *next;
|
||||
+ struct mlx5hws_table *tbl = matcher->tbl;
|
||||
+ struct mlx5hws_context *ctx = tbl->ctx;
|
||||
+ u32 end_ft_id;
|
||||
+ int ret;
|
||||
+
|
||||
+ first = list_first_entry(&tbl->matchers_list,
|
||||
+ struct mlx5hws_matcher,
|
||||
+ list_node);
|
||||
+ last = list_last_entry(&tbl->matchers_list,
|
||||
+ struct mlx5hws_matcher,
|
||||
+ list_node);
|
||||
+ prev = list_prev_entry(matcher, list_node);
|
||||
+ next = list_next_entry(matcher, list_node);
|
||||
+
|
||||
+ list_del_init(&matcher->list_node);
|
||||
+
|
||||
+ if (first == last) {
|
||||
+ /* This was the only matcher in the list.
|
||||
+ * Reset isolated table FT next RTCs and connect it
|
||||
+ * to the whole complex matcher end FT instead.
|
||||
+ */
|
||||
+ ret = mlx5hws_table_ft_set_next_rtc(ctx,
|
||||
+ tbl->ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ 0, 0);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ end_ft_id = matcher->attr.isolated_matcher_end_ft_id;
|
||||
+ ret = mlx5hws_table_ft_set_next_ft(tbl->ctx,
|
||||
+ tbl->ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ end_ft_id);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n");
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ /* At this point we know that there are more matchers in the list */
|
||||
+
|
||||
+ if (matcher == first) {
|
||||
+ /* We've disconnected the first matcher.
|
||||
+ * Now update isolated table default FT.
|
||||
+ */
|
||||
+ if (!next)
|
||||
+ return -EINVAL;
|
||||
+ return mlx5hws_table_ft_set_next_rtc(ctx,
|
||||
+ tbl->ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ next->match_ste.rtc_0_id,
|
||||
+ next->match_ste.rtc_1_id);
|
||||
+ }
|
||||
+
|
||||
+ if (matcher == last) {
|
||||
+ /* If we've disconnected the last matcher - update prev
|
||||
+ * matcher's end_ft to point to the complex matcher end_ft.
|
||||
+ */
|
||||
+ if (!prev)
|
||||
+ return -EINVAL;
|
||||
+ return hws_matcher_connect_end_ft_isolated(prev);
|
||||
+ }
|
||||
+
|
||||
+ /* This wasn't the first or the last matcher, which means that it has
|
||||
+ * both prev and next matchers. Note that this only happens if we're
|
||||
+ * disconnecting collision matcher of the old matcher during rehash.
|
||||
+ */
|
||||
+ if (!prev || !next ||
|
||||
+ !(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ /* Update prev end FT to point to next match RTC */
|
||||
+ return mlx5hws_table_ft_set_next_rtc(ctx,
|
||||
+ prev->end_ft_id,
|
||||
+ tbl->fw_ft_type,
|
||||
+ next->match_ste.rtc_0_id,
|
||||
+ next->match_ste.rtc_1_id);
|
||||
+}
|
||||
+
|
||||
static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
struct mlx5hws_matcher *next = NULL, *prev = NULL;
|
||||
@@ -128,6 +397,9 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
|
||||
u32 prev_ft_id = tbl->ft_id;
|
||||
int ret;
|
||||
|
||||
+ if (mlx5hws_matcher_is_isolated(matcher))
|
||||
+ return hws_matcher_disconnect_isolated(matcher);
|
||||
+
|
||||
if (!list_is_first(&matcher->list_node, &tbl->matchers_list)) {
|
||||
prev = list_prev_entry(matcher, list_node);
|
||||
prev_ft_id = prev->end_ft_id;
|
||||
@@ -531,6 +803,8 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps,
|
||||
attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log);
|
||||
|
||||
matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0;
|
||||
+ matcher->flags |= attr->isolated_matcher_end_ft_id ?
|
||||
+ MLX5HWS_MATCHER_FLAGS_ISOLATED : 0;
|
||||
|
||||
return hws_matcher_check_attr_sz(caps, matcher);
|
||||
}
|
||||
@@ -617,6 +891,8 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher)
|
||||
col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO;
|
||||
|
||||
col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach;
|
||||
+ col_matcher->attr.isolated_matcher_end_ft_id =
|
||||
+ matcher->attr.isolated_matcher_end_ft_id;
|
||||
|
||||
ret = hws_matcher_process_attr(ctx->caps, col_matcher);
|
||||
if (ret)
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
index 8e95158a66b5..32e83cddcd60 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
|
||||
@@ -34,6 +34,7 @@ enum mlx5hws_matcher_offset {
|
||||
enum mlx5hws_matcher_flags {
|
||||
MLX5HWS_MATCHER_FLAGS_COLLISION = 1 << 2,
|
||||
MLX5HWS_MATCHER_FLAGS_RESIZABLE = 1 << 3,
|
||||
+ MLX5HWS_MATCHER_FLAGS_ISOLATED = 1 << 4,
|
||||
};
|
||||
|
||||
struct mlx5hws_match_template {
|
||||
@@ -96,9 +97,17 @@ static inline bool mlx5hws_matcher_is_in_resize(struct mlx5hws_matcher *matcher)
|
||||
return !!matcher->resize_dst;
|
||||
}
|
||||
|
||||
+static inline bool mlx5hws_matcher_is_isolated(struct mlx5hws_matcher *matcher)
|
||||
+{
|
||||
+ return !!(matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED);
|
||||
+}
|
||||
+
|
||||
static inline bool mlx5hws_matcher_is_insert_by_idx(struct mlx5hws_matcher *matcher)
|
||||
{
|
||||
return matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX;
|
||||
}
|
||||
|
||||
+int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl,
|
||||
+ u32 miss_ft_id);
|
||||
+
|
||||
#endif /* HWS_MATCHER_H_ */
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
index 5121951f2778..fbd63369da10 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
@@ -119,6 +119,8 @@ struct mlx5hws_matcher_attr {
|
||||
};
|
||||
/* Optional AT attach configuration - Max number of additional AT */
|
||||
u8 max_num_of_at_attach;
|
||||
+ /* Optional end FT (miss FT ID) for match RTC (for isolated matcher) */
|
||||
+ u32 isolated_matcher_end_ft_id;
|
||||
};
|
||||
|
||||
struct mlx5hws_rule_attr {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
1740
SOURCES/1369-net-mlx5-hws-support-complex-matchers.patch
Normal file
1740
SOURCES/1369-net-mlx5-hws-support-complex-matchers.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,93 @@
|
||||
From bf97c3c319b7e24f5e432eae209ee48da86864e3 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:04 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, force rehash when rule insertion failed
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 9d4024edce1063b616fa8bf7b2363290503cc322
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:06 2025 +0300
|
||||
|
||||
net/mlx5: HWS, force rehash when rule insertion failed
|
||||
|
||||
Rules are inserted into hash table in accordance with their hash index.
|
||||
When a certain number of rules is reached, the table is rehashed:
|
||||
a bigger new table is allocated and all the rules are moved there.
|
||||
But sometimes a new rule can't be inserted into the hash table
|
||||
because its index is full, even though the number of rules in the
|
||||
table is well below the threshold. The hash function is not perfect,
|
||||
so such cases are not rare. When that happens, we want to do the same
|
||||
rehash, in order to increase the table size and lower the probability
|
||||
for such cases.
|
||||
|
||||
This patch fixes the usecase where rule insertion was failing, but
|
||||
rehash couldn't be initiated due to low number of rules: it adds flag
|
||||
that denotes that rehash is required, even if the number of rules in
|
||||
the table is below the rehash threshold.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-7-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index d70db6948dbb..dce2605fc99b 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -169,6 +169,7 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table,
|
||||
return NULL;
|
||||
|
||||
atomic_set(&bwc_matcher->num_of_rules, 0);
|
||||
+ atomic_set(&bwc_matcher->rehash_required, false);
|
||||
|
||||
/* Check if the required match params can be all matched
|
||||
* in single STE, otherwise complex matcher is needed.
|
||||
@@ -769,9 +770,9 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
|
||||
/* It is possible that other rule has already performed rehash.
|
||||
* Need to check again if we really need rehash.
|
||||
- * If the reason for rehash was size, but not any more - skip rehash.
|
||||
*/
|
||||
- if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher,
|
||||
+ if (!atomic_read(&bwc_matcher->rehash_required) &&
|
||||
+ !hws_bwc_matcher_rehash_size_needed(bwc_matcher,
|
||||
atomic_read(&bwc_matcher->num_of_rules)))
|
||||
return 0;
|
||||
|
||||
@@ -782,6 +783,8 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
* - destroy the old matcher
|
||||
*/
|
||||
|
||||
+ atomic_set(&bwc_matcher->rehash_required, false);
|
||||
+
|
||||
ret = hws_bwc_matcher_extend_size(bwc_matcher);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -875,6 +878,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
* Try rehash by size and insert rule again - last chance.
|
||||
*/
|
||||
|
||||
+ atomic_set(&bwc_matcher->rehash_required, true);
|
||||
mutex_unlock(queue_lock);
|
||||
|
||||
hws_bwc_lock_all_queues(ctx);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
index cf2b65146317..d21fc247a510 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
|
||||
@@ -30,6 +30,7 @@ struct mlx5hws_bwc_matcher {
|
||||
u8 size_log;
|
||||
u32 priority;
|
||||
atomic_t num_of_rules;
|
||||
+ atomic_t rehash_required;
|
||||
struct list_head *rules;
|
||||
};
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,99 @@
|
||||
From cfa1d3ecb42da569e4f38ae06c426e91a98f92ea Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:05 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, fix counting of rules in the matcher
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 4c56b5cbc323a10ebb6595500fb78fd8a4762efd
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:07 2025 +0300
|
||||
|
||||
net/mlx5: HWS, fix counting of rules in the matcher
|
||||
|
||||
Currently the counter that counts number of rules in a matcher is
|
||||
increased only when rule insertion is completed. In a multi-threaded
|
||||
use case this can lead to a scenario where many rules can be in the process
|
||||
of insertion in the same matcher, while none of them has completed
|
||||
the insertion and the rule counter is not updated. This results in
|
||||
a rule insertion failure for many of them at first attempt, which
|
||||
leads to all of them requiring rehash and requiring locking of all
|
||||
the queue locks.
|
||||
|
||||
This patch fixes the case by increasing the rule counter in the
|
||||
beginning of insertion process and decreasing in case of any failure.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-8-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index dce2605fc99b..7d991a61eeb3 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -341,16 +341,12 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx)
|
||||
{
|
||||
struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
|
||||
|
||||
- atomic_inc(&bwc_matcher->num_of_rules);
|
||||
bwc_rule->bwc_queue_idx = idx;
|
||||
list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]);
|
||||
}
|
||||
|
||||
static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule)
|
||||
{
|
||||
- struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
|
||||
-
|
||||
- atomic_dec(&bwc_matcher->num_of_rules);
|
||||
list_del_init(&bwc_rule->list_node);
|
||||
}
|
||||
|
||||
@@ -404,6 +400,7 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule)
|
||||
mutex_lock(queue_lock);
|
||||
|
||||
ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr);
|
||||
+ atomic_dec(&bwc_matcher->num_of_rules);
|
||||
hws_bwc_rule_list_remove(bwc_rule);
|
||||
|
||||
mutex_unlock(queue_lock);
|
||||
@@ -840,7 +837,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
}
|
||||
|
||||
/* check if number of rules require rehash */
|
||||
- num_of_rules = atomic_read(&bwc_matcher->num_of_rules);
|
||||
+ num_of_rules = atomic_inc_return(&bwc_matcher->num_of_rules);
|
||||
|
||||
if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) {
|
||||
mutex_unlock(queue_lock);
|
||||
@@ -854,6 +851,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP,
|
||||
bwc_matcher->size_log,
|
||||
ret);
|
||||
+ atomic_dec(&bwc_matcher->num_of_rules);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -887,6 +885,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
|
||||
if (ret) {
|
||||
mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret);
|
||||
+ atomic_dec(&bwc_matcher->num_of_rules);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -902,6 +901,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
if (unlikely(ret)) {
|
||||
mutex_unlock(queue_lock);
|
||||
mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret);
|
||||
+ atomic_dec(&bwc_matcher->num_of_rules);
|
||||
return ret;
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,171 @@
|
||||
From cb05a4cab576c3226584ec674529506113f984f5 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:05 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, fix redundant extension of action templates
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 041861b40f599311214a52075140db8be29fd27f
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:08 2025 +0300
|
||||
|
||||
net/mlx5: HWS, fix redundant extension of action templates
|
||||
|
||||
When a rule is inserted into a matcher, we search for the suitable
|
||||
action template. If such template is not found, action template array
|
||||
is extended with the new template. However, when several threads are
|
||||
performing this in parallel, there is a race - we can end up with
|
||||
extending the action templates array with the same template.
|
||||
|
||||
This patch is doing the following:
|
||||
- refactor the code to find action template index in rule create and
|
||||
update, have the common code in an auxiliary function
|
||||
- after locking all the queues, check again if the action template
|
||||
array still needs to be extended
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-9-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index 7d991a61eeb3..456fac895f5e 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -789,6 +789,53 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
return hws_bwc_matcher_move(bwc_matcher);
|
||||
}
|
||||
|
||||
+static int hws_bwc_rule_get_at_idx(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
+ struct mlx5hws_rule_action rule_actions[],
|
||||
+ u16 bwc_queue_idx)
|
||||
+{
|
||||
+ struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
|
||||
+ struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
|
||||
+ struct mutex *queue_lock; /* Protect the queue */
|
||||
+ int at_idx, ret;
|
||||
+
|
||||
+ /* check if rehash needed due to missing action template */
|
||||
+ at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
|
||||
+ if (likely(at_idx >= 0))
|
||||
+ return at_idx;
|
||||
+
|
||||
+ /* we need to extend BWC matcher action templates array */
|
||||
+ queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx);
|
||||
+ mutex_unlock(queue_lock);
|
||||
+ hws_bwc_lock_all_queues(ctx);
|
||||
+
|
||||
+ /* check again - perhaps other thread already did extend_at */
|
||||
+ at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
|
||||
+ if (at_idx >= 0)
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions);
|
||||
+ if (unlikely(ret)) {
|
||||
+ mlx5hws_err(ctx, "BWC rule: failed extending AT (%d)", ret);
|
||||
+ at_idx = -EINVAL;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /* action templates array was extended, we need the last idx */
|
||||
+ at_idx = bwc_matcher->num_of_at - 1;
|
||||
+ ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
+ bwc_matcher->at[at_idx]);
|
||||
+ if (unlikely(ret)) {
|
||||
+ mlx5hws_err(ctx, "BWC rule: failed attaching new AT (%d)", ret);
|
||||
+ at_idx = -EINVAL;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+out:
|
||||
+ hws_bwc_unlock_all_queues(ctx);
|
||||
+ mutex_lock(queue_lock);
|
||||
+ return at_idx;
|
||||
+}
|
||||
+
|
||||
int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
u32 *match_param,
|
||||
struct mlx5hws_rule_action rule_actions[],
|
||||
@@ -809,31 +856,12 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
|
||||
mutex_lock(queue_lock);
|
||||
|
||||
- /* check if rehash needed due to missing action template */
|
||||
- at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
|
||||
+ at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, bwc_queue_idx);
|
||||
if (unlikely(at_idx < 0)) {
|
||||
- /* we need to extend BWC matcher action templates array */
|
||||
mutex_unlock(queue_lock);
|
||||
- hws_bwc_lock_all_queues(ctx);
|
||||
-
|
||||
- ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions);
|
||||
- if (unlikely(ret)) {
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- /* action templates array was extended, we need the last idx */
|
||||
- at_idx = bwc_matcher->num_of_at - 1;
|
||||
-
|
||||
- ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx]);
|
||||
- if (unlikely(ret)) {
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- return ret;
|
||||
- }
|
||||
-
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- mutex_lock(queue_lock);
|
||||
+ mlx5hws_err(ctx, "BWC rule create: failed getting AT (%d)",
|
||||
+ ret);
|
||||
+ return -EINVAL;
|
||||
}
|
||||
|
||||
/* check if number of rules require rehash */
|
||||
@@ -971,36 +999,11 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
|
||||
mutex_lock(queue_lock);
|
||||
|
||||
- /* check if rehash needed due to missing action template */
|
||||
- at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
|
||||
+ at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, idx);
|
||||
if (unlikely(at_idx < 0)) {
|
||||
- /* we need to extend BWC matcher action templates array */
|
||||
mutex_unlock(queue_lock);
|
||||
- hws_bwc_lock_all_queues(ctx);
|
||||
-
|
||||
- /* check again - perhaps other thread already did extend_at */
|
||||
- at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions);
|
||||
- if (likely(at_idx < 0)) {
|
||||
- ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions);
|
||||
- if (unlikely(ret)) {
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- mlx5hws_err(ctx, "BWC rule update: failed extending AT (%d)", ret);
|
||||
- return -EINVAL;
|
||||
- }
|
||||
-
|
||||
- /* action templates array was extended, we need the last idx */
|
||||
- at_idx = bwc_matcher->num_of_at - 1;
|
||||
-
|
||||
- ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
|
||||
- bwc_matcher->at[at_idx]);
|
||||
- if (unlikely(ret)) {
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- return ret;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- hws_bwc_unlock_all_queues(ctx);
|
||||
- mutex_lock(queue_lock);
|
||||
+ mlx5hws_err(ctx, "BWC rule update: failed getting AT\n");
|
||||
+ return -EINVAL;
|
||||
}
|
||||
|
||||
ret = hws_bwc_rule_update_sync(bwc_rule,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
209
SOURCES/1373-net-mlx5-hws-rework-rehash-loop.patch
Normal file
209
SOURCES/1373-net-mlx5-hws-rework-rehash-loop.patch
Normal file
@ -0,0 +1,209 @@
|
||||
From a380ec59fea7f1801a94b324bd7f688f2d3be0dd Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:05 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, rework rehash loop
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit ef94799a87415790d4297cf06075f99b70c420cd
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:09 2025 +0300
|
||||
|
||||
net/mlx5: HWS, rework rehash loop
|
||||
|
||||
Reworking the rehash loop - simplifying the code and making it less
|
||||
error prone:
|
||||
- Instead of doing round-robin on all the queues with batch of rules in
|
||||
each cycle, just go over all the queues and move all the rules that
|
||||
belong to this queue.
|
||||
- If at some stage of moving the rule we get a failure (which should
|
||||
not happen), this can't be rolled back. So instead of aborting
|
||||
rehash and leaving the matcher in a broken state, allow the loop
|
||||
to continue: attempt to move the rest of the rules and delete the
|
||||
old matcher. A rule that failed to move to a new matcher will lose
|
||||
its match STE once the rehash is completed and the old matcher is
|
||||
deleted, so the rule won't match any traffic any more. This rule's
|
||||
packets will fall back to the steering pipeline w/o HW offload.
|
||||
Rehash procedure will return an error, which will cause the rule
|
||||
insertion to fail for the rule that started this whole rehash.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-10-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
index 456fac895f5e..9e057f808ea5 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
|
||||
@@ -610,95 +610,69 @@ hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher,
|
||||
|
||||
static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
{
|
||||
+ bool move_error = false, poll_error = false, drain_error = false;
|
||||
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
|
||||
+ struct mlx5hws_matcher *matcher = bwc_matcher->matcher;
|
||||
u16 bwc_queues = mlx5hws_bwc_queues(ctx);
|
||||
- struct mlx5hws_bwc_rule **bwc_rules;
|
||||
struct mlx5hws_rule_attr rule_attr;
|
||||
- u32 *pending_rules;
|
||||
- int i, j, ret = 0;
|
||||
- bool all_done;
|
||||
- u16 burst_th;
|
||||
+ struct mlx5hws_bwc_rule *bwc_rule;
|
||||
+ struct mlx5hws_send_engine *queue;
|
||||
+ struct list_head *rules_list;
|
||||
+ u32 pending_rules;
|
||||
+ int i, ret = 0;
|
||||
|
||||
mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr);
|
||||
|
||||
- pending_rules = kcalloc(bwc_queues, sizeof(*pending_rules), GFP_KERNEL);
|
||||
- if (!pending_rules)
|
||||
- return -ENOMEM;
|
||||
-
|
||||
- bwc_rules = kcalloc(bwc_queues, sizeof(*bwc_rules), GFP_KERNEL);
|
||||
- if (!bwc_rules) {
|
||||
- ret = -ENOMEM;
|
||||
- goto free_pending_rules;
|
||||
- }
|
||||
-
|
||||
for (i = 0; i < bwc_queues; i++) {
|
||||
if (list_empty(&bwc_matcher->rules[i]))
|
||||
- bwc_rules[i] = NULL;
|
||||
- else
|
||||
- bwc_rules[i] = list_first_entry(&bwc_matcher->rules[i],
|
||||
- struct mlx5hws_bwc_rule,
|
||||
- list_node);
|
||||
- }
|
||||
+ continue;
|
||||
|
||||
- do {
|
||||
- all_done = true;
|
||||
+ pending_rules = 0;
|
||||
+ rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i);
|
||||
+ rules_list = &bwc_matcher->rules[i];
|
||||
|
||||
- for (i = 0; i < bwc_queues; i++) {
|
||||
- rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i);
|
||||
- burst_th = hws_bwc_get_burst_th(ctx, rule_attr.queue_id);
|
||||
-
|
||||
- for (j = 0; j < burst_th && bwc_rules[i]; j++) {
|
||||
- rule_attr.burst = !!((j + 1) % burst_th);
|
||||
- ret = mlx5hws_matcher_resize_rule_move(bwc_matcher->matcher,
|
||||
- bwc_rules[i]->rule,
|
||||
- &rule_attr);
|
||||
- if (unlikely(ret)) {
|
||||
- mlx5hws_err(ctx,
|
||||
- "Moving BWC rule failed during rehash (%d)\n",
|
||||
- ret);
|
||||
- goto free_bwc_rules;
|
||||
- }
|
||||
+ list_for_each_entry(bwc_rule, rules_list, list_node) {
|
||||
+ ret = mlx5hws_matcher_resize_rule_move(matcher,
|
||||
+ bwc_rule->rule,
|
||||
+ &rule_attr);
|
||||
+ if (unlikely(ret && !move_error)) {
|
||||
+ mlx5hws_err(ctx,
|
||||
+ "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n",
|
||||
+ ret);
|
||||
+ move_error = true;
|
||||
+ }
|
||||
|
||||
- all_done = false;
|
||||
- pending_rules[i]++;
|
||||
- bwc_rules[i] = list_is_last(&bwc_rules[i]->list_node,
|
||||
- &bwc_matcher->rules[i]) ?
|
||||
- NULL : list_next_entry(bwc_rules[i], list_node);
|
||||
-
|
||||
- ret = mlx5hws_bwc_queue_poll(ctx,
|
||||
- rule_attr.queue_id,
|
||||
- &pending_rules[i],
|
||||
- false);
|
||||
- if (unlikely(ret)) {
|
||||
- mlx5hws_err(ctx,
|
||||
- "Moving BWC rule failed during rehash (%d)\n",
|
||||
- ret);
|
||||
- goto free_bwc_rules;
|
||||
- }
|
||||
+ pending_rules++;
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx,
|
||||
+ rule_attr.queue_id,
|
||||
+ &pending_rules,
|
||||
+ false);
|
||||
+ if (unlikely(ret && !poll_error)) {
|
||||
+ mlx5hws_err(ctx,
|
||||
+ "Moving BWC rule: poll failed (%d), attempting to move rest of the rules\n",
|
||||
+ ret);
|
||||
+ poll_error = true;
|
||||
}
|
||||
}
|
||||
- } while (!all_done);
|
||||
-
|
||||
- /* drain all the bwc queues */
|
||||
- for (i = 0; i < bwc_queues; i++) {
|
||||
- if (pending_rules[i]) {
|
||||
- u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i);
|
||||
|
||||
- mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]);
|
||||
- ret = mlx5hws_bwc_queue_poll(ctx, queue_id,
|
||||
- &pending_rules[i], true);
|
||||
- if (unlikely(ret)) {
|
||||
+ if (pending_rules) {
|
||||
+ queue = &ctx->send_queue[rule_attr.queue_id];
|
||||
+ mlx5hws_send_engine_flush_queue(queue);
|
||||
+ ret = mlx5hws_bwc_queue_poll(ctx,
|
||||
+ rule_attr.queue_id,
|
||||
+ &pending_rules,
|
||||
+ true);
|
||||
+ if (unlikely(ret && !drain_error)) {
|
||||
mlx5hws_err(ctx,
|
||||
- "Moving BWC rule failed during rehash (%d)\n", ret);
|
||||
- goto free_bwc_rules;
|
||||
+ "Moving BWC rule: drain failed (%d), attempting to move rest of the rules\n",
|
||||
+ ret);
|
||||
+ drain_error = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
-free_bwc_rules:
|
||||
- kfree(bwc_rules);
|
||||
-free_pending_rules:
|
||||
- kfree(pending_rules);
|
||||
+ if (move_error || poll_error || drain_error)
|
||||
+ ret = -EINVAL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -742,15 +716,18 @@ static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher)
|
||||
}
|
||||
|
||||
ret = hws_bwc_matcher_move_all(bwc_matcher);
|
||||
- if (ret) {
|
||||
- mlx5hws_err(ctx, "Rehash error: moving rules failed\n");
|
||||
- return -ENOMEM;
|
||||
- }
|
||||
+ if (ret)
|
||||
+ mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n");
|
||||
+
|
||||
+ /* Error during rehash can't be rolled back.
|
||||
+ * The best option here is to allow the rehash to complete and remove
|
||||
+ * the old matcher - can't leave the matcher in the 'in_resize' state.
|
||||
+ */
|
||||
|
||||
bwc_matcher->matcher = new_matcher;
|
||||
mlx5hws_matcher_destroy(old_matcher);
|
||||
|
||||
- return 0;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
191
SOURCES/1374-net-mlx5-hws-dump-bad-completion-details.patch
Normal file
191
SOURCES/1374-net-mlx5-hws-dump-bad-completion-details.patch
Normal file
@ -0,0 +1,191 @@
|
||||
From 965036aa649e8bc7524cb8c517e5701242864867 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:05 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, dump bad completion details
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 578b856b5e72b7b8cd2390a0e525e240d3e80c92
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Sun May 11 22:38:10 2025 +0300
|
||||
|
||||
net/mlx5: HWS, dump bad completion details
|
||||
|
||||
Failing to insert/delete a rule should not happen. If it does happen,
|
||||
it would be good to know at which stage it happened and what was the
|
||||
failure. This patch adds printing of bad CQE details.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1746992290-568936-11-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
|
||||
index cb6abc4ab7df..c4b22be19a9b 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
|
||||
@@ -344,18 +344,133 @@ hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue,
|
||||
}
|
||||
}
|
||||
|
||||
+static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue,
|
||||
+ struct mlx5hws_send_ring_priv *priv,
|
||||
+ struct mlx5_cqe64 *cqe)
|
||||
+{
|
||||
+ u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0;
|
||||
+ struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx;
|
||||
+ u32 opcode = cqe ? get_cqe_opcode(cqe) : 0;
|
||||
+ struct mlx5hws_rule *rule = priv->rule;
|
||||
+
|
||||
+ /* If something bad happens and lots of rules are failing, we don't
|
||||
+ * want to pollute dmesg. Print only the first bad cqe per engine,
|
||||
+ * the one that started the avalanche.
|
||||
+ */
|
||||
+ if (queue->error_cqe_printed)
|
||||
+ return;
|
||||
+
|
||||
+ queue->error_cqe_printed = true;
|
||||
+
|
||||
+ if (mlx5hws_rule_move_in_progress(rule))
|
||||
+ mlx5hws_err(ctx,
|
||||
+ "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ rule->resize_info->state ==
|
||||
+ MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" :
|
||||
+ rule->resize_info->state ==
|
||||
+ MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" :
|
||||
+ "UNKNOWN",
|
||||
+ rule->pending_wqes);
|
||||
+ else
|
||||
+ mlx5hws_err(ctx,
|
||||
+ "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ rule->status ==
|
||||
+ MLX5HWS_RULE_STATUS_CREATING ? "CREATING" :
|
||||
+ rule->status ==
|
||||
+ MLX5HWS_RULE_STATUS_DELETING ? "DELETING" :
|
||||
+ rule->status ==
|
||||
+ MLX5HWS_RULE_STATUS_FAILING ? "FAILING" :
|
||||
+ rule->status ==
|
||||
+ MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA",
|
||||
+ rule->status,
|
||||
+ rule->pending_wqes);
|
||||
+
|
||||
+ mlx5hws_err(ctx, " rule 0x%08llx: matcher 0x%llx %s\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ HWS_PTR_TO_ID(rule->matcher),
|
||||
+ (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ?
|
||||
+ "(isolated)" : "");
|
||||
+
|
||||
+ if (!cqe) {
|
||||
+ mlx5hws_err(ctx, " rule 0x%08llx: no CQE\n",
|
||||
+ HWS_PTR_TO_ID(rule));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ mlx5hws_err(ctx, " rule 0x%08llx: cqe->opcode = %d %s\n",
|
||||
+ HWS_PTR_TO_ID(rule), opcode,
|
||||
+ opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" :
|
||||
+ opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " ");
|
||||
+
|
||||
+ if (opcode == MLX5_CQE_REQ_ERR) {
|
||||
+ struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
|
||||
+
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |--- hw_error_syndrome = 0x%x\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ err_cqe->rsvd1[16]);
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |--- hw_syndrome_type = 0x%x\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ err_cqe->rsvd1[17] >> 4);
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |--- vendor_err_synd = 0x%x\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ err_cqe->vendor_err_synd);
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |--- syndrome = 0x%x\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ err_cqe->syndrome);
|
||||
+ }
|
||||
+
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: cqe->byte_cnt = 0x%08x\n",
|
||||
+ HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt));
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |-- UPDATE STATUS = %s\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ?
|
||||
+ "FAILURE" : "SUCCESS");
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |------- SYNDROME = %s\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ?
|
||||
+ "SET_FLOW_FAIL" :
|
||||
+ ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ?
|
||||
+ "DISABLE_FLOW_FAIL" : "UNKNOWN");
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: cqe->sop_drop_qpn = 0x%08x\n",
|
||||
+ HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn));
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |-send wqe opcode = 0x%02x %s\n",
|
||||
+ HWS_PTR_TO_ID(rule), wqe_opcode,
|
||||
+ wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ?
|
||||
+ "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)");
|
||||
+ mlx5hws_err(ctx,
|
||||
+ " rule 0x%08llx: |------------ qpn = 0x%06x\n",
|
||||
+ HWS_PTR_TO_ID(rule),
|
||||
+ be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff);
|
||||
+}
|
||||
+
|
||||
static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
|
||||
struct mlx5hws_send_ring_priv *priv,
|
||||
u16 wqe_cnt,
|
||||
- enum mlx5hws_flow_op_status *status)
|
||||
+ enum mlx5hws_flow_op_status *status,
|
||||
+ struct mlx5_cqe64 *cqe)
|
||||
{
|
||||
priv->rule->pending_wqes--;
|
||||
|
||||
- if (*status == MLX5HWS_FLOW_OP_ERROR) {
|
||||
+ if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) {
|
||||
if (priv->retry_id) {
|
||||
+ /* If there is a retry_id, then it's not an error yet,
|
||||
+ * retry to insert this rule in the collision RTC.
|
||||
+ */
|
||||
hws_send_engine_retry_post_send(queue, priv, wqe_cnt);
|
||||
return;
|
||||
}
|
||||
+ hws_send_engine_dump_error_cqe(queue, priv, cqe);
|
||||
/* Some part of the rule failed */
|
||||
priv->rule->status = MLX5HWS_RULE_STATUS_FAILING;
|
||||
*priv->used_id = 0;
|
||||
@@ -420,7 +535,8 @@ static void hws_send_engine_update(struct mlx5hws_send_engine *queue,
|
||||
|
||||
if (priv->user_data) {
|
||||
if (priv->rule) {
|
||||
- hws_send_engine_update_rule(queue, priv, wqe_cnt, &status);
|
||||
+ hws_send_engine_update_rule(queue, priv, wqe_cnt,
|
||||
+ &status, cqe);
|
||||
/* Completion is provided on the last rule WQE */
|
||||
if (priv->rule->pending_wqes)
|
||||
return;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h
|
||||
index f833092235c1..3fb8e99309b2 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h
|
||||
@@ -140,6 +140,7 @@ struct mlx5hws_send_engine {
|
||||
u16 used_entries;
|
||||
u16 num_entries;
|
||||
bool err;
|
||||
+ bool error_cqe_printed;
|
||||
struct mutex lock; /* Protects the send engine */
|
||||
};
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
40
SOURCES/1375-net-mlx5-use-to-delayed-work.patch
Normal file
40
SOURCES/1375-net-mlx5-use-to-delayed-work.patch
Normal file
@ -0,0 +1,40 @@
|
||||
From 5447a8c66fd43b005c3f33fe8b63145af1ce5893 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:06 -0400
|
||||
Subject: [PATCH] net/mlx5: Use to_delayed_work()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit ee39bae6c141876f5b4c001f6b12b4f8ffb4cd08
|
||||
Author: Chen Ni <nichen@iscas.ac.cn>
|
||||
Date: Wed May 14 15:24:19 2025 +0800
|
||||
|
||||
net/mlx5: Use to_delayed_work()
|
||||
|
||||
Use to_delayed_work() instead of open-coding it.
|
||||
|
||||
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
|
||||
Acked-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250514072419.2707578-1-nichen@iscas.ac.cn
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
index e53dbdc0a7a1..b1aeea7c4a91 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
@@ -927,8 +927,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
|
||||
|
||||
static void cb_timeout_handler(struct work_struct *work)
|
||||
{
|
||||
- struct delayed_work *dwork = container_of(work, struct delayed_work,
|
||||
- work);
|
||||
+ struct delayed_work *dwork = to_delayed_work(work);
|
||||
struct mlx5_cmd_work_ent *ent = container_of(dwork,
|
||||
struct mlx5_cmd_work_ent,
|
||||
cb_timeout_work);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
196
SOURCES/1376-net-mlx5-sws-fix-reformat-id-error-handling.patch
Normal file
196
SOURCES/1376-net-mlx5-sws-fix-reformat-id-error-handling.patch
Normal file
@ -0,0 +1,196 @@
|
||||
From d9f5ece10ab6345c8de1a61e674bcdef45d0ce56 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:06 -0400
|
||||
Subject: [PATCH] net/mlx5: SWS, fix reformat id error handling
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit ca7690dae1269f454572c163ed5271feed060af5
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue May 20 21:46:39 2025 +0300
|
||||
|
||||
net/mlx5: SWS, fix reformat id error handling
|
||||
|
||||
The firmware reformat id is a u32 and can't safely be returned as an
|
||||
int. Because the functions also need a way to signal error, prefer to
|
||||
return the id as an output parameter and keep the return code only for
|
||||
success/error.
|
||||
|
||||
While we're at it, also extract some duplicate code to fetch the
|
||||
reformat id from a more generic struct pkt_reformat.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1747766802-958178-2-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
|
||||
index a47c29571f64..1af76da8b132 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
|
||||
@@ -527,7 +527,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
|
||||
struct mlx5_flow_rule *dst;
|
||||
void *in_flow_context, *vlan;
|
||||
void *in_match_value;
|
||||
- int reformat_id = 0;
|
||||
+ u32 reformat_id = 0;
|
||||
unsigned int inlen;
|
||||
int dst_cnt_size;
|
||||
u32 *in, action;
|
||||
@@ -580,23 +580,21 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
|
||||
MLX5_SET(flow_context, in_flow_context, action, action);
|
||||
|
||||
if (!extended_dest && fte->act_dests.action.pkt_reformat) {
|
||||
- struct mlx5_pkt_reformat *pkt_reformat = fte->act_dests.action.pkt_reformat;
|
||||
-
|
||||
- if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
|
||||
- reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat);
|
||||
- if (reformat_id < 0) {
|
||||
- mlx5_core_err(dev,
|
||||
- "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n",
|
||||
- pkt_reformat->reformat_type);
|
||||
- err = reformat_id;
|
||||
- goto err_out;
|
||||
- }
|
||||
- } else {
|
||||
- reformat_id = fte->act_dests.action.pkt_reformat->id;
|
||||
+ struct mlx5_pkt_reformat *pkt_reformat =
|
||||
+ fte->act_dests.action.pkt_reformat;
|
||||
+
|
||||
+ err = mlx5_fs_get_packet_reformat_id(pkt_reformat,
|
||||
+ &reformat_id);
|
||||
+ if (err) {
|
||||
+ mlx5_core_err(dev,
|
||||
+ "Unsupported pkt_reformat type (%d)\n",
|
||||
+ pkt_reformat->reformat_type);
|
||||
+ goto err_out;
|
||||
}
|
||||
}
|
||||
|
||||
- MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id);
|
||||
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
|
||||
+ reformat_id);
|
||||
|
||||
if (fte->act_dests.action.modify_hdr) {
|
||||
if (fte->act_dests.action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
index a22ecf141518..c7ce9fc797c4 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
@@ -1830,14 +1830,33 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
|
||||
return err;
|
||||
}
|
||||
|
||||
+int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *id)
|
||||
+{
|
||||
+ switch (pkt_reformat->owner) {
|
||||
+ case MLX5_FLOW_RESOURCE_OWNER_FW:
|
||||
+ *id = pkt_reformat->id;
|
||||
+ return 0;
|
||||
+ case MLX5_FLOW_RESOURCE_OWNER_SW:
|
||||
+ return mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat, id);
|
||||
+ default:
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
|
||||
struct mlx5_pkt_reformat *p2)
|
||||
{
|
||||
- return p1->owner == p2->owner &&
|
||||
- (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
|
||||
- p1->id == p2->id :
|
||||
- mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
|
||||
- mlx5_fs_dr_action_get_pkt_reformat_id(p2));
|
||||
+ int err1, err2;
|
||||
+ u32 id1, id2;
|
||||
+
|
||||
+ if (p1->owner != p2->owner)
|
||||
+ return false;
|
||||
+
|
||||
+ err1 = mlx5_fs_get_packet_reformat_id(p1, &id1);
|
||||
+ err2 = mlx5_fs_get_packet_reformat_id(p2, &id2);
|
||||
+
|
||||
+ return !err1 && !err2 && id1 == id2;
|
||||
}
|
||||
|
||||
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
index 1f523fb761f6..a41d3491d2af 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
@@ -387,6 +387,9 @@ u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace
|
||||
|
||||
struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
|
||||
|
||||
+int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *id);
|
||||
+
|
||||
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
|
||||
|
||||
#define fs_list_for_each_entry(pos, root) \
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c
|
||||
index 8007d3f523c9..f367997ab61e 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c
|
||||
@@ -833,15 +833,21 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns,
|
||||
return steering_caps;
|
||||
}
|
||||
|
||||
-int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat)
|
||||
+int
|
||||
+mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id)
|
||||
{
|
||||
+ struct mlx5dr_action *dr_action;
|
||||
+
|
||||
switch (pkt_reformat->reformat_type) {
|
||||
case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
|
||||
case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
|
||||
case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
|
||||
case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
|
||||
case MLX5_REFORMAT_TYPE_INSERT_HDR:
|
||||
- return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->fs_dr_action.dr_action);
|
||||
+ dr_action = pkt_reformat->fs_dr_action.dr_action;
|
||||
+ *reformat_id = mlx5dr_action_get_pkt_reformat_id(dr_action);
|
||||
+ return 0;
|
||||
}
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h
|
||||
index 99a3b2eff6b8..f869f2daefbf 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h
|
||||
@@ -38,7 +38,9 @@ struct mlx5_fs_dr_table {
|
||||
|
||||
bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev);
|
||||
|
||||
-int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat);
|
||||
+int
|
||||
+mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id);
|
||||
|
||||
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void);
|
||||
|
||||
@@ -49,9 +51,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat)
|
||||
+static inline int
|
||||
+mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id)
|
||||
{
|
||||
- return 0;
|
||||
+ return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev)
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,246 @@
|
||||
From a2158eb7d1e077c0cf3a4b1bad863e6bd081d31d Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:06 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, register reformat actions with fw
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b206d9ec19dfc2db706883ff6b46b259831a033d
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue May 20 21:46:40 2025 +0300
|
||||
|
||||
net/mlx5: HWS, register reformat actions with fw
|
||||
|
||||
Hardware steering handles actions differently from firmware, but for
|
||||
termination rules that use encapsulation the firmware needs to be aware
|
||||
of the action.
|
||||
|
||||
Fix this by registering reformat actions with the firmware the first
|
||||
time this is needed. To do this, add a third possible owner for an
|
||||
action, and also a lock to protect against registration of the same
|
||||
action from different threads.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1747766802-958178-3-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
index c7ce9fc797c4..c330b64a506b 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
@@ -1839,6 +1839,8 @@ int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
return 0;
|
||||
case MLX5_FLOW_RESOURCE_OWNER_SW:
|
||||
return mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat, id);
|
||||
+ case MLX5_FLOW_RESOURCE_OWNER_HWS:
|
||||
+ return mlx5_fs_hws_action_get_pkt_reformat_id(pkt_reformat, id);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
index a41d3491d2af..e6a95b310b55 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
|
||||
@@ -58,6 +58,7 @@ struct mlx5_flow_definer {
|
||||
enum mlx5_flow_resource_owner {
|
||||
MLX5_FLOW_RESOURCE_OWNER_FW,
|
||||
MLX5_FLOW_RESOURCE_OWNER_SW,
|
||||
+ MLX5_FLOW_RESOURCE_OWNER_HWS,
|
||||
};
|
||||
|
||||
struct mlx5_modify_hdr {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index bef4d25c1a2a..aa47a7af6f50 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -72,6 +72,11 @@ enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action)
|
||||
return action->type;
|
||||
}
|
||||
|
||||
+struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action)
|
||||
+{
|
||||
+ return action->ctx->mdev;
|
||||
+}
|
||||
+
|
||||
static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx,
|
||||
enum mlx5hws_context_shared_stc_type stc_type,
|
||||
u8 tbl_type)
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
index 1b787cd66e6f..9d1c0e4b224a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
@@ -1081,13 +1081,8 @@ static int mlx5_cmd_hws_create_fte(struct mlx5_flow_root_namespace *ns,
|
||||
struct mlx5hws_bwc_rule *rule;
|
||||
int err = 0;
|
||||
|
||||
- if (mlx5_fs_cmd_is_fw_term_table(ft)) {
|
||||
- /* Packet reformat on terminamtion table not supported yet */
|
||||
- if (fte->act_dests.action.action &
|
||||
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
|
||||
- return -EOPNOTSUPP;
|
||||
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
|
||||
return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
|
||||
- }
|
||||
|
||||
err = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions);
|
||||
if (err)
|
||||
@@ -1362,7 +1357,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
|
||||
pkt_reformat->fs_hws_action.pr_data = pr_data;
|
||||
}
|
||||
|
||||
- pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
|
||||
+ pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS;
|
||||
pkt_reformat->fs_hws_action.hws_action = hws_action;
|
||||
return 0;
|
||||
|
||||
@@ -1380,6 +1375,15 @@ static void mlx5_cmd_hws_packet_reformat_dealloc(struct mlx5_flow_root_namespace
|
||||
struct mlx5_fs_hws_pr *pr_data;
|
||||
struct mlx5_fs_pool *pr_pool;
|
||||
|
||||
+ if (pkt_reformat->fs_hws_action.fw_reformat_id != 0) {
|
||||
+ struct mlx5_pkt_reformat fw_pkt_reformat = { 0 };
|
||||
+
|
||||
+ fw_pkt_reformat.id = pkt_reformat->fs_hws_action.fw_reformat_id;
|
||||
+ mlx5_fs_cmd_get_fw_cmds()->
|
||||
+ packet_reformat_dealloc(ns, &fw_pkt_reformat);
|
||||
+ pkt_reformat->fs_hws_action.fw_reformat_id = 0;
|
||||
+ }
|
||||
+
|
||||
if (pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR)
|
||||
return;
|
||||
|
||||
@@ -1499,6 +1503,7 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
|
||||
err = -ENOMEM;
|
||||
goto release_mh;
|
||||
}
|
||||
+ mutex_init(&modify_hdr->fs_hws_action.lock);
|
||||
modify_hdr->fs_hws_action.mh_data = mh_data;
|
||||
modify_hdr->fs_hws_action.fs_pool = pool;
|
||||
modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
|
||||
@@ -1532,6 +1537,58 @@ static void mlx5_cmd_hws_modify_header_dealloc(struct mlx5_flow_root_namespace *
|
||||
modify_hdr->fs_hws_action.mh_data = NULL;
|
||||
}
|
||||
|
||||
+int
|
||||
+mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id)
|
||||
+{
|
||||
+ enum mlx5_flow_namespace_type ns_type = pkt_reformat->ns_type;
|
||||
+ struct mutex *lock = &pkt_reformat->fs_hws_action.lock;
|
||||
+ u32 *id = &pkt_reformat->fs_hws_action.fw_reformat_id;
|
||||
+ struct mlx5_pkt_reformat fw_pkt_reformat = { 0 };
|
||||
+ struct mlx5_pkt_reformat_params params = { 0 };
|
||||
+ struct mlx5_flow_root_namespace *ns;
|
||||
+ struct mlx5_core_dev *dev;
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_lock(lock);
|
||||
+
|
||||
+ if (*id != 0) {
|
||||
+ *reformat_id = *id;
|
||||
+ ret = 0;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ dev = mlx5hws_action_get_dev(pkt_reformat->fs_hws_action.hws_action);
|
||||
+ if (!dev) {
|
||||
+ ret = -EINVAL;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ ns = mlx5_get_root_namespace(dev, ns_type);
|
||||
+ if (!ns) {
|
||||
+ ret = -EINVAL;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ params.type = pkt_reformat->reformat_type;
|
||||
+ params.size = pkt_reformat->fs_hws_action.pr_data->data_size;
|
||||
+ params.data = pkt_reformat->fs_hws_action.pr_data->data;
|
||||
+
|
||||
+ ret = mlx5_fs_cmd_get_fw_cmds()->
|
||||
+ packet_reformat_alloc(ns, &params, ns_type, &fw_pkt_reformat);
|
||||
+ if (ret)
|
||||
+ goto unlock;
|
||||
+
|
||||
+ *id = fw_pkt_reformat.id;
|
||||
+ *reformat_id = *id;
|
||||
+ ret = 0;
|
||||
+
|
||||
+unlock:
|
||||
+ mutex_unlock(lock);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int mlx5_cmd_hws_create_match_definer(struct mlx5_flow_root_namespace *ns,
|
||||
u16 format_id, u32 *match_mask)
|
||||
{
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h
|
||||
index 8b56298288da..b92d55b2d147 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h
|
||||
@@ -41,6 +41,11 @@ struct mlx5_fs_hws_action {
|
||||
struct mlx5_fs_pool *fs_pool;
|
||||
struct mlx5_fs_hws_pr *pr_data;
|
||||
struct mlx5_fs_hws_mh *mh_data;
|
||||
+ u32 fw_reformat_id;
|
||||
+ /* Protect `fw_reformat_id` against being initialized from multiple
|
||||
+ * threads.
|
||||
+ */
|
||||
+ struct mutex lock;
|
||||
};
|
||||
|
||||
struct mlx5_fs_hws_matcher {
|
||||
@@ -84,12 +89,23 @@ void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data);
|
||||
|
||||
#ifdef CONFIG_MLX5_HW_STEERING
|
||||
|
||||
+int
|
||||
+mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id);
|
||||
+
|
||||
bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev);
|
||||
|
||||
const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void);
|
||||
|
||||
#else
|
||||
|
||||
+static inline int
|
||||
+mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat,
|
||||
+ u32 *reformat_id)
|
||||
+{
|
||||
+ return -EOPNOTSUPP;
|
||||
+}
|
||||
+
|
||||
static inline bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev)
|
||||
{
|
||||
return false;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
index fbd63369da10..9bbadc4d8a0b 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
@@ -503,6 +503,15 @@ int mlx5hws_rule_action_update(struct mlx5hws_rule *rule,
|
||||
enum mlx5hws_action_type
|
||||
mlx5hws_action_get_type(struct mlx5hws_action *action);
|
||||
|
||||
+/**
|
||||
+ * mlx5hws_action_get_dev - Get mlx5 core device.
|
||||
+ *
|
||||
+ * @action: The action to get the device from.
|
||||
+ *
|
||||
+ * Return: mlx5 core device.
|
||||
+ */
|
||||
+struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action);
|
||||
+
|
||||
/**
|
||||
* mlx5hws_action_create_dest_drop - Create a direct rule drop action.
|
||||
*
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
220
SOURCES/1378-net-mlx5-hws-fix-typo-nope-to-nop.patch
Normal file
220
SOURCES/1378-net-mlx5-hws-fix-typo-nope-to-nop.patch
Normal file
@ -0,0 +1,220 @@
|
||||
From 80492ad30af1ed97b996b34f6e6daac2f98ef98d Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:07 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, fix typo - 'nope' to 'nop'
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 0b6e452caf03da63aeb2e84475771d6fb6d6cd99
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Tue May 20 21:46:41 2025 +0300
|
||||
|
||||
net/mlx5: HWS, fix typo - 'nope' to 'nop'
|
||||
|
||||
Fix typo - rename 'nope_locations' to 'nop_locations', which describes
|
||||
the locations of 'nop' actions. To shorten the lines, this renaming
|
||||
also required some refactoring.
|
||||
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1747766802-958178-4-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index aa47a7af6f50..64d115feef2c 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1207,16 +1207,16 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action,
|
||||
for (i = 0; i < num_of_patterns; i++) {
|
||||
size_t new_num_actions;
|
||||
size_t cur_num_actions;
|
||||
- u32 nope_location;
|
||||
+ u32 nop_locations;
|
||||
|
||||
cur_num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE;
|
||||
|
||||
- mlx5hws_pat_calc_nope(pattern[i].data, cur_num_actions,
|
||||
- pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE,
|
||||
- &new_num_actions, &nope_location,
|
||||
- &new_pattern[i * pat_max_sz]);
|
||||
+ mlx5hws_pat_calc_nop(pattern[i].data, cur_num_actions,
|
||||
+ pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE,
|
||||
+ &new_num_actions, &nop_locations,
|
||||
+ &new_pattern[i * pat_max_sz]);
|
||||
|
||||
- action[i].modify_header.nope_locations = nope_location;
|
||||
+ action[i].modify_header.nop_locations = nop_locations;
|
||||
action[i].modify_header.num_of_actions = new_num_actions;
|
||||
|
||||
max_mh_actions = max(max_mh_actions, new_num_actions);
|
||||
@@ -1263,7 +1263,7 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action,
|
||||
MLX5_GET(set_action_in, pattern[i].data, action_type);
|
||||
} else {
|
||||
/* Multiple modify actions require a pattern */
|
||||
- if (unlikely(action[i].modify_header.nope_locations)) {
|
||||
+ if (unlikely(action[i].modify_header.nop_locations)) {
|
||||
size_t pattern_sz;
|
||||
|
||||
pattern_sz = action[i].modify_header.num_of_actions *
|
||||
@@ -2105,12 +2105,12 @@ static void hws_action_modify_write(struct mlx5hws_send_engine *queue,
|
||||
u32 arg_idx,
|
||||
u8 *arg_data,
|
||||
u16 num_of_actions,
|
||||
- u32 nope_locations)
|
||||
+ u32 nop_locations)
|
||||
{
|
||||
u8 *new_arg_data = NULL;
|
||||
int i, j;
|
||||
|
||||
- if (unlikely(nope_locations)) {
|
||||
+ if (unlikely(nop_locations)) {
|
||||
new_arg_data = kcalloc(num_of_actions,
|
||||
MLX5HWS_MODIFY_ACTION_SIZE, GFP_KERNEL);
|
||||
if (unlikely(!new_arg_data))
|
||||
@@ -2118,7 +2118,7 @@ static void hws_action_modify_write(struct mlx5hws_send_engine *queue,
|
||||
|
||||
for (i = 0, j = 0; i < num_of_actions; i++, j++) {
|
||||
memcpy(&new_arg_data[j], arg_data, MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
- if (BIT(i) & nope_locations)
|
||||
+ if (BIT(i) & nop_locations)
|
||||
j++;
|
||||
}
|
||||
}
|
||||
@@ -2215,6 +2215,7 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply,
|
||||
struct mlx5hws_action *action;
|
||||
u32 arg_sz, arg_idx;
|
||||
u8 *single_action;
|
||||
+ u8 max_actions;
|
||||
__be32 stc_idx;
|
||||
|
||||
rule_action = &apply->rule_action[setter->idx_double];
|
||||
@@ -2242,21 +2243,23 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply,
|
||||
|
||||
apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] =
|
||||
*(__be32 *)MLX5_ADDR_OF(set_action_in, single_action, data);
|
||||
- } else {
|
||||
- /* Argument offset multiple with number of args per these actions */
|
||||
- arg_sz = mlx5hws_arg_get_arg_size(action->modify_header.max_num_of_actions);
|
||||
- arg_idx = rule_action->modify_header.offset * arg_sz;
|
||||
-
|
||||
- apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx);
|
||||
-
|
||||
- if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) {
|
||||
- apply->require_dep = 1;
|
||||
- hws_action_modify_write(apply->queue,
|
||||
- action->modify_header.arg_id + arg_idx,
|
||||
- rule_action->modify_header.data,
|
||||
- action->modify_header.num_of_actions,
|
||||
- action->modify_header.nope_locations);
|
||||
- }
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* Argument offset multiple with number of args per these actions */
|
||||
+ max_actions = action->modify_header.max_num_of_actions;
|
||||
+ arg_sz = mlx5hws_arg_get_arg_size(max_actions);
|
||||
+ arg_idx = rule_action->modify_header.offset * arg_sz;
|
||||
+
|
||||
+ apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx);
|
||||
+
|
||||
+ if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) {
|
||||
+ apply->require_dep = 1;
|
||||
+ hws_action_modify_write(apply->queue,
|
||||
+ action->modify_header.arg_id + arg_idx,
|
||||
+ rule_action->modify_header.data,
|
||||
+ action->modify_header.num_of_actions,
|
||||
+ action->modify_header.nop_locations);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
index 25fa0d4c9221..55a079fdd08f 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
|
||||
@@ -136,7 +136,7 @@ struct mlx5hws_action {
|
||||
u32 pat_id;
|
||||
u32 arg_id;
|
||||
__be64 single_action;
|
||||
- u32 nope_locations;
|
||||
+ u32 nop_locations;
|
||||
u8 num_of_patterns;
|
||||
u8 single_action_type;
|
||||
u8 num_of_actions;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
index f51ed24526b9..78de19c074a7 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
@@ -522,9 +522,9 @@ bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], s
|
||||
return true;
|
||||
}
|
||||
|
||||
-void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions,
|
||||
- size_t max_actions, size_t *new_size,
|
||||
- u32 *nope_location, __be64 *new_pat)
|
||||
+void mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
+ size_t max_actions, size_t *new_size,
|
||||
+ u32 *nop_locations, __be64 *new_pat)
|
||||
{
|
||||
u16 prev_src_field = 0, prev_dst_field = 0;
|
||||
u16 src_field, dst_field;
|
||||
@@ -532,7 +532,7 @@ void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions,
|
||||
size_t i, j;
|
||||
|
||||
*new_size = num_actions;
|
||||
- *nope_location = 0;
|
||||
+ *nop_locations = 0;
|
||||
|
||||
if (num_actions == 1)
|
||||
return;
|
||||
@@ -546,18 +546,18 @@ void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions,
|
||||
if (action_type == MLX5_ACTION_TYPE_COPY &&
|
||||
(prev_src_field == src_field ||
|
||||
prev_dst_field == dst_field)) {
|
||||
- /* need Nope */
|
||||
+ /* need Nop */
|
||||
*new_size += 1;
|
||||
- *nope_location |= BIT(i);
|
||||
+ *nop_locations |= BIT(i);
|
||||
memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
MLX5_SET(set_action_in, &new_pat[j],
|
||||
action_type,
|
||||
MLX5_MODIFICATION_TYPE_NOP);
|
||||
j++;
|
||||
} else if (prev_src_field == src_field) {
|
||||
- /* need Nope*/
|
||||
+ /* need Nop */
|
||||
*new_size += 1;
|
||||
- *nope_location |= BIT(i);
|
||||
+ *nop_locations |= BIT(i);
|
||||
MLX5_SET(set_action_in, &new_pat[j],
|
||||
action_type,
|
||||
MLX5_MODIFICATION_TYPE_NOP);
|
||||
@@ -568,7 +568,7 @@ void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions,
|
||||
/* check if no more space */
|
||||
if (j > max_actions) {
|
||||
*new_size = num_actions;
|
||||
- *nope_location = 0;
|
||||
+ *nop_locations = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
index 8ddb51980044..91bd2572a341 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
@@ -96,6 +96,7 @@ int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx,
|
||||
u8 *arg_data,
|
||||
size_t data_size);
|
||||
|
||||
-void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, size_t max_actions,
|
||||
- size_t *new_size, u32 *nope_location, __be64 *new_pat);
|
||||
+void mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
+ size_t max_actions, size_t *new_size,
|
||||
+ u32 *nop_locations, __be64 *new_pat);
|
||||
#endif /* MLX5HWS_PAT_ARG_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,228 @@
|
||||
From 49dd65e6ee03b3423cd69b885fdab5ae5ec32303 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:07 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, handle modify header actions dependency
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 01e035fd0380b285d72725adb5a45f1d73549db8
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Tue May 20 21:46:42 2025 +0300
|
||||
|
||||
net/mlx5: HWS, handle modify header actions dependency
|
||||
|
||||
Having adjacent accelerated modify header actions (so-called
|
||||
pattern-argument actions) may result in inconsistent outcome.
|
||||
These inconsistencies can take the form of writes to the same
|
||||
field or a read coupled with a write to the same field. The
|
||||
solution is to detect such dependencies and insert nops between
|
||||
the offending actions.
|
||||
|
||||
The existing implementation had a few issues, which pretty much
|
||||
required a complete rewrite of the code that handles these
|
||||
dependencies.
|
||||
|
||||
In the new implementation we're doing the following:
|
||||
|
||||
* Checking any two adjacent actions for conflicts (not just
|
||||
odd-even pairs).
|
||||
* Marking 'set' and 'add' action fields as destination, rather
|
||||
than source, for the purposes of checking for conflicts.
|
||||
* Checking all types of actions ('add', 'set', 'copy') for
|
||||
dependencies.
|
||||
* Managing offsets of the args in the buffer - copy the action
|
||||
args to the right place in the buffer.
|
||||
* Checking that after inserting nops we're still within the number
|
||||
of supported actions - return an error otherwise.
|
||||
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/1747766802-958178-5-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index 64d115feef2c..fb62f3bc4bd4 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1190,14 +1190,15 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action,
|
||||
struct mlx5hws_action_mh_pattern *pattern,
|
||||
u32 log_bulk_size)
|
||||
{
|
||||
+ u16 num_actions, max_mh_actions = 0, hw_max_actions;
|
||||
struct mlx5hws_context *ctx = action->ctx;
|
||||
- u16 num_actions, max_mh_actions = 0;
|
||||
int i, ret, size_in_bytes;
|
||||
u32 pat_id, arg_id = 0;
|
||||
__be64 *new_pattern;
|
||||
size_t pat_max_sz;
|
||||
|
||||
pat_max_sz = MLX5HWS_ARG_CHUNK_SIZE_MAX * MLX5HWS_ARG_DATA_SIZE;
|
||||
+ hw_max_actions = pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE;
|
||||
size_in_bytes = pat_max_sz * sizeof(__be64);
|
||||
new_pattern = kcalloc(num_of_patterns, size_in_bytes, GFP_KERNEL);
|
||||
if (!new_pattern)
|
||||
@@ -1211,10 +1212,14 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action,
|
||||
|
||||
cur_num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE;
|
||||
|
||||
- mlx5hws_pat_calc_nop(pattern[i].data, cur_num_actions,
|
||||
- pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE,
|
||||
- &new_num_actions, &nop_locations,
|
||||
- &new_pattern[i * pat_max_sz]);
|
||||
+ ret = mlx5hws_pat_calc_nop(pattern[i].data, cur_num_actions,
|
||||
+ hw_max_actions, &new_num_actions,
|
||||
+ &nop_locations,
|
||||
+ &new_pattern[i * pat_max_sz]);
|
||||
+ if (ret) {
|
||||
+ mlx5hws_err(ctx, "Too many actions after nop insertion\n");
|
||||
+ goto free_new_pat;
|
||||
+ }
|
||||
|
||||
action[i].modify_header.nop_locations = nop_locations;
|
||||
action[i].modify_header.num_of_actions = new_num_actions;
|
||||
@@ -2116,10 +2121,12 @@ static void hws_action_modify_write(struct mlx5hws_send_engine *queue,
|
||||
if (unlikely(!new_arg_data))
|
||||
return;
|
||||
|
||||
- for (i = 0, j = 0; i < num_of_actions; i++, j++) {
|
||||
- memcpy(&new_arg_data[j], arg_data, MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
+ for (i = 0, j = 0; j < num_of_actions; i++, j++) {
|
||||
if (BIT(i) & nop_locations)
|
||||
j++;
|
||||
+ memcpy(&new_arg_data[j * MLX5HWS_MODIFY_ACTION_SIZE],
|
||||
+ &arg_data[i * MLX5HWS_MODIFY_ACTION_SIZE],
|
||||
+ MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
index 78de19c074a7..51e4c551e0ef 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
|
||||
@@ -490,8 +490,8 @@ hws_action_modify_get_target_fields(u8 action_type, __be64 *pattern,
|
||||
switch (action_type) {
|
||||
case MLX5_ACTION_TYPE_SET:
|
||||
case MLX5_ACTION_TYPE_ADD:
|
||||
- *src_field = MLX5_GET(set_action_in, pattern, field);
|
||||
- *dst_field = INVALID_FIELD;
|
||||
+ *src_field = INVALID_FIELD;
|
||||
+ *dst_field = MLX5_GET(set_action_in, pattern, field);
|
||||
break;
|
||||
case MLX5_ACTION_TYPE_COPY:
|
||||
*src_field = MLX5_GET(copy_action_in, pattern, src_field);
|
||||
@@ -522,57 +522,59 @@ bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], s
|
||||
return true;
|
||||
}
|
||||
|
||||
-void mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
- size_t max_actions, size_t *new_size,
|
||||
- u32 *nop_locations, __be64 *new_pat)
|
||||
+int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
+ size_t max_actions, size_t *new_size,
|
||||
+ u32 *nop_locations, __be64 *new_pat)
|
||||
{
|
||||
- u16 prev_src_field = 0, prev_dst_field = 0;
|
||||
+ u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD;
|
||||
u16 src_field, dst_field;
|
||||
u8 action_type;
|
||||
+ bool dependent;
|
||||
size_t i, j;
|
||||
|
||||
*new_size = num_actions;
|
||||
*nop_locations = 0;
|
||||
|
||||
if (num_actions == 1)
|
||||
- return;
|
||||
+ return 0;
|
||||
|
||||
for (i = 0, j = 0; i < num_actions; i++, j++) {
|
||||
- action_type = MLX5_GET(set_action_in, &pattern[i], action_type);
|
||||
+ if (j >= max_actions)
|
||||
+ return -EINVAL;
|
||||
|
||||
+ action_type = MLX5_GET(set_action_in, &pattern[i], action_type);
|
||||
hws_action_modify_get_target_fields(action_type, &pattern[i],
|
||||
&src_field, &dst_field);
|
||||
- if (i % 2) {
|
||||
- if (action_type == MLX5_ACTION_TYPE_COPY &&
|
||||
- (prev_src_field == src_field ||
|
||||
- prev_dst_field == dst_field)) {
|
||||
- /* need Nop */
|
||||
- *new_size += 1;
|
||||
- *nop_locations |= BIT(i);
|
||||
- memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
- MLX5_SET(set_action_in, &new_pat[j],
|
||||
- action_type,
|
||||
- MLX5_MODIFICATION_TYPE_NOP);
|
||||
- j++;
|
||||
- } else if (prev_src_field == src_field) {
|
||||
- /* need Nop */
|
||||
- *new_size += 1;
|
||||
- *nop_locations |= BIT(i);
|
||||
- MLX5_SET(set_action_in, &new_pat[j],
|
||||
- action_type,
|
||||
- MLX5_MODIFICATION_TYPE_NOP);
|
||||
- j++;
|
||||
- }
|
||||
- }
|
||||
- memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
- /* check if no more space */
|
||||
- if (j > max_actions) {
|
||||
- *new_size = num_actions;
|
||||
- *nop_locations = 0;
|
||||
- return;
|
||||
+
|
||||
+ /* For every action, look at it and the previous one. The two
|
||||
+ * actions are dependent if:
|
||||
+ */
|
||||
+ dependent =
|
||||
+ (i > 0) &&
|
||||
+ /* At least one of the actions is a write and */
|
||||
+ (dst_field != INVALID_FIELD ||
|
||||
+ prev_dst_field != INVALID_FIELD) &&
|
||||
+ /* One reads from the other's source */
|
||||
+ (dst_field == prev_src_field ||
|
||||
+ src_field == prev_dst_field ||
|
||||
+ /* Or both write to the same destination */
|
||||
+ dst_field == prev_dst_field);
|
||||
+
|
||||
+ if (dependent) {
|
||||
+ *new_size += 1;
|
||||
+ *nop_locations |= BIT(i);
|
||||
+ memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
+ MLX5_SET(set_action_in, &new_pat[j], action_type,
|
||||
+ MLX5_MODIFICATION_TYPE_NOP);
|
||||
+ j++;
|
||||
+ if (j >= max_actions)
|
||||
+ return -EINVAL;
|
||||
}
|
||||
|
||||
+ memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE);
|
||||
prev_src_field = src_field;
|
||||
prev_dst_field = dst_field;
|
||||
}
|
||||
+
|
||||
+ return 0;
|
||||
}
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
index 91bd2572a341..7fbd8dc7aa18 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
|
||||
@@ -96,7 +96,7 @@ int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx,
|
||||
u8 *arg_data,
|
||||
size_t data_size);
|
||||
|
||||
-void mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
- size_t max_actions, size_t *new_size,
|
||||
- u32 *nop_locations, __be64 *new_pat);
|
||||
+int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions,
|
||||
+ size_t max_actions, size_t *new_size,
|
||||
+ u32 *nop_locations, __be64 *new_pat);
|
||||
#endif /* MLX5HWS_PAT_ARG_H_ */
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,64 @@
|
||||
From 07d72a58ed409c2ba3a0099febcaf5e9186274e4 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:07 -0400
|
||||
Subject: [PATCH] net/mlx5_core: Add error handling
|
||||
inmlx5_query_nic_vport_qkey_viol_cntr()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit f0b50730bdd8f2734e548de541e845c0d40dceb6
|
||||
Author: Wentao Liang <vulab@iscas.ac.cn>
|
||||
Date: Wed May 21 21:36:20 2025 +0800
|
||||
|
||||
net/mlx5_core: Add error handling inmlx5_query_nic_vport_qkey_viol_cntr()
|
||||
|
||||
The function mlx5_query_nic_vport_qkey_viol_cntr() calls the function
|
||||
mlx5_query_nic_vport_context() but does not check its return value. This
|
||||
could lead to undefined behavior if the query fails. A proper
|
||||
implementation can be found in mlx5_nic_vport_query_local_lb().
|
||||
|
||||
Add error handling for mlx5_query_nic_vport_context(). If it fails, free
|
||||
the out buffer via kvfree() and return error code.
|
||||
|
||||
Fixes: 9efa75254593 ("net/mlx5_core: Introduce access functions to query vport RoCE fields")
|
||||
Cc: stable@vger.kernel.org # v4.5
|
||||
Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250521133620.912-1-vulab@iscas.ac.cn
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
index d10d4c396040..a3c57bb8b521 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
@@ -519,19 +519,22 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
|
||||
{
|
||||
u32 *out;
|
||||
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
|
||||
+ int err;
|
||||
|
||||
out = kvzalloc(outlen, GFP_KERNEL);
|
||||
if (!out)
|
||||
return -ENOMEM;
|
||||
|
||||
- mlx5_query_nic_vport_context(mdev, 0, out);
|
||||
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
|
||||
+ if (err)
|
||||
+ goto out;
|
||||
|
||||
*qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
|
||||
nic_vport_context.qkey_violation_counter);
|
||||
-
|
||||
+out:
|
||||
kvfree(out);
|
||||
|
||||
- return 0;
|
||||
+ return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,41 @@
|
||||
From 8c0cb6c3c0865ac0421987596b2b3d90ec2f2510 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:08 -0400
|
||||
Subject: [PATCH] net/mlx5e: Allow setting MAC address of representors
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit f95633adc177416ac21f16db9ce1e75c74db805a
|
||||
Author: Mark Bloch <mbloch@nvidia.com>
|
||||
Date: Thu May 22 10:13:56 2025 +0300
|
||||
|
||||
net/mlx5e: Allow setting MAC address of representors
|
||||
|
||||
A representor netdev does not correspond to real hardware that needs to
|
||||
be updated when setting the MAC address. The default eth_mac_addr() is
|
||||
sufficient for simply updating the netdev's MAC address with validation.
|
||||
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Simon Horman <horms@kernel.org>
|
||||
Link: https://patch.msgid.link/1747898036-1121904-1-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
|
||||
index 58cd153ccc61..2640cace0f76 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
|
||||
@@ -803,6 +803,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
|
||||
.ndo_stop = mlx5e_rep_close,
|
||||
.ndo_start_xmit = mlx5e_xmit,
|
||||
.ndo_setup_tc = mlx5e_rep_setup_tc,
|
||||
+ .ndo_set_mac_address = eth_mac_addr,
|
||||
.ndo_get_stats64 = mlx5e_rep_get_stats,
|
||||
.ndo_has_offload_stats = mlx5e_rep_has_offload_stats,
|
||||
.ndo_get_offload_stats = mlx5e_rep_get_offload_stats,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
From dd1d14ed8fcc14c857f5bcd97ad842a291d390e6 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:08 -0400
|
||||
Subject: [PATCH] net/mlx5: Add error handling in
|
||||
mlx5_query_nic_vport_node_guid()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit c6bb8a21cdad8c975a3a646b9e5c8df01ad29783
|
||||
Author: Wentao Liang <vulab@iscas.ac.cn>
|
||||
Date: Sun May 25 00:34:25 2025 +0800
|
||||
|
||||
net/mlx5: Add error handling in mlx5_query_nic_vport_node_guid()
|
||||
|
||||
The function mlx5_query_nic_vport_node_guid() calls the function
|
||||
mlx5_query_nic_vport_context() but does not check its return value.
|
||||
A proper implementation can be found in mlx5_nic_vport_query_local_lb().
|
||||
|
||||
Add error handling for mlx5_query_nic_vport_context(). If it fails, free
|
||||
the out buffer via kvfree() and return error code.
|
||||
|
||||
Fixes: 9efa75254593 ("net/mlx5_core: Introduce access functions to query vport RoCE fields")
|
||||
Cc: stable@vger.kernel.org # v4.5
|
||||
Signed-off-by: Wentao Liang <vulab@iscas.ac.cn>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250524163425.1695-1-vulab@iscas.ac.cn
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
index a3c57bb8b521..da5c24fc7b30 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
|
||||
@@ -465,19 +465,22 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
|
||||
{
|
||||
u32 *out;
|
||||
int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
|
||||
+ int err;
|
||||
|
||||
out = kvzalloc(outlen, GFP_KERNEL);
|
||||
if (!out)
|
||||
return -ENOMEM;
|
||||
|
||||
- mlx5_query_nic_vport_context(mdev, 0, out);
|
||||
+ err = mlx5_query_nic_vport_context(mdev, 0, out);
|
||||
+ if (err)
|
||||
+ goto out;
|
||||
|
||||
*node_guid = MLX5_GET64(query_nic_vport_context_out, out,
|
||||
nic_vport_context.node_guid);
|
||||
-
|
||||
+out:
|
||||
kvfree(out);
|
||||
|
||||
- return 0;
|
||||
+ return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,43 @@
|
||||
From e2f7050c9a027152fc3001519b825cfd333320c0 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:08 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Fix an error code in
|
||||
mlx5hws_bwc_rule_create_complex()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit a540ee75945a96f606c6ac955bfed5410d318f7d
|
||||
Author: Dan Carpenter <dan.carpenter@linaro.org>
|
||||
Date: Fri May 23 19:00:12 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Fix an error code in mlx5hws_bwc_rule_create_complex()
|
||||
|
||||
This was intended to be negative -ENOMEM but the '-' character was left
|
||||
off accidentally. This typo doesn't affect runtime because the caller
|
||||
treats all non-zero returns the same.
|
||||
|
||||
Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers")
|
||||
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/aDCbjNcquNC68Hyj@stanley.mountain
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
index 5d30c5b094fc..70768953a4f6 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
@@ -1188,7 +1188,7 @@ int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!match_buf_2)) {
|
||||
mlx5hws_err(ctx, "Complex rule: failed allocating match_buf\n");
|
||||
- ret = ENOMEM;
|
||||
+ ret = -ENOMEM;
|
||||
goto hash_node_put;
|
||||
}
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,45 @@
|
||||
From 15be556a20263870bb8b7e0974d3452f4d4b3616 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:08 -0400
|
||||
Subject: [PATCH] net/mlx5: Ensure fw pages are always allocated on same NUMA
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit f37258133c1e95e61db532e14067e28b4881bf24
|
||||
Author: Moshe Shemesh <moshe@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:06 2025 +0300
|
||||
|
||||
net/mlx5: Ensure fw pages are always allocated on same NUMA
|
||||
|
||||
When firmware asks the driver to allocate more pages, using event of
|
||||
give_pages, the driver should always allocate it from same NUMA, the
|
||||
original device NUMA. Current code uses dev_to_node() which can result
|
||||
in different NUMA as it is changed by other driver flows, such as
|
||||
mlx5_dma_zalloc_coherent_node(). Instead, use saved numa node for
|
||||
allocating firmware pages.
|
||||
|
||||
Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node")
|
||||
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-2-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
|
||||
index 972e8e9df585..9bc9bd83c232 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
|
||||
@@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function)
|
||||
static int alloc_system_page(struct mlx5_core_dev *dev, u32 function)
|
||||
{
|
||||
struct device *device = mlx5_core_dma_dev(dev);
|
||||
- int nid = dev_to_node(device);
|
||||
+ int nid = dev->priv.numa_node;
|
||||
struct page *page;
|
||||
u64 zero_addr = 1;
|
||||
u64 addr;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,64 @@
|
||||
From 92a46cdb282adb03ef6c38e4e163528e9a19f3d7 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:09 -0400
|
||||
Subject: [PATCH] net/mlx5: Fix return value when searching for existing flow
|
||||
group
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 8ec40e3f1f72bf8f8accf18020d487caa99f46a4
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:08 2025 +0300
|
||||
|
||||
net/mlx5: Fix return value when searching for existing flow group
|
||||
|
||||
When attempting to add a rule to an existing flow group, if a matching
|
||||
flow group exists but is not active, the error code returned should be
|
||||
EAGAIN, so that the rule can be added to the matching flow group once
|
||||
it is active, rather than ENOENT, which indicates that no matching
|
||||
flow group was found.
|
||||
|
||||
Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel")
|
||||
Signed-off-by: Gavi Teitz <gavi@nvidia.com>
|
||||
Signed-off-by: Roi Dayan <roid@nvidia.com>
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-4-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
index c330b64a506b..5f0f546fa126 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
@@ -2228,6 +2228,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
|
||||
struct mlx5_flow_handle *rule;
|
||||
struct match_list *iter;
|
||||
bool take_write = false;
|
||||
+ bool try_again = false;
|
||||
struct fs_fte *fte;
|
||||
u64 version = 0;
|
||||
int err;
|
||||
@@ -2292,6 +2293,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
|
||||
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
|
||||
|
||||
if (!g->node.active) {
|
||||
+ try_again = true;
|
||||
up_write_ref_node(&g->node, false);
|
||||
continue;
|
||||
}
|
||||
@@ -2313,7 +2315,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
|
||||
tree_put_node(&fte->node, false);
|
||||
return rule;
|
||||
}
|
||||
- rule = ERR_PTR(-ENOENT);
|
||||
+ err = try_again ? -EAGAIN : -ENOENT;
|
||||
+ rule = ERR_PTR(err);
|
||||
out:
|
||||
kmem_cache_free(steering->ftes_cache, fte);
|
||||
return rule;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
From 3d6361f488b417481996a88647f6d958c09986d9 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:09 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Init mutex on the correct path
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit a002602676cdae0c9996adb75b9310559b718a93
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:09 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Init mutex on the correct path
|
||||
|
||||
The newly introduced mutex is only used for reformat actions, but it was
|
||||
initialized for modify header instead.
|
||||
|
||||
The struct that contains the mutex is zero-initialized and an all-zero
|
||||
mutex is valid, so the issue only shows up with CONFIG_DEBUG_MUTEXES.
|
||||
|
||||
Fixes: b206d9ec19df ("net/mlx5: HWS, register reformat actions with fw")
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-5-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
index 9d1c0e4b224a..372e2be90706 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
@@ -1357,6 +1357,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
|
||||
pkt_reformat->fs_hws_action.pr_data = pr_data;
|
||||
}
|
||||
|
||||
+ mutex_init(&pkt_reformat->fs_hws_action.lock);
|
||||
pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS;
|
||||
pkt_reformat->fs_hws_action.hws_action = hws_action;
|
||||
return 0;
|
||||
@@ -1503,7 +1504,6 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
|
||||
err = -ENOMEM;
|
||||
goto release_mh;
|
||||
}
|
||||
- mutex_init(&modify_hdr->fs_hws_action.lock);
|
||||
modify_hdr->fs_hws_action.mh_data = mh_data;
|
||||
modify_hdr->fs_hws_action.fs_pool = pool;
|
||||
modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,40 @@
|
||||
From ba815e2e82f00d7211164de9c0409a7e53173ba8 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:10 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, fix missing ip_version handling in definer
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b5e3c76f35ee7e814c2469c73406c5bbf110d89c
|
||||
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:10 2025 +0300
|
||||
|
||||
net/mlx5: HWS, fix missing ip_version handling in definer
|
||||
|
||||
Fix missing field handling in definer - outer IP version.
|
||||
|
||||
Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling")
|
||||
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-6-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
index 5cc0dc002ac1..d45e1145d197 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
|
||||
@@ -785,6 +785,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd,
|
||||
HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O,
|
||||
outer_headers.ip_protocol,
|
||||
eth_l3_outer.protocol_next_header);
|
||||
+ HWS_SET_HDR(fc, match_param, IP_VERSION_O,
|
||||
+ outer_headers.ip_version,
|
||||
+ eth_l3_outer.ip_version);
|
||||
HWS_SET_HDR(fc, match_param, IP_TTL_O,
|
||||
outer_headers.ttl_hoplimit,
|
||||
eth_l3_outer.time_to_live_hop_limit);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,98 @@
|
||||
From cb2e3bb95bee469a17238ac30011a055ff897886 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:10 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, make sure the uplink is the last destination
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b8335829518ec5988294280e37d735799209d70d
|
||||
Author: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:11 2025 +0300
|
||||
|
||||
net/mlx5: HWS, make sure the uplink is the last destination
|
||||
|
||||
When there is more than one destination, we create a FW flow
|
||||
table and provide it with all the destinations. FW requires to
|
||||
have wire as the last destination in the list (if it exists),
|
||||
otherwise the operation fails with FW syndrome.
|
||||
|
||||
This patch fixes the destination array action creation: if it
|
||||
contains a wire destination, it is moved to the end.
|
||||
|
||||
Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling")
|
||||
Signed-off-by: Vlad Dogaru <vdogaru@nvidia.com>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-7-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
index fb62f3bc4bd4..447ea3f8722c 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
|
||||
@@ -1370,8 +1370,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
|
||||
struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
|
||||
struct mlx5hws_cmd_forward_tbl *fw_island;
|
||||
struct mlx5hws_action *action;
|
||||
- u32 i /*, packet_reformat_id*/;
|
||||
- int ret;
|
||||
+ int ret, last_dest_idx = -1;
|
||||
+ u32 i;
|
||||
|
||||
if (num_dest <= 1) {
|
||||
mlx5hws_err(ctx, "Action must have multiple dests\n");
|
||||
@@ -1401,11 +1401,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
|
||||
dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id;
|
||||
fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
|
||||
fte_attr.ignore_flow_level = ignore_flow_level;
|
||||
- /* ToDo: In SW steering we have a handling of 'go to WIRE'
|
||||
- * destination here by upper layer setting 'is_wire_ft' flag
|
||||
- * if the destination is wire.
|
||||
- * This is because uplink should be last dest in the list.
|
||||
- */
|
||||
+ if (dests[i].is_wire_ft)
|
||||
+ last_dest_idx = i;
|
||||
break;
|
||||
case MLX5HWS_ACTION_TYP_VPORT:
|
||||
dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
|
||||
@@ -1429,6 +1426,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
+ if (last_dest_idx != -1)
|
||||
+ swap(dest_list[last_dest_idx], dest_list[num_dest - 1]);
|
||||
+
|
||||
fte_attr.dests_num = num_dest;
|
||||
fte_attr.dests = dest_list;
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
index 372e2be90706..bf4643d0ce17 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
|
||||
@@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns,
|
||||
switch (attr->type) {
|
||||
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
|
||||
dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst);
|
||||
+ if (dst->dest_attr.ft->flags &
|
||||
+ MLX5_FLOW_TABLE_UPLINK_VPORT)
|
||||
+ dest_actions[num_dest_actions].is_wire_ft = true;
|
||||
break;
|
||||
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
|
||||
dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx,
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
index 9bbadc4d8a0b..d8ac6c196211 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h
|
||||
@@ -213,6 +213,7 @@ struct mlx5hws_action_dest_attr {
|
||||
struct mlx5hws_action *dest;
|
||||
/* Optional reformat action */
|
||||
struct mlx5hws_action *reformat;
|
||||
+ bool is_wire_ft;
|
||||
};
|
||||
|
||||
/**
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,81 @@
|
||||
From 7af56f3f1bdf8cc9ba3e4a85456cedb7a69a8b86 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:10 -0400
|
||||
Subject: [PATCH] net/mlx5e: Fix leak of Geneve TLV option object
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit aa9c44b842096c553871bc68a8cebc7861fa192b
|
||||
Author: Jianbo Liu <jianbol@nvidia.com>
|
||||
Date: Tue Jun 10 18:15:13 2025 +0300
|
||||
|
||||
net/mlx5e: Fix leak of Geneve TLV option object
|
||||
|
||||
Previously, a unique tunnel id was added for the matching on TC
|
||||
non-zero chains, to support inner header rewrite with goto action.
|
||||
Later, it was used to support VF tunnel offload for vxlan, then for
|
||||
Geneve and GRE. To support VF tunnel, a temporary mlx5_flow_spec is
|
||||
used to parse tunnel options. For Geneve, if there is a TLV option, an
|
||||
object is created, or a reference is taken if it already exists. But the
|
||||
temporary mlx5_flow_spec is directly freed after parsing, which causes
|
||||
the leak because no information regarding the object is saved in
|
||||
flow's mlx5_flow_spec, which is used to free the object when deleting
|
||||
the flow.
|
||||
|
||||
To fix the leak, call mlx5_geneve_tlv_option_del() before freeing the
|
||||
temporary spec if it has TLV object.
|
||||
|
||||
Fixes: 521933cdc4aa ("net/mlx5e: Support Geneve and GRE with VF tunnel offload")
|
||||
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Alex Lazar <alazar@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610151514.1094735-9-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
|
||||
index f1d908f61134..fef418e1ed1a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
|
||||
@@ -2028,9 +2028,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
|
||||
return err;
|
||||
}
|
||||
|
||||
-static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
|
||||
+static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec)
|
||||
{
|
||||
- struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
|
||||
void *headers_v = MLX5_ADDR_OF(fte_match_param,
|
||||
spec->match_value,
|
||||
misc_parameters_3);
|
||||
@@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
|
||||
}
|
||||
complete_all(&flow->del_hw_done);
|
||||
|
||||
- if (mlx5_flow_has_geneve_opt(flow))
|
||||
+ if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec))
|
||||
mlx5_geneve_tlv_option_del(priv->mdev->geneve);
|
||||
|
||||
if (flow->decap_route)
|
||||
@@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
|
||||
|
||||
err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
|
||||
if (err) {
|
||||
- kvfree(tmp_spec);
|
||||
NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
|
||||
netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
|
||||
- return err;
|
||||
+ } else {
|
||||
+ err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
|
||||
}
|
||||
- err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
|
||||
+ if (mlx5_flow_has_geneve_opt(tmp_spec))
|
||||
+ mlx5_geneve_tlv_option_del(priv->mdev->geneve);
|
||||
kvfree(tmp_spec);
|
||||
if (err)
|
||||
return err;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,78 @@
|
||||
From c4f4fb210193420862f49e61b6a6865512ee9636 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:10 -0400
|
||||
Subject: [PATCH] net/mlx5: HWS, Add error checking to
|
||||
hws_bwc_rule_complex_hash_node_get()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 1619bdf4389c829f16af5c7d5b4fa5f1673614d7
|
||||
Author: Dan Carpenter <dan.carpenter@linaro.org>
|
||||
Date: Wed Jun 11 16:14:32 2025 +0300
|
||||
|
||||
net/mlx5: HWS, Add error checking to hws_bwc_rule_complex_hash_node_get()
|
||||
|
||||
Check whether ida_alloc() or rhashtable_lookup_get_insert_fast() fails.
|
||||
|
||||
Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers")
|
||||
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
|
||||
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
||||
Link: https://patch.msgid.link/aEmBONjyiF6z5yCV@stanley.mountain
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
index 70768953a4f6..ca7501c57468 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
||||
@@ -1070,7 +1070,7 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
|
||||
struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node;
|
||||
struct rhashtable *refcount_hash;
|
||||
- int i;
|
||||
+ int ret, i;
|
||||
|
||||
bwc_rule->complex_hash_node = NULL;
|
||||
|
||||
@@ -1078,7 +1078,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
if (unlikely(!node))
|
||||
return -ENOMEM;
|
||||
|
||||
- node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL);
|
||||
+ ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL);
|
||||
+ if (ret < 0)
|
||||
+ goto err_free_node;
|
||||
+ node->tag = ret;
|
||||
+
|
||||
refcount_set(&node->refcount, 1);
|
||||
|
||||
/* Clear match buffer - turn off all the unrelated fields
|
||||
@@ -1094,6 +1098,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
old_node = rhashtable_lookup_get_insert_fast(refcount_hash,
|
||||
&node->hash_node,
|
||||
hws_refcount_hash);
|
||||
+ if (IS_ERR(old_node)) {
|
||||
+ ret = PTR_ERR(old_node);
|
||||
+ goto err_free_ida;
|
||||
+ }
|
||||
+
|
||||
if (old_node) {
|
||||
/* Rule with the same tag already exists - update refcount */
|
||||
refcount_inc(&old_node->refcount);
|
||||
@@ -1112,6 +1121,12 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule,
|
||||
|
||||
bwc_rule->complex_hash_node = node;
|
||||
return 0;
|
||||
+
|
||||
+err_free_ida:
|
||||
+ ida_free(&bwc_matcher->complex->metadata_ida, node->tag);
|
||||
+err_free_node:
|
||||
+ kfree(node);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,101 @@
|
||||
From ad54bc890c4892fa93557d437d96dd787a30b98b Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:11 -0400
|
||||
Subject: [PATCH] net/mlx5e: Fix race between DIM disable and net_dim()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit eb41a264a3a576dc040ee37c3d9d6b7e2d9be968
|
||||
Author: Carolina Jubran <cjubran@nvidia.com>
|
||||
Date: Thu Jul 10 16:53:43 2025 +0300
|
||||
|
||||
net/mlx5e: Fix race between DIM disable and net_dim()
|
||||
|
||||
There's a race between disabling DIM and NAPI callbacks using the dim
|
||||
pointer on the RQ or SQ.
|
||||
|
||||
If NAPI checks the DIM state bit and sees it still set, it assumes
|
||||
`rq->dim` or `sq->dim` is valid. But if DIM gets disabled right after
|
||||
that check, the pointer might already be set to NULL, leading to a NULL
|
||||
pointer dereference in net_dim().
|
||||
|
||||
Fix this by calling `synchronize_net()` before freeing the DIM context.
|
||||
This ensures all in-progress NAPI callbacks are finished before the
|
||||
pointer is cleared.
|
||||
|
||||
Kernel log:
|
||||
|
||||
BUG: kernel NULL pointer dereference, address: 0000000000000000
|
||||
...
|
||||
RIP: 0010:net_dim+0x23/0x190
|
||||
...
|
||||
Call Trace:
|
||||
<TASK>
|
||||
? __die+0x20/0x60
|
||||
? page_fault_oops+0x150/0x3e0
|
||||
? common_interrupt+0xf/0xa0
|
||||
? sysvec_call_function_single+0xb/0x90
|
||||
? exc_page_fault+0x74/0x130
|
||||
? asm_exc_page_fault+0x22/0x30
|
||||
? net_dim+0x23/0x190
|
||||
? mlx5e_poll_ico_cq+0x41/0x6f0 [mlx5_core]
|
||||
? sysvec_apic_timer_interrupt+0xb/0x90
|
||||
mlx5e_handle_rx_dim+0x92/0xd0 [mlx5_core]
|
||||
mlx5e_napi_poll+0x2cd/0xac0 [mlx5_core]
|
||||
? mlx5e_poll_ico_cq+0xe5/0x6f0 [mlx5_core]
|
||||
busy_poll_stop+0xa2/0x200
|
||||
? mlx5e_napi_poll+0x1d9/0xac0 [mlx5_core]
|
||||
? mlx5e_trigger_irq+0x130/0x130 [mlx5_core]
|
||||
__napi_busy_loop+0x345/0x3b0
|
||||
? sysvec_call_function_single+0xb/0x90
|
||||
? asm_sysvec_call_function_single+0x16/0x20
|
||||
? sysvec_apic_timer_interrupt+0xb/0x90
|
||||
? pcpu_free_area+0x1e4/0x2e0
|
||||
napi_busy_loop+0x11/0x20
|
||||
xsk_recvmsg+0x10c/0x130
|
||||
sock_recvmsg+0x44/0x70
|
||||
__sys_recvfrom+0xbc/0x130
|
||||
? __schedule+0x398/0x890
|
||||
__x64_sys_recvfrom+0x20/0x30
|
||||
do_syscall_64+0x4c/0x100
|
||||
entry_SYSCALL_64_after_hwframe+0x4b/0x53
|
||||
...
|
||||
---[ end trace 0000000000000000 ]---
|
||||
...
|
||||
---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
|
||||
|
||||
Fixes: 445a25f6e1a2 ("net/mlx5e: Support updating coalescing configuration without resetting channels")
|
||||
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
|
||||
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
|
||||
Link: https://patch.msgid.link/1752155624-24095-3-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
|
||||
index 298bb74ec5e9..d1d629697e28 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
|
||||
@@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable)
|
||||
__set_bit(MLX5E_RQ_STATE_DIM, &rq->state);
|
||||
} else {
|
||||
__clear_bit(MLX5E_RQ_STATE_DIM, &rq->state);
|
||||
-
|
||||
+ synchronize_net();
|
||||
mlx5e_dim_disable(rq->dim);
|
||||
rq->dim = NULL;
|
||||
}
|
||||
@@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable)
|
||||
__set_bit(MLX5E_SQ_STATE_DIM, &sq->state);
|
||||
} else {
|
||||
__clear_bit(MLX5E_SQ_STATE_DIM, &sq->state);
|
||||
-
|
||||
+ synchronize_net();
|
||||
mlx5e_dim_disable(sq->dim);
|
||||
sq->dim = NULL;
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
116
SOURCES/1392-net-mlx5e-add-new-prio-for-promiscuous-mode.patch
Normal file
116
SOURCES/1392-net-mlx5e-add-new-prio-for-promiscuous-mode.patch
Normal file
@ -0,0 +1,116 @@
|
||||
From bd88819cc929d1193d943400f4977a8f573b5d15 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:11 -0400
|
||||
Subject: [PATCH] net/mlx5e: Add new prio for promiscuous mode
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 4c9fce56fa702059bbc5ab737265b68f79cbaac4
|
||||
Author: Jianbo Liu <jianbol@nvidia.com>
|
||||
Date: Thu Jul 10 16:53:44 2025 +0300
|
||||
|
||||
net/mlx5e: Add new prio for promiscuous mode
|
||||
|
||||
An optimization for promiscuous mode adds a high-priority steering
|
||||
table with a single catch-all rule to steer all traffic directly to
|
||||
the TTC table.
|
||||
|
||||
However, a gap exists between the creation of this table and the
|
||||
insertion of the catch-all rule. Packets arriving in this brief window
|
||||
would miss as no rule was inserted yet, unnecessarily incrementing the
|
||||
'rx_steer_missed_packets' counter and being dropped.
|
||||
|
||||
This patch resolves the issue by introducing a new prio for this
|
||||
table, placing it between MLX5E_TC_PRIO and MLX5E_NIC_PRIO. By doing
|
||||
so, packets arriving during the window now fall through to the next
|
||||
prio (at MLX5E_NIC_PRIO) instead of being dropped.
|
||||
|
||||
Fixes: 1c46d7409f30 ("net/mlx5e: Optimize promiscuous mode")
|
||||
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
|
||||
Link: https://patch.msgid.link/1752155624-24095-4-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
|
||||
index b5c3a2a9d2a5..9560fcba643f 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
|
||||
@@ -18,7 +18,8 @@ enum {
|
||||
|
||||
enum {
|
||||
MLX5E_TC_PRIO = 0,
|
||||
- MLX5E_NIC_PRIO
|
||||
+ MLX5E_PROMISC_PRIO,
|
||||
+ MLX5E_NIC_PRIO,
|
||||
};
|
||||
|
||||
struct mlx5e_flow_table {
|
||||
@@ -68,9 +69,13 @@ struct mlx5e_l2_table {
|
||||
MLX5_HASH_FIELD_SEL_DST_IP |\
|
||||
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
|
||||
|
||||
-/* NIC prio FTS */
|
||||
+/* NIC promisc FT level */
|
||||
enum {
|
||||
MLX5E_PROMISC_FT_LEVEL,
|
||||
+};
|
||||
+
|
||||
+/* NIC prio FTS */
|
||||
+enum {
|
||||
MLX5E_VLAN_FT_LEVEL,
|
||||
MLX5E_L2_FT_LEVEL,
|
||||
MLX5E_TTC_FT_LEVEL,
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
|
||||
index 05058710d2c7..537e732085b2 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
|
||||
@@ -776,7 +776,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs)
|
||||
ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE;
|
||||
ft_attr.autogroup.max_num_groups = 1;
|
||||
ft_attr.level = MLX5E_PROMISC_FT_LEVEL;
|
||||
- ft_attr.prio = MLX5E_NIC_PRIO;
|
||||
+ ft_attr.prio = MLX5E_PROMISC_PRIO;
|
||||
|
||||
ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr);
|
||||
if (IS_ERR(ft->t)) {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
index 5f0f546fa126..b29e67466701 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
@@ -113,13 +113,16 @@
|
||||
#define ETHTOOL_PRIO_NUM_LEVELS 1
|
||||
#define ETHTOOL_NUM_PRIOS 11
|
||||
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
|
||||
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy,
|
||||
+/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy,
|
||||
* {IPsec RoCE MPV,Alias table},IPsec RoCE policy
|
||||
*/
|
||||
-#define KERNEL_NIC_PRIO_NUM_LEVELS 11
|
||||
+#define KERNEL_NIC_PRIO_NUM_LEVELS 10
|
||||
#define KERNEL_NIC_NUM_PRIOS 1
|
||||
-/* One more level for tc */
|
||||
-#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
|
||||
+/* One more level for tc, and one more for promisc */
|
||||
+#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2)
|
||||
+
|
||||
+#define KERNEL_NIC_PROMISC_NUM_PRIOS 1
|
||||
+#define KERNEL_NIC_PROMISC_NUM_LEVELS 1
|
||||
|
||||
#define KERNEL_NIC_TC_NUM_PRIOS 1
|
||||
#define KERNEL_NIC_TC_NUM_LEVELS 3
|
||||
@@ -187,6 +190,8 @@ static struct init_tree_node {
|
||||
ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
|
||||
ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS,
|
||||
KERNEL_NIC_TC_NUM_LEVELS),
|
||||
+ ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS,
|
||||
+ KERNEL_NIC_PROMISC_NUM_LEVELS),
|
||||
ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS,
|
||||
KERNEL_NIC_PRIO_NUM_LEVELS))),
|
||||
ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,86 @@
|
||||
From 07084a10593c81a6071d6f045927f1c2f52ac5b3 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:11 -0400
|
||||
Subject: [PATCH] net/mlx5: Correctly set gso_size when LRO is used
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 531d0d32de3e1b6b77a87bd37de0c2c6e17b496a
|
||||
Author: Christoph Paasch <cpaasch@openai.com>
|
||||
Date: Tue Jul 15 13:20:53 2025 -0700
|
||||
|
||||
net/mlx5: Correctly set gso_size when LRO is used
|
||||
|
||||
gso_size is expected by the networking stack to be the size of the
|
||||
payload (thus, not including ethernet/IP/TCP-headers). However, cqe_bcnt
|
||||
is the full sized frame (including the headers). Dividing cqe_bcnt by
|
||||
lro_num_seg will then give incorrect results.
|
||||
|
||||
For example, running a bpftrace higher up in the TCP-stack
|
||||
(tcp_event_data_recv), we commonly have gso_size set to 1450 or 1451 even
|
||||
though in reality the payload was only 1448 bytes.
|
||||
|
||||
This can have unintended consequences:
|
||||
- In tcp_measure_rcv_mss() len will be for example 1450, but rcv_mss
|
||||
will be 1448 (because tp->advmss is 1448). Thus, we will always
|
||||
recompute scaling_ratio each time an LRO-packet is received.
|
||||
- In tcp_gro_receive(), it will interfere with the decision whether or
|
||||
not to flush and thus potentially result in fewer gro'ed packets.
|
||||
|
||||
So, we need to discount the protocol headers from cqe_bcnt so we can
|
||||
actually divide the payload by lro_num_seg to get the real gso_size.
|
||||
|
||||
v2:
|
||||
- Use "(unsigned char *)tcp + tcp->doff * 4 - skb->data" to compute header-len
|
||||
(Tariq Toukan <tariqt@nvidia.com>)
|
||||
- Improve commit-message (Gal Pressman <gal@nvidia.com>)
|
||||
|
||||
Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
|
||||
Signed-off-by: Christoph Paasch <cpaasch@openai.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Gal Pressman <gal@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250715-cpaasch-pf-925-investigate-incorrect-gso_size-on-cx-7-nic-v2-1-e06c3475f3ac@openai.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
index 12ca0a3e8514..382679838113 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
@@ -1156,8 +1156,9 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
|
||||
}
|
||||
}
|
||||
|
||||
-static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
|
||||
- u32 cqe_bcnt)
|
||||
+static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb,
|
||||
+ struct mlx5_cqe64 *cqe,
|
||||
+ u32 cqe_bcnt)
|
||||
{
|
||||
struct ethhdr *eth = (struct ethhdr *)(skb->data);
|
||||
struct tcphdr *tcp;
|
||||
@@ -1207,6 +1208,8 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
|
||||
tcp->check = tcp_v6_check(payload_len, &ipv6->saddr,
|
||||
&ipv6->daddr, check);
|
||||
}
|
||||
+
|
||||
+ return (unsigned int)((unsigned char *)tcp + tcp->doff * 4 - skb->data);
|
||||
}
|
||||
|
||||
static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
|
||||
@@ -1563,8 +1566,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
|
||||
mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
|
||||
|
||||
if (lro_num_seg > 1) {
|
||||
- mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
|
||||
- skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
|
||||
+ unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
|
||||
+
|
||||
+ skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg);
|
||||
/* Subtract one since we already counted this as one
|
||||
* "regular" packet in mlx5e_complete_rx_cqe()
|
||||
*/
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
49
SOURCES/1394-net-mlx5-fix-memory-leak-in-cmd-exec.patch
Normal file
49
SOURCES/1394-net-mlx5-fix-memory-leak-in-cmd-exec.patch
Normal file
@ -0,0 +1,49 @@
|
||||
From 0e6b8d1b57695231dc16f426ca329c7fc415ff43 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:12 -0400
|
||||
Subject: [PATCH] net/mlx5: Fix memory leak in cmd_exec()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 3afa3ae3db52e3c216d77bd5907a5a86833806cc
|
||||
Author: Chiara Meiohas <cmeiohas@nvidia.com>
|
||||
Date: Thu Jul 17 15:06:09 2025 +0300
|
||||
|
||||
net/mlx5: Fix memory leak in cmd_exec()
|
||||
|
||||
If cmd_exec() is called with callback and mlx5_cmd_invoke() returns an
|
||||
error, resources allocated in cmd_exec() will not be freed.
|
||||
|
||||
Fix the code to release the resources if mlx5_cmd_invoke() returns an
|
||||
error.
|
||||
|
||||
Fixes: f086470122d5 ("net/mlx5: cmdif, Return value improvements")
|
||||
Reported-by: Alex Tereshkin <atereshkin@nvidia.com>
|
||||
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
|
||||
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
|
||||
Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Simon Horman <horms@kernel.org>
|
||||
Link: https://patch.msgid.link/1752753970-261832-2-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
index b1aeea7c4a91..e395ef5f356e 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
|
||||
@@ -1947,8 +1947,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
|
||||
|
||||
err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
|
||||
pages_queue, token, force_polling);
|
||||
- if (callback)
|
||||
- return err;
|
||||
+ if (callback && !err)
|
||||
+ return 0;
|
||||
|
||||
if (err > 0) /* Failed in FW, command didn't execute */
|
||||
err = deliv_status_to_err(err);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,247 @@
|
||||
From 52b9ece6153bacd7511923119dae3562495686df Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:42:12 -0400
|
||||
Subject: [PATCH] net/mlx5: E-Switch, Fix peer miss rules to use peer eswitch
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 5b4c56ad4da0aa00b258ab50b1f5775b7d3108c7
|
||||
Author: Shahar Shitrit <shshitrit@nvidia.com>
|
||||
Date: Thu Jul 17 15:06:10 2025 +0300
|
||||
|
||||
net/mlx5: E-Switch, Fix peer miss rules to use peer eswitch
|
||||
|
||||
In the original design, it is assumed local and peer eswitches have the
|
||||
same number of vfs. However, in new firmware, local and peer eswitches
|
||||
can have different number of vfs configured by mlxconfig. In such
|
||||
configuration, it is incorrect to derive the number of vfs from the
|
||||
local device's eswitch.
|
||||
|
||||
Fix this by updating the peer miss rules add and delete functions to use
|
||||
the peer device's eswitch and vf count instead of the local device's
|
||||
information, ensuring correct behavior regardless of vf configuration
|
||||
differences.
|
||||
|
||||
Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules")
|
||||
Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Reviewed-by: Simon Horman <horms@kernel.org>
|
||||
Link: https://patch.msgid.link/1752753970-261832-3-git-send-email-tariqt@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
||||
index 0e3a977d5332..bee906661282 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
||||
@@ -1182,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
|
||||
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
struct mlx5_core_dev *peer_dev)
|
||||
{
|
||||
+ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
|
||||
struct mlx5_flow_destination dest = {};
|
||||
struct mlx5_flow_act flow_act = {0};
|
||||
struct mlx5_flow_handle **flows;
|
||||
- /* total vports is the same for both e-switches */
|
||||
- int nvports = esw->total_vports;
|
||||
struct mlx5_flow_handle *flow;
|
||||
+ struct mlx5_vport *peer_vport;
|
||||
struct mlx5_flow_spec *spec;
|
||||
- struct mlx5_vport *vport;
|
||||
int err, pfindex;
|
||||
unsigned long i;
|
||||
void *misc;
|
||||
|
||||
- if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev))
|
||||
+ if (!MLX5_VPORT_MANAGER(peer_dev) &&
|
||||
+ !mlx5_core_is_ecpf_esw_manager(peer_dev))
|
||||
return 0;
|
||||
|
||||
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
|
||||
@@ -1203,7 +1203,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
|
||||
peer_miss_rules_setup(esw, peer_dev, spec, &dest);
|
||||
|
||||
- flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL);
|
||||
+ flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL);
|
||||
if (!flows) {
|
||||
err = -ENOMEM;
|
||||
goto alloc_flows_err;
|
||||
@@ -1213,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
|
||||
misc_parameters);
|
||||
|
||||
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
|
||||
- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
|
||||
- spec, MLX5_VPORT_PF);
|
||||
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
|
||||
+ esw_set_peer_miss_rule_source_port(esw, peer_esw, spec,
|
||||
+ MLX5_VPORT_PF);
|
||||
|
||||
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
|
||||
spec, &flow_act, &dest, 1);
|
||||
@@ -1224,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
err = PTR_ERR(flow);
|
||||
goto add_pf_flow_err;
|
||||
}
|
||||
- flows[vport->index] = flow;
|
||||
+ flows[peer_vport->index] = flow;
|
||||
}
|
||||
|
||||
- if (mlx5_ecpf_vport_exists(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
|
||||
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
|
||||
MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
|
||||
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
|
||||
spec, &flow_act, &dest, 1);
|
||||
@@ -1236,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
err = PTR_ERR(flow);
|
||||
goto add_ecpf_flow_err;
|
||||
}
|
||||
- flows[vport->index] = flow;
|
||||
+ flows[peer_vport->index] = flow;
|
||||
}
|
||||
|
||||
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
|
||||
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_vfs(peer_dev)) {
|
||||
esw_set_peer_miss_rule_source_port(esw,
|
||||
- peer_dev->priv.eswitch,
|
||||
- spec, vport->vport);
|
||||
+ peer_esw,
|
||||
+ spec, peer_vport->vport);
|
||||
|
||||
flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw),
|
||||
spec, &flow_act, &dest, 1);
|
||||
@@ -1250,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
err = PTR_ERR(flow);
|
||||
goto add_vf_flow_err;
|
||||
}
|
||||
- flows[vport->index] = flow;
|
||||
+ flows[peer_vport->index] = flow;
|
||||
}
|
||||
|
||||
- if (mlx5_core_ec_sriov_enabled(esw->dev)) {
|
||||
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
|
||||
- if (i >= mlx5_core_max_ec_vfs(peer_dev))
|
||||
- break;
|
||||
- esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
|
||||
- spec, vport->vport);
|
||||
+ if (mlx5_core_ec_sriov_enabled(peer_dev)) {
|
||||
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_ec_vfs(peer_dev)) {
|
||||
+ esw_set_peer_miss_rule_source_port(esw, peer_esw,
|
||||
+ spec,
|
||||
+ peer_vport->vport);
|
||||
flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
|
||||
spec, &flow_act, &dest, 1);
|
||||
if (IS_ERR(flow)) {
|
||||
err = PTR_ERR(flow);
|
||||
goto add_ec_vf_flow_err;
|
||||
}
|
||||
- flows[vport->index] = flow;
|
||||
+ flows[peer_vport->index] = flow;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1282,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
return 0;
|
||||
|
||||
add_ec_vf_flow_err:
|
||||
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
|
||||
- if (!flows[vport->index])
|
||||
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_ec_vfs(peer_dev)) {
|
||||
+ if (!flows[peer_vport->index])
|
||||
continue;
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
add_vf_flow_err:
|
||||
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
|
||||
- if (!flows[vport->index])
|
||||
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_vfs(peer_dev)) {
|
||||
+ if (!flows[peer_vport->index])
|
||||
continue;
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
- if (mlx5_ecpf_vport_exists(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
add_ecpf_flow_err:
|
||||
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
add_pf_flow_err:
|
||||
esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
|
||||
@@ -1313,37 +1316,34 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
|
||||
struct mlx5_core_dev *peer_dev)
|
||||
{
|
||||
+ struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch;
|
||||
u16 peer_index = mlx5_get_dev_index(peer_dev);
|
||||
struct mlx5_flow_handle **flows;
|
||||
- struct mlx5_vport *vport;
|
||||
+ struct mlx5_vport *peer_vport;
|
||||
unsigned long i;
|
||||
|
||||
flows = esw->fdb_table.offloads.peer_miss_rules[peer_index];
|
||||
if (!flows)
|
||||
return;
|
||||
|
||||
- if (mlx5_core_ec_sriov_enabled(esw->dev)) {
|
||||
- mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) {
|
||||
- /* The flow for a particular vport could be NULL if the other ECPF
|
||||
- * has fewer or no VFs enabled
|
||||
- */
|
||||
- if (!flows[vport->index])
|
||||
- continue;
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
- }
|
||||
+ if (mlx5_core_ec_sriov_enabled(peer_dev)) {
|
||||
+ mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_ec_vfs(peer_dev))
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
|
||||
- mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport,
|
||||
+ mlx5_core_max_vfs(peer_dev))
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
|
||||
- if (mlx5_ecpf_vport_exists(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ if (mlx5_ecpf_vport_exists(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
|
||||
- if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
|
||||
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
|
||||
- mlx5_del_flow_rules(flows[vport->index]);
|
||||
+ if (mlx5_core_is_ecpf_esw_manager(peer_dev)) {
|
||||
+ peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF);
|
||||
+ mlx5_del_flow_rules(flows[peer_vport->index]);
|
||||
}
|
||||
|
||||
kvfree(flows);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
From dc192079bc2276b1a7c8163940448e003bf856c7 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:25 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: convert timeouts to secs_to_jiffies()
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 16b82367aa28bd31795e720548421b58824108e1
|
||||
Author: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
|
||||
Date: Wed Feb 19 21:36:40 2025 +0000
|
||||
|
||||
RDMA/mlx5: convert timeouts to secs_to_jiffies()
|
||||
|
||||
Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced
|
||||
secs_to_jiffies(). As the value here is a multiple of 1000, use
|
||||
secs_to_jiffies() instead of msecs_to_jiffies to avoid the multiplication.
|
||||
|
||||
This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with
|
||||
the following Coccinelle rules:
|
||||
|
||||
@depends on patch@
|
||||
expression E;
|
||||
@@
|
||||
|
||||
-msecs_to_jiffies(E * 1000)
|
||||
+secs_to_jiffies(E)
|
||||
|
||||
-msecs_to_jiffies(E * MSEC_PER_SEC)
|
||||
+secs_to_jiffies(E)
|
||||
|
||||
Link: https://patch.msgid.link/r/20250219-rdma-secs-to-jiffies-v1-2-b506746561a9@linux.microsoft.com
|
||||
Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
|
||||
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
|
||||
index 247f7248a0c0..5a7b234bdfd9 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/mr.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/mr.c
|
||||
@@ -525,7 +525,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
|
||||
ent->fill_to_high_water = false;
|
||||
if (ent->pending)
|
||||
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
|
||||
- msecs_to_jiffies(1000));
|
||||
+ secs_to_jiffies(1));
|
||||
else
|
||||
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
|
||||
}
|
||||
@@ -576,7 +576,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
|
||||
"add keys command failed, err %d\n",
|
||||
err);
|
||||
queue_delayed_work(cache->wq, &ent->dwork,
|
||||
- msecs_to_jiffies(1000));
|
||||
+ secs_to_jiffies(1));
|
||||
}
|
||||
}
|
||||
} else if (ent->mkeys_queue.ci > 2 * ent->limit) {
|
||||
@@ -2080,7 +2080,7 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
|
||||
ent->in_use--;
|
||||
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
|
||||
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
|
||||
- msecs_to_jiffies(30 * 1000));
|
||||
+ secs_to_jiffies(30));
|
||||
ent->tmp_cleanup_scheduled = true;
|
||||
}
|
||||
spin_unlock_irq(&ent->mkeys_queue.lock);
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,103 @@
|
||||
From a146abefb34ccd688e9923429cd51b9b7b1a3442 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Remove the redundant MLX5_IB_STAGE_UAR stage
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 972db388d40ded1a5ef8ce09d92ef1e2b9e40f07
|
||||
Author: Yishai Hadas <yishaih@nvidia.com>
|
||||
Date: Tue May 13 14:02:40 2025 +0300
|
||||
|
||||
RDMA/mlx5: Remove the redundant MLX5_IB_STAGE_UAR stage
|
||||
|
||||
The MLX5_IB_STAGE_UAR stage in the RDMA driver is redundant and should
|
||||
be removed.
|
||||
|
||||
Responsibility for initializing the device's UAR pointer
|
||||
(mdev->priv.uar) lies with mlx5_core, which already sets it during the
|
||||
mlx5_load() process.
|
||||
|
||||
At present, the RDMA UAR stage overwrites this pointer, which was
|
||||
correctly initialized by mlx5_core, creating the risk of inconsistency.
|
||||
|
||||
Ownership and management of the UAR pointer should remain exclusively
|
||||
within mlx5_core.
|
||||
|
||||
In the current upstream code, we luckily receive the same pointer, since
|
||||
mlx5_get_uars_page() still finds available BF registers for that UAR,
|
||||
allowing it to be shared.
|
||||
|
||||
However, future changes in mlx5_core may expose this flaw.
|
||||
For instance, if mlx5_alloc_bfreg() is invoked twice before the RDMA UAR
|
||||
stage runs, the RDMA driver may overwrite the UAR allocated by
|
||||
mlx5_core.
|
||||
|
||||
This could lead to real bugs. For example, if mlx5_ib is unloaded
|
||||
(rmmod), it might free the UAR, leaving mlx5_core with a dangling
|
||||
reference to an invalid UAR.
|
||||
|
||||
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
|
||||
Reviewed-by: Fan Li <fanl@nvidia.com>
|
||||
Link: https://patch.msgid.link/feaa84ec6f20468b4935c439923e9266122a93d0.1747134130.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
|
||||
index 64f1e0fafd46..6a75c5a2f6c8 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/main.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/main.c
|
||||
@@ -4422,17 +4422,6 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
|
||||
mlx5_core_native_port_num(dev->mdev) - 1);
|
||||
}
|
||||
|
||||
-static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
|
||||
-{
|
||||
- dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
|
||||
- return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
|
||||
-}
|
||||
-
|
||||
-static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
|
||||
-{
|
||||
- mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
|
||||
-}
|
||||
-
|
||||
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
|
||||
{
|
||||
int err;
|
||||
@@ -4661,9 +4650,6 @@ static const struct mlx5_ib_profile pf_profile = {
|
||||
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
mlx5_ib_stage_cong_debugfs_init,
|
||||
mlx5_ib_stage_cong_debugfs_cleanup),
|
||||
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
|
||||
- mlx5_ib_stage_uar_init,
|
||||
- mlx5_ib_stage_uar_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
|
||||
mlx5_ib_stage_bfrag_init,
|
||||
mlx5_ib_stage_bfrag_cleanup),
|
||||
@@ -4721,9 +4707,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
|
||||
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
mlx5_ib_stage_cong_debugfs_init,
|
||||
mlx5_ib_stage_cong_debugfs_cleanup),
|
||||
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
|
||||
- mlx5_ib_stage_uar_init,
|
||||
- mlx5_ib_stage_uar_cleanup),
|
||||
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
|
||||
mlx5_ib_stage_bfrag_init,
|
||||
mlx5_ib_stage_bfrag_cleanup),
|
||||
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
|
||||
index c84ef94bb9fc..54ca6e010bd4 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
|
||||
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
|
||||
@@ -1002,7 +1002,6 @@ enum mlx5_ib_stages {
|
||||
MLX5_IB_STAGE_ODP,
|
||||
MLX5_IB_STAGE_COUNTERS,
|
||||
MLX5_IB_STAGE_CONG_DEBUGFS,
|
||||
- MLX5_IB_STAGE_UAR,
|
||||
MLX5_IB_STAGE_BFREG,
|
||||
MLX5_IB_STAGE_PRE_IB_REG_UMR,
|
||||
MLX5_IB_STAGE_WHITELIST_UID,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
From 1e3839f66a3dc069dc1f0b26277180792852653c Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Add support for 200Gbps per lane speeds
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit d00d16bcbc2553a3ac9acccf2d6444cda5502adf
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Tue May 13 14:03:41 2025 +0300
|
||||
|
||||
RDMA/mlx5: Add support for 200Gbps per lane speeds
|
||||
|
||||
Add support for 200Gbps per lane speeds when querying PTYS and
|
||||
report it back correctly when needed.
|
||||
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
|
||||
Link: https://patch.msgid.link/b842d2f523e9b82e221378c444ebd5860d612959.1747134197.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
|
||||
index 6a75c5a2f6c8..d5f44c83b667 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/main.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/main.c
|
||||
@@ -485,6 +485,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
|
||||
*active_width = IB_WIDTH_2X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
|
||||
+ *active_width = IB_WIDTH_1X;
|
||||
+ *active_speed = IB_SPEED_XDR;
|
||||
+ break;
|
||||
case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
|
||||
*active_width = IB_WIDTH_8X;
|
||||
*active_speed = IB_SPEED_HDR;
|
||||
@@ -493,10 +497,18 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
|
||||
*active_width = IB_WIDTH_4X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
|
||||
+ *active_width = IB_WIDTH_2X;
|
||||
+ *active_speed = IB_SPEED_XDR;
|
||||
+ break;
|
||||
case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
|
||||
*active_width = IB_WIDTH_8X;
|
||||
*active_speed = IB_SPEED_NDR;
|
||||
break;
|
||||
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
|
||||
+ *active_width = IB_WIDTH_4X;
|
||||
+ *active_speed = IB_SPEED_XDR;
|
||||
+ break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
109
SOURCES/1399-rdma-mlx5-avoid-flexible-array-warning.patch
Normal file
109
SOURCES/1399-rdma-mlx5-avoid-flexible-array-warning.patch
Normal file
@ -0,0 +1,109 @@
|
||||
From 08a08451012e72442d1813506229d85d3f785535 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Avoid flexible array warning
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit e91fb8b9d0edec86a1ef26490bc80af96210863d
|
||||
Author: Leon Romanovsky <leon@kernel.org>
|
||||
Date: Wed May 21 14:34:58 2025 +0300
|
||||
|
||||
RDMA/mlx5: Avoid flexible array warning
|
||||
|
||||
The following warning is reported by sparse tool:
|
||||
drivers/infiniband/hw/mlx5/fs.c:1664:26: warning: array of flexible
|
||||
structures
|
||||
|
||||
Avoid it by simply splitting array into two separate structs.
|
||||
|
||||
Link: https://patch.msgid.link/7b891b96a9fc053d01284c184d25ae98d35db2d4.1747827041.git.leon@kernel.org
|
||||
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
|
||||
index 0ff9f18a71e8..680627f1de33 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/fs.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/fs.c
|
||||
@@ -1645,11 +1645,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
|
||||
return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
|
||||
}
|
||||
|
||||
-enum {
|
||||
- LEFTOVERS_MC,
|
||||
- LEFTOVERS_UC,
|
||||
-};
|
||||
-
|
||||
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_ib_flow_prio *ft_prio,
|
||||
struct ib_flow_attr *flow_attr,
|
||||
@@ -1659,43 +1654,32 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
|
||||
struct mlx5_ib_flow_handler *handler = NULL;
|
||||
|
||||
static struct {
|
||||
- struct ib_flow_attr flow_attr;
|
||||
struct ib_flow_spec_eth eth_flow;
|
||||
- } leftovers_specs[] = {
|
||||
- [LEFTOVERS_MC] = {
|
||||
- .flow_attr = {
|
||||
- .num_of_specs = 1,
|
||||
- .size = sizeof(leftovers_specs[0])
|
||||
- },
|
||||
- .eth_flow = {
|
||||
- .type = IB_FLOW_SPEC_ETH,
|
||||
- .size = sizeof(struct ib_flow_spec_eth),
|
||||
- .mask = {.dst_mac = {0x1} },
|
||||
- .val = {.dst_mac = {0x1} }
|
||||
- }
|
||||
- },
|
||||
- [LEFTOVERS_UC] = {
|
||||
- .flow_attr = {
|
||||
- .num_of_specs = 1,
|
||||
- .size = sizeof(leftovers_specs[0])
|
||||
- },
|
||||
- .eth_flow = {
|
||||
- .type = IB_FLOW_SPEC_ETH,
|
||||
- .size = sizeof(struct ib_flow_spec_eth),
|
||||
- .mask = {.dst_mac = {0x1} },
|
||||
- .val = {.dst_mac = {} }
|
||||
- }
|
||||
- }
|
||||
- };
|
||||
+ struct ib_flow_attr flow_attr;
|
||||
+ } leftovers_wc = { .flow_attr = { .num_of_specs = 1,
|
||||
+ .size = sizeof(leftovers_wc) },
|
||||
+ .eth_flow = {
|
||||
+ .type = IB_FLOW_SPEC_ETH,
|
||||
+ .size = sizeof(struct ib_flow_spec_eth),
|
||||
+ .mask = { .dst_mac = { 0x1 } },
|
||||
+ .val = { .dst_mac = { 0x1 } } } };
|
||||
|
||||
- handler = create_flow_rule(dev, ft_prio,
|
||||
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
|
||||
- dst);
|
||||
+ static struct {
|
||||
+ struct ib_flow_spec_eth eth_flow;
|
||||
+ struct ib_flow_attr flow_attr;
|
||||
+ } leftovers_uc = { .flow_attr = { .num_of_specs = 1,
|
||||
+ .size = sizeof(leftovers_uc) },
|
||||
+ .eth_flow = {
|
||||
+ .type = IB_FLOW_SPEC_ETH,
|
||||
+ .size = sizeof(struct ib_flow_spec_eth),
|
||||
+ .mask = { .dst_mac = { 0x1 } },
|
||||
+ .val = { .dst_mac = {} } } };
|
||||
+
|
||||
+ handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
|
||||
if (!IS_ERR(handler) &&
|
||||
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
|
||||
handler_ucast = create_flow_rule(dev, ft_prio,
|
||||
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
|
||||
- dst);
|
||||
+ &leftovers_uc.flow_attr, dst);
|
||||
if (IS_ERR(handler_ucast)) {
|
||||
mlx5_del_flow_rules(handler->rule);
|
||||
ft_prio->refcount--;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,103 @@
|
||||
From 06a22c7cc0922e8dbc79ec9dad2cec1493943213 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Initialize obj_event->obj_sub_list before
|
||||
xa_insert
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 8edab8a72d67742f87e9dc2e2b0cdfddda5dc29a
|
||||
Author: Mark Zhang <markzhang@nvidia.com>
|
||||
Date: Tue Jun 17 11:13:55 2025 +0300
|
||||
|
||||
RDMA/mlx5: Initialize obj_event->obj_sub_list before xa_insert
|
||||
|
||||
The obj_event may be loaded immediately after it is inserted; if the
|
||||
list_head is not initialized then we may get a poisonous pointer. This
|
||||
fixes the crash below:
|
||||
|
||||
mlx5_core 0000:03:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(2048) RxCqeCmprss(0 enhanced)
|
||||
mlx5_core.sf mlx5_core.sf.4: firmware version: 32.38.3056
|
||||
mlx5_core 0000:03:00.0 en3f0pf0sf2002: renamed from eth0
|
||||
mlx5_core.sf mlx5_core.sf.4: Rate limit: 127 rates are supported, range: 0Mbps to 195312Mbps
|
||||
IPv6: ADDRCONF(NETDEV_CHANGE): en3f0pf0sf2002: link becomes ready
|
||||
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060
|
||||
Mem abort info:
|
||||
ESR = 0x96000006
|
||||
EC = 0x25: DABT (current EL), IL = 32 bits
|
||||
SET = 0, FnV = 0
|
||||
EA = 0, S1PTW = 0
|
||||
Data abort info:
|
||||
ISV = 0, ISS = 0x00000006
|
||||
CM = 0, WnR = 0
|
||||
user pgtable: 4k pages, 48-bit VAs, pgdp=00000007760fb000
|
||||
[0000000000000060] pgd=000000076f6d7003, p4d=000000076f6d7003, pud=0000000777841003, pmd=0000000000000000
|
||||
Internal error: Oops: 96000006 [#1] SMP
|
||||
Modules linked in: ipmb_host(OE) act_mirred(E) cls_flower(E) sch_ingress(E) mptcp_diag(E) udp_diag(E) raw_diag(E) unix_diag(E) tcp_diag(E) inet_diag(E) binfmt_misc(E) bonding(OE) rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) isofs(E) cdrom(E) mst_pciconf(OE) ib_umad(OE) mlx5_ib(OE) ipmb_dev_int(OE) mlx5_core(OE) kpatch_15237886(OEK) mlxdevm(OE) auxiliary(OE) ib_uverbs(OE) ib_core(OE) psample(E) mlxfw(OE) tls(E) sunrpc(E) vfat(E) fat(E) crct10dif_ce(E) ghash_ce(E) sha1_ce(E) sbsa_gwdt(E) virtio_console(E) ext4(E) mbcache(E) jbd2(E) xfs(E) libcrc32c(E) mmc_block(E) virtio_net(E) net_failover(E) failover(E) sha2_ce(E) sha256_arm64(E) nvme(OE) nvme_core(OE) gpio_mlxbf3(OE) mlx_compat(OE) mlxbf_pmc(OE) i2c_mlxbf(OE) sdhci_of_dwcmshc(OE) pinctrl_mlxbf3(OE) mlxbf_pka(OE) gpio_generic(E) i2c_core(E) mmc_core(E) mlxbf_gige(OE) vitesse(E) pwr_mlxbf(OE) mlxbf_tmfifo(OE) micrel(E) mlxbf_bootctl(OE) virtio_ring(E) virtio(E) ipmi_devintf(E) ipmi_msghandler(E)
|
||||
[last unloaded: mst_pci]
|
||||
CPU: 11 PID: 20913 Comm: rte-worker-11 Kdump: loaded Tainted: G OE K 5.10.134-13.1.an8.aarch64 #1
|
||||
Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.2.2.12968 Oct 26 2023
|
||||
pstate: a0400089 (NzCv daIf +PAN -UAO -TCO BTYPE=--)
|
||||
pc : dispatch_event_fd+0x68/0x300 [mlx5_ib]
|
||||
lr : devx_event_notifier+0xcc/0x228 [mlx5_ib]
|
||||
sp : ffff80001005bcf0
|
||||
x29: ffff80001005bcf0 x28: 0000000000000001
|
||||
x27: ffff244e0740a1d8 x26: ffff244e0740a1d0
|
||||
x25: ffffda56beff5ae0 x24: ffffda56bf911618
|
||||
x23: ffff244e0596a480 x22: ffff244e0596a480
|
||||
x21: ffff244d8312ad90 x20: ffff244e0596a480
|
||||
x19: fffffffffffffff0 x18: 0000000000000000
|
||||
x17: 0000000000000000 x16: ffffda56be66d620
|
||||
x15: 0000000000000000 x14: 0000000000000000
|
||||
x13: 0000000000000000 x12: 0000000000000000
|
||||
x11: 0000000000000040 x10: ffffda56bfcafb50
|
||||
x9 : ffffda5655c25f2c x8 : 0000000000000010
|
||||
x7 : 0000000000000000 x6 : ffff24545a2e24b8
|
||||
x5 : 0000000000000003 x4 : ffff80001005bd28
|
||||
x3 : 0000000000000000 x2 : 0000000000000000
|
||||
x1 : ffff244e0596a480 x0 : ffff244d8312ad90
|
||||
Call trace:
|
||||
dispatch_event_fd+0x68/0x300 [mlx5_ib]
|
||||
devx_event_notifier+0xcc/0x228 [mlx5_ib]
|
||||
atomic_notifier_call_chain+0x58/0x80
|
||||
mlx5_eq_async_int+0x148/0x2b0 [mlx5_core]
|
||||
atomic_notifier_call_chain+0x58/0x80
|
||||
irq_int_handler+0x20/0x30 [mlx5_core]
|
||||
__handle_irq_event_percpu+0x60/0x220
|
||||
handle_irq_event_percpu+0x3c/0x90
|
||||
handle_irq_event+0x58/0x158
|
||||
handle_fasteoi_irq+0xfc/0x188
|
||||
generic_handle_irq+0x34/0x48
|
||||
...
|
||||
|
||||
Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX")
|
||||
Link: https://patch.msgid.link/r/3ce7f20e0d1a03dc7de6e57494ec4b8eaf1f05c2.1750147949.git.leon@kernel.org
|
||||
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
|
||||
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
|
||||
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
|
||||
index 6485ce3208ce..fae11535b1a0 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/devx.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/devx.c
|
||||
@@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
|
||||
/* Level1 is valid for future use, no need to free */
|
||||
return -ENOMEM;
|
||||
|
||||
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
|
||||
err = xa_insert(&event->object_ids,
|
||||
key_level2,
|
||||
obj_event,
|
||||
@@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
|
||||
kfree(obj_event);
|
||||
return err;
|
||||
}
|
||||
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
|
||||
}
|
||||
|
||||
return 0;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
From 1607e340f1d5df6e422bdd53e34444bf73986864 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Fix HW counters query for non-representor devices
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 3cc1dbfddf88dc5ecce0a75185061403b1f7352d
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Mon Jun 16 12:14:52 2025 +0300
|
||||
|
||||
RDMA/mlx5: Fix HW counters query for non-representor devices
|
||||
|
||||
To get the device HW counters, a non-representor switchdev device
|
||||
should use the mlx5_ib_query_q_counters() function and query all of
|
||||
the available counters. While a representor device in switchdev mode
|
||||
should use the mlx5_ib_query_q_counters_vport() function and query only
|
||||
the Q_Counters without the PPCNT counters and congestion control counters,
|
||||
since they aren't relevant for a representor device.
|
||||
|
||||
Currently a non-representor switchdev device skips querying the PPCNT
|
||||
counters and congestion control counters, leaving them unupdated.
|
||||
Fix that by properly querying those counters for non-representor devices.
|
||||
|
||||
Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics")
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
|
||||
Link: https://patch.msgid.link/56bf8af4ca8c58e3fb9f7e47b1dca2009eeeed81.1750064969.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
|
||||
index b847084dcd99..943e9eb2ad20 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/counters.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/counters.c
|
||||
@@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
|
||||
return ret;
|
||||
|
||||
/* We don't expose device counters over Vports */
|
||||
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
|
||||
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
|
||||
goto done;
|
||||
|
||||
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
40
SOURCES/1402-rdma-mlx5-fix-cc-counters-query-for-mpv.patch
Normal file
40
SOURCES/1402-rdma-mlx5-fix-cc-counters-query-for-mpv.patch
Normal file
@ -0,0 +1,40 @@
|
||||
From daaee9898eeb1ee187247fd1ed5452440eba9edc Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Fix CC counters query for MPV
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit acd245b1e33fc4b9d0f2e3372021d632f7ee0652
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Mon Jun 16 12:14:53 2025 +0300
|
||||
|
||||
RDMA/mlx5: Fix CC counters query for MPV
|
||||
|
||||
When CC counters are queried for the second port, use the correct
|
||||
core device for the query instead of always using the master core device.
|
||||
|
||||
Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE")
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
|
||||
Link: https://patch.msgid.link/9cace74dcf106116118bebfa9146d40d4166c6b0.1750064969.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
|
||||
index 943e9eb2ad20..a506fafd2b15 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/counters.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/counters.c
|
||||
@@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
|
||||
*/
|
||||
goto done;
|
||||
}
|
||||
- ret = mlx5_lag_query_cong_counters(dev->mdev,
|
||||
+ ret = mlx5_lag_query_cong_counters(mdev,
|
||||
stats->value +
|
||||
cnts->num_q_counters,
|
||||
cnts->num_cong_counters,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,89 @@
|
||||
From b58e4be62a28810484d1d1203db45cc311c6d48f Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 17:43:26 -0400
|
||||
Subject: [PATCH] RDMA/mlx5: Fix vport loopback for MPV device
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit a9a9e68954f29b1e197663f76289db4879fd51bb
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Mon Jun 16 12:14:54 2025 +0300
|
||||
|
||||
RDMA/mlx5: Fix vport loopback for MPV device
|
||||
|
||||
Always enable vport loopback for both MPV devices on driver start.
|
||||
|
||||
Previously in some cases related to MPV RoCE, packets weren't correctly
|
||||
executing loopback check at vport in FW, since it was disabled.
|
||||
Due to complexity of identifying such cases for MPV always enable vport
|
||||
loopback for both GVMIs when binding the slave to the master port.
|
||||
|
||||
Fixes: 0042f9e458a5 ("RDMA/mlx5: Enable vport loopback when user context or QP mandate")
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/d4298f5ebb2197459e9e7221c51ecd6a34699847.1750064969.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
|
||||
index d5f44c83b667..f463b8e7cfca 100644
|
||||
--- a/drivers/infiniband/hw/mlx5/main.c
|
||||
+++ b/drivers/infiniband/hw/mlx5/main.c
|
||||
@@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
|
||||
context->devx_uid);
|
||||
}
|
||||
|
||||
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
|
||||
+ struct mlx5_core_dev *slave)
|
||||
+{
|
||||
+ int err;
|
||||
+
|
||||
+ err = mlx5_nic_vport_update_local_lb(master, true);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
+ err = mlx5_nic_vport_update_local_lb(slave, true);
|
||||
+ if (err)
|
||||
+ goto out;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+out:
|
||||
+ mlx5_nic_vport_update_local_lb(master, false);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
|
||||
+ struct mlx5_core_dev *slave)
|
||||
+{
|
||||
+ mlx5_nic_vport_update_local_lb(slave, false);
|
||||
+ mlx5_nic_vport_update_local_lb(master, false);
|
||||
+}
|
||||
+
|
||||
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
|
||||
{
|
||||
int err = 0;
|
||||
@@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
|
||||
|
||||
lockdep_assert_held(&mlx5_ib_multiport_mutex);
|
||||
|
||||
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
|
||||
+
|
||||
mlx5_core_mp_event_replay(ibdev->mdev,
|
||||
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
|
||||
NULL);
|
||||
@@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
|
||||
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
|
||||
&key);
|
||||
|
||||
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
|
||||
+ if (err)
|
||||
+ goto unbind;
|
||||
+
|
||||
return true;
|
||||
|
||||
unbind:
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,132 @@
|
||||
From c49aebd76637b284358e3705c4642286f996b607 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:17 -0400
|
||||
Subject: [PATCH] net/mlx5: Expose serial numbers in devlink info
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 18667214b955ef89f208d451820c39a5dfd77f27
|
||||
Author: Jiri Pirko <jiri@resnulli.us>
|
||||
Date: Tue Jun 10 04:51:28 2025 +0200
|
||||
|
||||
net/mlx5: Expose serial numbers in devlink info
|
||||
|
||||
Devlink info allows exposing a serial number and a board serial number.
|
||||
Get the values from PCI VPD and expose them.
|
||||
|
||||
$ devlink dev info
|
||||
pci/0000:08:00.0:
|
||||
driver mlx5_core
|
||||
serial_number e4397f872caeed218000846daa7d2f49
|
||||
board.serial_number MT2314XZ00YA
|
||||
versions:
|
||||
fixed:
|
||||
fw.psid MT_0000000894
|
||||
running:
|
||||
fw.version 28.41.1000
|
||||
fw 28.41.1000
|
||||
stored:
|
||||
fw.version 28.41.1000
|
||||
fw 28.41.1000
|
||||
auxiliary/mlx5_core.eth.0:
|
||||
driver mlx5_core.eth
|
||||
pci/0000:08:00.1:
|
||||
driver mlx5_core
|
||||
serial_number e4397f872caeed218000846daa7d2f49
|
||||
board.serial_number MT2314XZ00YA
|
||||
versions:
|
||||
fixed:
|
||||
fw.psid MT_0000000894
|
||||
running:
|
||||
fw.version 28.41.1000
|
||||
fw 28.41.1000
|
||||
stored:
|
||||
fw.version 28.41.1000
|
||||
fw 28.41.1000
|
||||
auxiliary/mlx5_core.eth.1:
|
||||
driver mlx5_core.eth
|
||||
|
||||
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
|
||||
Reviewed-by: Parav Pandit <parav@nvidia.com>
|
||||
Reviewed-by: Simon Horman <horms@kernel.org>
|
||||
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
|
||||
Acked-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250610025128.109232-1-jiri@resnulli.us
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
||||
index 3b27da79ba94..4b536b384fc0 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
||||
@@ -35,6 +35,55 @@ static u16 mlx5_fw_ver_subminor(u32 version)
|
||||
return version & 0xffff;
|
||||
}
|
||||
|
||||
+static int mlx5_devlink_serial_numbers_put(struct mlx5_core_dev *dev,
|
||||
+ struct devlink_info_req *req,
|
||||
+ struct netlink_ext_ack *extack)
|
||||
+{
|
||||
+ struct pci_dev *pdev = dev->pdev;
|
||||
+ unsigned int vpd_size, kw_len;
|
||||
+ char *str, *end;
|
||||
+ u8 *vpd_data;
|
||||
+ int err = 0;
|
||||
+ int start;
|
||||
+
|
||||
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
|
||||
+ if (IS_ERR(vpd_data))
|
||||
+ return 0;
|
||||
+
|
||||
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
|
||||
+ PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
|
||||
+ if (start >= 0) {
|
||||
+ str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
|
||||
+ if (!str) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto end;
|
||||
+ }
|
||||
+ end = strchrnul(str, ' ');
|
||||
+ *end = '\0';
|
||||
+ err = devlink_info_board_serial_number_put(req, str);
|
||||
+ kfree(str);
|
||||
+ if (err)
|
||||
+ goto end;
|
||||
+ }
|
||||
+
|
||||
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len);
|
||||
+ if (start >= 0) {
|
||||
+ str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
|
||||
+ if (!str) {
|
||||
+ err = -ENOMEM;
|
||||
+ goto end;
|
||||
+ }
|
||||
+ err = devlink_info_serial_number_put(req, str);
|
||||
+ kfree(str);
|
||||
+ if (err)
|
||||
+ goto end;
|
||||
+ }
|
||||
+
|
||||
+end:
|
||||
+ kfree(vpd_data);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
#define DEVLINK_FW_STRING_LEN 32
|
||||
|
||||
static int
|
||||
@@ -49,6 +98,10 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
|
||||
if (!mlx5_core_is_pf(dev))
|
||||
return 0;
|
||||
|
||||
+ err = mlx5_devlink_serial_numbers_put(dev, req, extack);
|
||||
+ if (err)
|
||||
+ return err;
|
||||
+
|
||||
err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
|
||||
if (err)
|
||||
return err;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,242 @@
|
||||
From 9440e62748742ac2e252b1559c6575de37da4e90 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:17 -0400
|
||||
Subject: [PATCH] net/mlx5e: SHAMPO: Reorganize mlx5_rq_shampo_alloc
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit af4312c4c9c11da84b13b3aa8f472ab287cf1f0b
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:33 2025 +0300
|
||||
|
||||
net/mlx5e: SHAMPO: Reorganize mlx5_rq_shampo_alloc
|
||||
|
||||
Drop redundant SHAMPO structure alloc/free functions.
|
||||
|
||||
Gather together function calls pertaining to header split info, pass
|
||||
header per WQE (hd_per_wqe) as a parameter to those functions to avoid future
|
||||
use-before-initialization mistakes.
|
||||
|
||||
Allocate HW GRO related info outside of the header related info scope.
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-5-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
index 5b0d03b3efe8..211ea429ea89 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
@@ -638,7 +638,6 @@ struct mlx5e_shampo_hd {
|
||||
struct mlx5e_frag_page *pages;
|
||||
u32 hd_per_wq;
|
||||
u16 hd_per_wqe;
|
||||
- u16 pages_per_wq;
|
||||
unsigned long *bitmap;
|
||||
u16 pi;
|
||||
u16 ci;
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index 9bd166f489e7..a074f1eac3f4 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -330,47 +330,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
|
||||
ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
|
||||
}
|
||||
|
||||
-static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
|
||||
-{
|
||||
- rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
|
||||
- GFP_KERNEL, node);
|
||||
- if (!rq->mpwqe.shampo)
|
||||
- return -ENOMEM;
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
|
||||
-{
|
||||
- kvfree(rq->mpwqe.shampo);
|
||||
-}
|
||||
-
|
||||
-static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
|
||||
-{
|
||||
- struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
|
||||
-
|
||||
- shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
|
||||
- node);
|
||||
- shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq,
|
||||
- sizeof(*shampo->pages)),
|
||||
- GFP_KERNEL, node);
|
||||
- if (!shampo->bitmap || !shampo->pages)
|
||||
- goto err_nomem;
|
||||
-
|
||||
- return 0;
|
||||
-
|
||||
-err_nomem:
|
||||
- bitmap_free(shampo->bitmap);
|
||||
- kvfree(shampo->pages);
|
||||
-
|
||||
- return -ENOMEM;
|
||||
-}
|
||||
-
|
||||
-static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
|
||||
-{
|
||||
- bitmap_free(rq->mpwqe.shampo->bitmap);
|
||||
- kvfree(rq->mpwqe.shampo->pages);
|
||||
-}
|
||||
-
|
||||
static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
|
||||
{
|
||||
int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
|
||||
@@ -583,19 +542,18 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
|
||||
}
|
||||
|
||||
static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
|
||||
- struct mlx5e_rq *rq)
|
||||
+ u16 hd_per_wq, u32 *umr_mkey)
|
||||
{
|
||||
u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
|
||||
|
||||
- if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) {
|
||||
+ if (max_ksm_size < hd_per_wq) {
|
||||
mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
|
||||
- max_ksm_size, rq->mpwqe.shampo->hd_per_wq);
|
||||
+ max_ksm_size, hd_per_wq);
|
||||
return -EINVAL;
|
||||
}
|
||||
-
|
||||
- return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
|
||||
+ return mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq,
|
||||
MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE,
|
||||
- &rq->mpwqe.shampo->mkey);
|
||||
+ umr_mkey);
|
||||
}
|
||||
|
||||
static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
|
||||
@@ -757,6 +715,35 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
|
||||
xdp_frag_size);
|
||||
}
|
||||
|
||||
+static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq,
|
||||
+ int node)
|
||||
+{
|
||||
+ struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
|
||||
+
|
||||
+ shampo->hd_per_wq = hd_per_wq;
|
||||
+
|
||||
+ shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node);
|
||||
+ shampo->pages = kvzalloc_node(array_size(hd_per_wq,
|
||||
+ sizeof(*shampo->pages)),
|
||||
+ GFP_KERNEL, node);
|
||||
+ if (!shampo->bitmap || !shampo->pages)
|
||||
+ goto err_nomem;
|
||||
+
|
||||
+ return 0;
|
||||
+
|
||||
+err_nomem:
|
||||
+ kvfree(shampo->pages);
|
||||
+ bitmap_free(shampo->bitmap);
|
||||
+
|
||||
+ return -ENOMEM;
|
||||
+}
|
||||
+
|
||||
+static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
|
||||
+{
|
||||
+ kvfree(rq->mpwqe.shampo->pages);
|
||||
+ bitmap_free(rq->mpwqe.shampo->bitmap);
|
||||
+}
|
||||
+
|
||||
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
struct mlx5e_params *params,
|
||||
struct mlx5e_rq_param *rqp,
|
||||
@@ -764,42 +751,52 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
u32 *pool_size,
|
||||
int node)
|
||||
{
|
||||
+ void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
|
||||
+ u16 hd_per_wq;
|
||||
+ int wq_size;
|
||||
int err;
|
||||
|
||||
if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
|
||||
return 0;
|
||||
- err = mlx5e_rq_shampo_hd_alloc(rq, node);
|
||||
- if (err)
|
||||
- goto out;
|
||||
- rq->mpwqe.shampo->hd_per_wq =
|
||||
- mlx5e_shampo_hd_per_wq(mdev, params, rqp);
|
||||
- err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
|
||||
+
|
||||
+ rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
|
||||
+ GFP_KERNEL, node);
|
||||
+ if (!rq->mpwqe.shampo)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ /* split headers data structures */
|
||||
+ hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp);
|
||||
+ err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node);
|
||||
if (err)
|
||||
- goto err_shampo_hd;
|
||||
- err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
|
||||
+ goto err_shampo_hd_info_alloc;
|
||||
+
|
||||
+ err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq,
|
||||
+ &rq->mpwqe.shampo->mkey);
|
||||
if (err)
|
||||
- goto err_shampo_info;
|
||||
+ goto err_umr_mkey;
|
||||
+
|
||||
+ rq->mpwqe.shampo->key = cpu_to_be32(rq->mpwqe.shampo->mkey);
|
||||
+ rq->mpwqe.shampo->hd_per_wqe =
|
||||
+ mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
|
||||
+ wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
|
||||
+ *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
|
||||
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
|
||||
+
|
||||
+ /* gro only data structures */
|
||||
rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
|
||||
if (!rq->hw_gro_data) {
|
||||
err = -ENOMEM;
|
||||
goto err_hw_gro_data;
|
||||
}
|
||||
- rq->mpwqe.shampo->key =
|
||||
- cpu_to_be32(rq->mpwqe.shampo->mkey);
|
||||
- rq->mpwqe.shampo->hd_per_wqe =
|
||||
- mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
|
||||
- rq->mpwqe.shampo->pages_per_wq =
|
||||
- rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
|
||||
- *pool_size += rq->mpwqe.shampo->pages_per_wq;
|
||||
+
|
||||
return 0;
|
||||
|
||||
err_hw_gro_data:
|
||||
- mlx5e_rq_shampo_hd_info_free(rq);
|
||||
-err_shampo_info:
|
||||
mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
|
||||
-err_shampo_hd:
|
||||
- mlx5e_rq_shampo_hd_free(rq);
|
||||
-out:
|
||||
+err_umr_mkey:
|
||||
+ mlx5e_rq_shampo_hd_info_free(rq);
|
||||
+err_shampo_hd_info_alloc:
|
||||
+ kvfree(rq->mpwqe.shampo);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -811,7 +808,7 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
|
||||
kvfree(rq->hw_gro_data);
|
||||
mlx5e_rq_shampo_hd_info_free(rq);
|
||||
mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
|
||||
- mlx5e_rq_shampo_hd_free(rq);
|
||||
+ kvfree(rq->mpwqe.shampo);
|
||||
}
|
||||
|
||||
static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
113
SOURCES/1406-net-mlx5e-shampo-remove-redundant-params.patch
Normal file
113
SOURCES/1406-net-mlx5e-shampo-remove-redundant-params.patch
Normal file
@ -0,0 +1,113 @@
|
||||
From 675c166094cf502d5b037ae4a692e7c5c6f6f9fa Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5e: SHAMPO: Remove redundant params
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 16142defd304d5a8e591781efe24da498ccfa51f
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:34 2025 +0300
|
||||
|
||||
net/mlx5e: SHAMPO: Remove redundant params
|
||||
|
||||
Two SHAMPO params are static and always the same, remove them from the
|
||||
global mlx5e_params struct.
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-6-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
index 211ea429ea89..581eef34f512 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
@@ -278,10 +278,6 @@ enum packet_merge {
|
||||
struct mlx5e_packet_merge_param {
|
||||
enum packet_merge type;
|
||||
u32 timeout;
|
||||
- struct {
|
||||
- u8 match_criteria_type;
|
||||
- u8 alignment_granularity;
|
||||
- } shampo;
|
||||
};
|
||||
|
||||
struct mlx5e_params {
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
|
||||
index 58ec5e44aa7a..fc945bce933a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
|
||||
@@ -901,6 +901,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
|
||||
{
|
||||
void *rqc = param->rqc;
|
||||
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
|
||||
+ u32 lro_timeout;
|
||||
int ndsegs = 1;
|
||||
int err;
|
||||
|
||||
@@ -926,22 +927,25 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
|
||||
MLX5_SET(wq, wq, log_wqe_stride_size,
|
||||
log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
|
||||
MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
|
||||
- if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
|
||||
- MLX5_SET(wq, wq, shampo_enable, true);
|
||||
- MLX5_SET(wq, wq, log_reservation_size,
|
||||
- mlx5e_shampo_get_log_rsrv_size(mdev, params));
|
||||
- MLX5_SET(wq, wq,
|
||||
- log_max_num_of_packets_per_reservation,
|
||||
- mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
|
||||
- MLX5_SET(wq, wq, log_headers_entry_size,
|
||||
- mlx5e_shampo_get_log_hd_entry_size(mdev, params));
|
||||
- MLX5_SET(rqc, rqc, reservation_timeout,
|
||||
- mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT));
|
||||
- MLX5_SET(rqc, rqc, shampo_match_criteria_type,
|
||||
- params->packet_merge.shampo.match_criteria_type);
|
||||
- MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
|
||||
- params->packet_merge.shampo.alignment_granularity);
|
||||
- }
|
||||
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO)
|
||||
+ break;
|
||||
+
|
||||
+ MLX5_SET(wq, wq, shampo_enable, true);
|
||||
+ MLX5_SET(wq, wq, log_reservation_size,
|
||||
+ mlx5e_shampo_get_log_rsrv_size(mdev, params));
|
||||
+ MLX5_SET(wq, wq,
|
||||
+ log_max_num_of_packets_per_reservation,
|
||||
+ mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
|
||||
+ MLX5_SET(wq, wq, log_headers_entry_size,
|
||||
+ mlx5e_shampo_get_log_hd_entry_size(mdev, params));
|
||||
+ lro_timeout =
|
||||
+ mlx5e_choose_lro_timeout(mdev,
|
||||
+ MLX5E_DEFAULT_SHAMPO_TIMEOUT);
|
||||
+ MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout);
|
||||
+ MLX5_SET(rqc, rqc, shampo_match_criteria_type,
|
||||
+ MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED);
|
||||
+ MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity,
|
||||
+ MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE);
|
||||
break;
|
||||
}
|
||||
default: /* MLX5_WQ_TYPE_CYCLIC */
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index a074f1eac3f4..4809fc9e3522 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -4026,10 +4026,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
|
||||
|
||||
if (enable) {
|
||||
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
|
||||
- new_params.packet_merge.shampo.match_criteria_type =
|
||||
- MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
|
||||
- new_params.packet_merge.shampo.alignment_granularity =
|
||||
- MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
|
||||
} else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
|
||||
new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
|
||||
} else {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,68 @@
|
||||
From ce0ae8e829ab57ae2acd173f5053cf63c391a4e1 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5e: SHAMPO: Improve hw gro capability checking
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit d2760abdedde635b055a214b3a45dce3e4ecbfce
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:35 2025 +0300
|
||||
|
||||
net/mlx5e: SHAMPO: Improve hw gro capability checking
|
||||
|
||||
Add missing HW capabilities, declare the feature in
|
||||
netdev->vlan_features, similar to other features in mlx5e_build_nic_netdev.
|
||||
No functional change here as all by default disabled features are
|
||||
explicitly disabled at the bottom of the function.
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-7-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index 4809fc9e3522..e552dcf8f13a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -77,7 +77,8 @@
|
||||
|
||||
static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev)
|
||||
{
|
||||
- if (!MLX5_CAP_GEN(mdev, shampo))
|
||||
+ if (!MLX5_CAP_GEN(mdev, shampo) ||
|
||||
+ !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge))
|
||||
return false;
|
||||
|
||||
/* Our HW-GRO implementation relies on "KSM Mkey" for
|
||||
@@ -5489,17 +5490,17 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
|
||||
MLX5E_MPWRQ_UMR_MODE_ALIGNED))
|
||||
netdev->vlan_features |= NETIF_F_LRO;
|
||||
|
||||
+ if (mlx5e_hw_gro_supported(mdev) &&
|
||||
+ mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
|
||||
+ MLX5E_MPWRQ_UMR_MODE_ALIGNED))
|
||||
+ netdev->vlan_features |= NETIF_F_GRO_HW;
|
||||
+
|
||||
netdev->hw_features = netdev->vlan_features;
|
||||
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
|
||||
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
|
||||
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
|
||||
netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
|
||||
|
||||
- if (mlx5e_hw_gro_supported(mdev) &&
|
||||
- mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
|
||||
- MLX5E_MPWRQ_UMR_MODE_ALIGNED))
|
||||
- netdev->hw_features |= NETIF_F_GRO_HW;
|
||||
-
|
||||
if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
|
||||
netdev->hw_enc_features |= NETIF_F_HW_CSUM;
|
||||
netdev->hw_enc_features |= NETIF_F_TSO;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
304
SOURCES/1408-net-mlx5e-shampo-separate-pool-for-headers.patch
Normal file
304
SOURCES/1408-net-mlx5e-shampo-separate-pool-for-headers.patch
Normal file
@ -0,0 +1,304 @@
|
||||
From 65f74d0b4614133bc8ed318ed25a4532182f50fc Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5e: SHAMPO: Separate pool for headers
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit e225d9bd93ed0bb84014f5f8e241e8e456533e30
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:36 2025 +0300
|
||||
|
||||
net/mlx5e: SHAMPO: Separate pool for headers
|
||||
|
||||
Allow allocating a separate page pool for headers when SHAMPO is on.
|
||||
This will be useful for adding support to zc page pool, which has to be
|
||||
different from the headers page pool.
|
||||
For now, the pools are the same.
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-8-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
index 581eef34f512..c329de1d4f0a 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
|
||||
@@ -716,7 +716,11 @@ struct mlx5e_rq {
|
||||
struct bpf_prog __rcu *xdp_prog;
|
||||
struct mlx5e_xdpsq *xdpsq;
|
||||
DECLARE_BITMAP(flags, 8);
|
||||
+
|
||||
+ /* page pools */
|
||||
struct page_pool *page_pool;
|
||||
+ struct page_pool *hd_page_pool;
|
||||
+
|
||||
struct mlx5e_xdp_buff mxbuf;
|
||||
|
||||
/* AF_XDP zero-copy */
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index e552dcf8f13a..59e845367cfd 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -40,6 +40,7 @@
|
||||
#include <linux/if_bridge.h>
|
||||
#include <linux/filter.h>
|
||||
#include <net/netdev_queues.h>
|
||||
+#include <net/netdev_rx_queue.h>
|
||||
#include <net/page_pool/types.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
@@ -745,6 +746,11 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
|
||||
bitmap_free(rq->mpwqe.shampo->bitmap);
|
||||
}
|
||||
|
||||
+static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
struct mlx5e_params *params,
|
||||
struct mlx5e_rq_param *rqp,
|
||||
@@ -753,6 +759,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
int node)
|
||||
{
|
||||
void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
|
||||
+ u32 hd_pool_size;
|
||||
u16 hd_per_wq;
|
||||
int wq_size;
|
||||
int err;
|
||||
@@ -780,8 +787,34 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
rq->mpwqe.shampo->hd_per_wqe =
|
||||
mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
|
||||
wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
|
||||
- *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
|
||||
- MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
|
||||
+ hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
|
||||
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
|
||||
+
|
||||
+ if (mlx5_rq_needs_separate_hd_pool(rq)) {
|
||||
+ /* Separate page pool for shampo headers */
|
||||
+ struct page_pool_params pp_params = { };
|
||||
+
|
||||
+ pp_params.order = 0;
|
||||
+ pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
|
||||
+ pp_params.pool_size = hd_pool_size;
|
||||
+ pp_params.nid = node;
|
||||
+ pp_params.dev = rq->pdev;
|
||||
+ pp_params.napi = rq->cq.napi;
|
||||
+ pp_params.netdev = rq->netdev;
|
||||
+ pp_params.dma_dir = rq->buff.map_dir;
|
||||
+ pp_params.max_len = PAGE_SIZE;
|
||||
+
|
||||
+ rq->hd_page_pool = page_pool_create(&pp_params);
|
||||
+ if (IS_ERR(rq->hd_page_pool)) {
|
||||
+ err = PTR_ERR(rq->hd_page_pool);
|
||||
+ rq->hd_page_pool = NULL;
|
||||
+ goto err_hds_page_pool;
|
||||
+ }
|
||||
+ } else {
|
||||
+ /* Common page pool, reserve space for headers. */
|
||||
+ *pool_size += hd_pool_size;
|
||||
+ rq->hd_page_pool = NULL;
|
||||
+ }
|
||||
|
||||
/* gro only data structures */
|
||||
rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
|
||||
@@ -793,6 +826,8 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
|
||||
return 0;
|
||||
|
||||
err_hw_gro_data:
|
||||
+ page_pool_destroy(rq->hd_page_pool);
|
||||
+err_hds_page_pool:
|
||||
mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
|
||||
err_umr_mkey:
|
||||
mlx5e_rq_shampo_hd_info_free(rq);
|
||||
@@ -807,6 +842,8 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
|
||||
return;
|
||||
|
||||
kvfree(rq->hw_gro_data);
|
||||
+ if (rq->hd_page_pool != rq->page_pool)
|
||||
+ page_pool_destroy(rq->hd_page_pool);
|
||||
mlx5e_rq_shampo_hd_info_free(rq);
|
||||
mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
|
||||
kvfree(rq->mpwqe.shampo);
|
||||
@@ -938,6 +975,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
|
||||
rq->page_pool = NULL;
|
||||
goto err_free_by_rq_type;
|
||||
}
|
||||
+ if (!rq->hd_page_pool)
|
||||
+ rq->hd_page_pool = rq->page_pool;
|
||||
if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
|
||||
err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
|
||||
MEM_TYPE_PAGE_POOL, rq->page_pool);
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
index 382679838113..36a4780332d7 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
|
||||
@@ -273,12 +273,12 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
|
||||
|
||||
#define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64)
|
||||
|
||||
-static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
|
||||
+static int mlx5e_page_alloc_fragmented(struct page_pool *pool,
|
||||
struct mlx5e_frag_page *frag_page)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
- page = page_pool_dev_alloc_pages(rq->page_pool);
|
||||
+ page = page_pool_dev_alloc_pages(pool);
|
||||
if (unlikely(!page))
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -292,14 +292,14 @@ static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
|
||||
+static void mlx5e_page_release_fragmented(struct page_pool *pool,
|
||||
struct mlx5e_frag_page *frag_page)
|
||||
{
|
||||
u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
|
||||
struct page *page = frag_page->page;
|
||||
|
||||
if (page_pool_unref_page(page, drain_count) == 0)
|
||||
- page_pool_put_unrefed_page(rq->page_pool, page, -1, true);
|
||||
+ page_pool_put_unrefed_page(pool, page, -1, true);
|
||||
}
|
||||
|
||||
static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
|
||||
@@ -313,7 +313,8 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
|
||||
* offset) should just use the new one without replenishing again
|
||||
* by themselves.
|
||||
*/
|
||||
- err = mlx5e_page_alloc_fragmented(rq, frag->frag_page);
|
||||
+ err = mlx5e_page_alloc_fragmented(rq->page_pool,
|
||||
+ frag->frag_page);
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -332,7 +333,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *frag)
|
||||
{
|
||||
if (mlx5e_frag_can_release(frag))
|
||||
- mlx5e_page_release_fragmented(rq, frag->frag_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool, frag->frag_page);
|
||||
}
|
||||
|
||||
static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
|
||||
@@ -586,7 +587,8 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
|
||||
struct mlx5e_frag_page *frag_page;
|
||||
|
||||
frag_page = &wi->alloc_units.frag_pages[i];
|
||||
- mlx5e_page_release_fragmented(rq, frag_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool,
|
||||
+ frag_page);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -681,11 +683,10 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
|
||||
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
|
||||
u64 addr;
|
||||
|
||||
- err = mlx5e_page_alloc_fragmented(rq, frag_page);
|
||||
+ err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page);
|
||||
if (unlikely(err))
|
||||
goto err_unmap;
|
||||
|
||||
-
|
||||
addr = page_pool_get_dma_addr(frag_page->page);
|
||||
|
||||
for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) {
|
||||
@@ -717,7 +718,8 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
|
||||
if (!header_offset) {
|
||||
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index);
|
||||
|
||||
- mlx5e_page_release_fragmented(rq, frag_page);
|
||||
+ mlx5e_page_release_fragmented(rq->hd_page_pool,
|
||||
+ frag_page);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -793,7 +795,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) {
|
||||
dma_addr_t addr;
|
||||
|
||||
- err = mlx5e_page_alloc_fragmented(rq, frag_page);
|
||||
+ err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page);
|
||||
if (unlikely(err))
|
||||
goto err_unmap;
|
||||
addr = page_pool_get_dma_addr(frag_page->page);
|
||||
@@ -838,7 +840,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
err_unmap:
|
||||
while (--i >= 0) {
|
||||
frag_page--;
|
||||
- mlx5e_page_release_fragmented(rq, frag_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool, frag_page);
|
||||
}
|
||||
|
||||
bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
|
||||
@@ -857,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
|
||||
if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
|
||||
struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index);
|
||||
|
||||
- mlx5e_page_release_fragmented(rq, frag_page);
|
||||
+ mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page);
|
||||
}
|
||||
clear_bit(header_index, shampo->bitmap);
|
||||
}
|
||||
@@ -1102,6 +1104,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
|
||||
|
||||
if (rq->page_pool)
|
||||
page_pool_nid_changed(rq->page_pool, numa_mem_id());
|
||||
+ if (rq->hd_page_pool)
|
||||
+ page_pool_nid_changed(rq->hd_page_pool, numa_mem_id());
|
||||
|
||||
head = rq->mpwqe.actual_wq_head;
|
||||
i = missing;
|
||||
@@ -2010,7 +2014,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
|
||||
if (prog) {
|
||||
/* area for bpf_xdp_[store|load]_bytes */
|
||||
net_prefetchw(page_address(frag_page->page) + frag_offset);
|
||||
- if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) {
|
||||
+ if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool,
|
||||
+ &wi->linear_page))) {
|
||||
rq->stats->buff_alloc_err++;
|
||||
return NULL;
|
||||
}
|
||||
@@ -2074,7 +2079,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
|
||||
|
||||
wi->linear_page.frags++;
|
||||
}
|
||||
- mlx5e_page_release_fragmented(rq, &wi->linear_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool,
|
||||
+ &wi->linear_page);
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
}
|
||||
|
||||
@@ -2083,13 +2089,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
|
||||
mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0,
|
||||
mxbuf->xdp.data - mxbuf->xdp.data_meta);
|
||||
if (unlikely(!skb)) {
|
||||
- mlx5e_page_release_fragmented(rq, &wi->linear_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool,
|
||||
+ &wi->linear_page);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
skb_mark_for_recycle(skb);
|
||||
wi->linear_page.frags++;
|
||||
- mlx5e_page_release_fragmented(rq, &wi->linear_page);
|
||||
+ mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page);
|
||||
|
||||
if (xdp_buff_has_frags(&mxbuf->xdp)) {
|
||||
struct mlx5e_frag_page *pagep;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,150 @@
|
||||
From d17c81e1bebf992831b10750cc1ee7c6fdc04339 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5e: Implement queue mgmt ops and single channel swap
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit b2588ea40ec9472688289c1a644627c0f4a1f33f
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:39 2025 +0300
|
||||
|
||||
net/mlx5e: Implement queue mgmt ops and single channel swap
|
||||
|
||||
The bulk of the work is done in mlx5e_queue_mem_alloc, where we allocate
|
||||
and create the new channel resources, similar to
|
||||
mlx5e_safe_switch_params, but here we do it for a single channel using
|
||||
existing params, sort of a clone channel.
|
||||
To swap the old channel with the new one, we deactivate and close the
|
||||
old channel then replace it with the new one, since the swap procedure
|
||||
doesn't fail in mlx5, we do it all in one place (mlx5e_queue_start).
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Acked-by: Mina Almasry <almasrymina@google.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-11-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index 59e845367cfd..9330d90c1f03 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -5479,6 +5479,103 @@ static const struct netdev_stat_ops mlx5e_stat_ops = {
|
||||
.get_base_stats = mlx5e_get_base_stats,
|
||||
};
|
||||
|
||||
+struct mlx5_qmgmt_data {
|
||||
+ struct mlx5e_channel *c;
|
||||
+ struct mlx5e_channel_param cparam;
|
||||
+};
|
||||
+
|
||||
+static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq,
|
||||
+ int queue_index)
|
||||
+{
|
||||
+ struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq;
|
||||
+ struct mlx5e_priv *priv = netdev_priv(dev);
|
||||
+ struct mlx5e_channels *chs = &priv->channels;
|
||||
+ struct mlx5e_params params = chs->params;
|
||||
+ struct mlx5_core_dev *mdev;
|
||||
+ int err;
|
||||
+
|
||||
+ mutex_lock(&priv->state_lock);
|
||||
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
|
||||
+ err = -ENODEV;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ if (queue_index >= chs->num) {
|
||||
+ err = -ERANGE;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) ||
|
||||
+ chs->params.ptp_rx ||
|
||||
+ chs->params.xdp_prog ||
|
||||
+ priv->htb) {
|
||||
+ netdev_err(priv->netdev,
|
||||
+ "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n");
|
||||
+ err = -EOPNOTSUPP;
|
||||
+ goto unlock;
|
||||
+ }
|
||||
+
|
||||
+ mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index);
|
||||
+ err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam);
|
||||
+ if (err)
|
||||
+ goto unlock;
|
||||
+
|
||||
+ err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c);
|
||||
+unlock:
|
||||
+ mutex_unlock(&priv->state_lock);
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static void mlx5e_queue_mem_free(struct net_device *dev, void *mem)
|
||||
+{
|
||||
+ struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem;
|
||||
+
|
||||
+ /* not supposed to happen since mlx5e_queue_start never fails
|
||||
+ * but this is how this should be implemented just in case
|
||||
+ */
|
||||
+ if (data->c)
|
||||
+ mlx5e_close_channel(data->c);
|
||||
+}
|
||||
+
|
||||
+static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index)
|
||||
+{
|
||||
+ /* In mlx5 a txq cannot be simply stopped in isolation, only restarted.
|
||||
+ * mlx5e_queue_start does not fail, we stop the old queue there.
|
||||
+ * TODO: Improve this.
|
||||
+ */
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int mlx5e_queue_start(struct net_device *dev, void *newq,
|
||||
+ int queue_index)
|
||||
+{
|
||||
+ struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq;
|
||||
+ struct mlx5e_priv *priv = netdev_priv(dev);
|
||||
+ struct mlx5e_channel *old;
|
||||
+
|
||||
+ mutex_lock(&priv->state_lock);
|
||||
+
|
||||
+ /* stop and close the old */
|
||||
+ old = priv->channels.c[queue_index];
|
||||
+ mlx5e_deactivate_priv_channels(priv);
|
||||
+ /* close old before activating new, to avoid napi conflict */
|
||||
+ mlx5e_close_channel(old);
|
||||
+
|
||||
+ /* start the new */
|
||||
+ priv->channels.c[queue_index] = new->c;
|
||||
+ mlx5e_activate_priv_channels(priv);
|
||||
+ mutex_unlock(&priv->state_lock);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = {
|
||||
+ .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data),
|
||||
+ .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc,
|
||||
+ .ndo_queue_mem_free = mlx5e_queue_mem_free,
|
||||
+ .ndo_queue_start = mlx5e_queue_start,
|
||||
+ .ndo_queue_stop = mlx5e_queue_stop,
|
||||
+};
|
||||
+
|
||||
static void mlx5e_build_nic_netdev(struct net_device *netdev)
|
||||
{
|
||||
struct mlx5e_priv *priv = netdev_priv(netdev);
|
||||
@@ -5489,6 +5586,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
|
||||
SET_NETDEV_DEV(netdev, mdev->device);
|
||||
|
||||
netdev->netdev_ops = &mlx5e_netdev_ops;
|
||||
+ netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops;
|
||||
netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops;
|
||||
netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops;
|
||||
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,141 @@
|
||||
From ccdbf67ee58fe08fc65b7fa79731868afdf21c63 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Wed, 22 Apr 2026 09:42:15 -0400
|
||||
Subject: [PATCH] net/mlx5e: Support ethtool tcp-data-split settings
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
Conflicts:
|
||||
Add "#include <linux/ethtool_netlink.h>" to avoid build failure.
|
||||
|
||||
commit 46bcce5dfd330c233e59cd5efd7eb43f049b0a82
|
||||
Author: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Date: Mon Jun 16 17:14:40 2025 +0300
|
||||
|
||||
net/mlx5e: Support ethtool tcp-data-split settings
|
||||
|
||||
In mlx5, tcp header-data split requires HW GRO to be on.
|
||||
|
||||
Enabling it fails when HW GRO is off.
|
||||
mlx5e_fix_features now keeps HW GRO on when tcp data split is enabled.
|
||||
Finally, when tcp data split is disabled, features are updated to maybe
|
||||
remove the forced HW GRO.
|
||||
|
||||
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
|
||||
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
|
||||
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250616141441.1243044-12-mbloch@nvidia.com
|
||||
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
index e6c9338ddae8..ff0b9ab2daa0 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
|
||||
@@ -32,6 +32,7 @@
|
||||
|
||||
#include <linux/dim.h>
|
||||
#include <linux/ethtool_netlink.h>
|
||||
+#include <net/netdev_queues.h>
|
||||
|
||||
#include "en.h"
|
||||
#include "en/channels.h"
|
||||
@@ -365,11 +366,6 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv,
|
||||
param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
|
||||
param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames;
|
||||
param->tx_pending = 1 << priv->channels.params.log_sq_size;
|
||||
-
|
||||
- kernel_param->tcp_data_split =
|
||||
- (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ?
|
||||
- ETHTOOL_TCP_DATA_SPLIT_ENABLED :
|
||||
- ETHTOOL_TCP_DATA_SPLIT_DISABLED;
|
||||
}
|
||||
|
||||
static void mlx5e_get_ringparam(struct net_device *dev,
|
||||
@@ -382,6 +378,27 @@ static void mlx5e_get_ringparam(struct net_device *dev,
|
||||
mlx5e_ethtool_get_ringparam(priv, param, kernel_param);
|
||||
}
|
||||
|
||||
+static bool mlx5e_ethtool_set_tcp_data_split(struct mlx5e_priv *priv,
|
||||
+ u8 tcp_data_split,
|
||||
+ struct netlink_ext_ack *extack)
|
||||
+{
|
||||
+ struct net_device *dev = priv->netdev;
|
||||
+
|
||||
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
|
||||
+ !(dev->features & NETIF_F_GRO_HW)) {
|
||||
+ NL_SET_ERR_MSG_MOD(extack,
|
||||
+ "TCP-data-split is not supported when GRO HW is disabled");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ /* Might need to disable HW-GRO if it was kept on due to hds. */
|
||||
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED &&
|
||||
+ dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
|
||||
+ netdev_update_features(priv->netdev);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
|
||||
struct ethtool_ringparam *param,
|
||||
struct netlink_ext_ack *extack)
|
||||
@@ -440,6 +457,11 @@ static int mlx5e_set_ringparam(struct net_device *dev,
|
||||
{
|
||||
struct mlx5e_priv *priv = netdev_priv(dev);
|
||||
|
||||
+ if (!mlx5e_ethtool_set_tcp_data_split(priv,
|
||||
+ kernel_param->tcp_data_split,
|
||||
+ extack))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
return mlx5e_ethtool_set_ringparam(priv, param, extack);
|
||||
}
|
||||
|
||||
@@ -2645,6 +2667,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
|
||||
ETHTOOL_COALESCE_USE_ADAPTIVE |
|
||||
ETHTOOL_COALESCE_USE_CQE,
|
||||
.supported_input_xfrm = RXH_XFRM_SYM_OR_XOR,
|
||||
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
|
||||
.get_drvinfo = mlx5e_get_drvinfo,
|
||||
.get_link = ethtool_op_get_link,
|
||||
.get_link_ext_state = mlx5e_get_link_ext_state,
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
index 9330d90c1f03..4bbf10174fe8 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
|
||||
@@ -39,6 +39,7 @@
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/if_bridge.h>
|
||||
#include <linux/filter.h>
|
||||
+#include <linux/ethtool_netlink.h>
|
||||
#include <net/netdev_queues.h>
|
||||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/page_pool/types.h>
|
||||
@@ -4392,6 +4393,7 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev
|
||||
static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
|
||||
netdev_features_t features)
|
||||
{
|
||||
+ struct netdev_config *cfg = netdev->cfg_pending;
|
||||
struct mlx5e_priv *priv = netdev_priv(netdev);
|
||||
struct mlx5e_vlan_table *vlan;
|
||||
struct mlx5e_params *params;
|
||||
@@ -4458,6 +4460,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
|
||||
}
|
||||
}
|
||||
|
||||
+ /* The header-data split ring param requires HW GRO to stay enabled. */
|
||||
+ if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
|
||||
+ !(features & NETIF_F_GRO_HW)) {
|
||||
+ netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n");
|
||||
+ features |= NETIF_F_GRO_HW;
|
||||
+ }
|
||||
+
|
||||
if (mlx5e_is_uplink_rep(priv)) {
|
||||
features = mlx5e_fix_uplink_rep_features(netdev, features);
|
||||
netdev->netns_immutable = true;
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,103 @@
|
||||
From d243df0a0b8c463819ec69b746ed501da46f66e6 Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5: fs, add multiple prios to RDMA TRANSPORT steering
|
||||
domain
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit 52931f55159ea5c27ad4fe66fc0cb8ad75ab795b
|
||||
Author: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Date: Tue Jun 17 11:19:15 2025 +0300
|
||||
|
||||
net/mlx5: fs, add multiple prios to RDMA TRANSPORT steering domain
|
||||
|
||||
RDMA TRANSPORT domains were initially limited to a single priority.
|
||||
This change allows the domains to have multiple priorities, making
|
||||
it possible to add several rules and control the order in which
|
||||
they're evaluated.
|
||||
|
||||
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
|
||||
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/b299cbb4c8678a33da6e6b6988b5bf6145c54b88.1750148083.git.leon@kernel.org
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
index b29e67466701..2a855e50be95 100644
|
||||
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
|
||||
@@ -3250,34 +3250,48 @@ static int
|
||||
init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering,
|
||||
int vport_idx)
|
||||
{
|
||||
+ struct mlx5_flow_root_namespace *root_ns;
|
||||
struct fs_prio *prio;
|
||||
+ int i;
|
||||
|
||||
steering->rdma_transport_rx_root_ns[vport_idx] =
|
||||
create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX);
|
||||
if (!steering->rdma_transport_rx_root_ns[vport_idx])
|
||||
return -ENOMEM;
|
||||
|
||||
- /* create 1 prio*/
|
||||
- prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns,
|
||||
- MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
|
||||
- return PTR_ERR_OR_ZERO(prio);
|
||||
+ root_ns = steering->rdma_transport_rx_root_ns[vport_idx];
|
||||
+
|
||||
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) {
|
||||
+ prio = fs_create_prio(&root_ns->ns, i, 1);
|
||||
+ if (IS_ERR(prio))
|
||||
+ return PTR_ERR(prio);
|
||||
+ }
|
||||
+ set_prio_attrs(root_ns);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering,
|
||||
int vport_idx)
|
||||
{
|
||||
+ struct mlx5_flow_root_namespace *root_ns;
|
||||
struct fs_prio *prio;
|
||||
+ int i;
|
||||
|
||||
steering->rdma_transport_tx_root_ns[vport_idx] =
|
||||
create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX);
|
||||
if (!steering->rdma_transport_tx_root_ns[vport_idx])
|
||||
return -ENOMEM;
|
||||
|
||||
- /* create 1 prio*/
|
||||
- prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns,
|
||||
- MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
|
||||
- return PTR_ERR_OR_ZERO(prio);
|
||||
+ root_ns = steering->rdma_transport_tx_root_ns[vport_idx];
|
||||
+
|
||||
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) {
|
||||
+ prio = fs_create_prio(&root_ns->ns, i, 1);
|
||||
+ if (IS_ERR(prio))
|
||||
+ return PTR_ERR(prio);
|
||||
+ }
|
||||
+ set_prio_attrs(root_ns);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering)
|
||||
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
|
||||
index fb5f98fcc726..6ac76a0c3827 100644
|
||||
--- a/include/linux/mlx5/fs.h
|
||||
+++ b/include/linux/mlx5/fs.h
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
|
||||
|
||||
-#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0
|
||||
+#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 16
|
||||
#define MLX5_FS_MAX_POOL_SIZE BIT(30)
|
||||
|
||||
enum mlx5_flow_destination_type {
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
@ -0,0 +1,75 @@
|
||||
From a8ad8f1ee332d30aea6afc35f434dc416e5b574a Mon Sep 17 00:00:00 2001
|
||||
From: Kamal Heib <kheib@redhat.com>
|
||||
Date: Thu, 16 Apr 2026 18:03:18 -0400
|
||||
Subject: [PATCH] net/mlx5: Small refactor for general object capabilities
|
||||
|
||||
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
||||
|
||||
commit ebf8d47121b6ef3f38425a343a72f37c60fd6dbc
|
||||
Author: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Date: Thu Jun 19 14:37:17 2025 +0300
|
||||
|
||||
net/mlx5: Small refactor for general object capabilities
|
||||
|
||||
Make enum for capability bits of general object types depend on
|
||||
the type definitions themselves.
|
||||
|
||||
Make sure that capabilities in the [64,127] bit range are
|
||||
properly calculated (type id - 64).
|
||||
|
||||
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
|
||||
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
|
||||
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
||||
Link: https://patch.msgid.link/20250619113721.60201-2-mbloch@nvidia.com
|
||||
Signed-off-by: Leon Romanovsky <leon@kernel.org>
|
||||
|
||||
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
||||
|
||||
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
|
||||
index 9521159b0857..4077f0921039 100644
|
||||
--- a/include/linux/mlx5/mlx5_ifc.h
|
||||
+++ b/include/linux/mlx5/mlx5_ifc.h
|
||||
@@ -12500,17 +12500,6 @@ struct mlx5_ifc_affiliated_event_header_bits {
|
||||
u8 obj_id[0x20];
|
||||
};
|
||||
|
||||
-enum {
|
||||
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc),
|
||||
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13),
|
||||
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20),
|
||||
- MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24),
|
||||
-};
|
||||
-
|
||||
-enum {
|
||||
- MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13),
|
||||
-};
|
||||
-
|
||||
enum {
|
||||
MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
|
||||
MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
|
||||
@@ -12522,6 +12511,22 @@ enum {
|
||||
MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15,
|
||||
};
|
||||
|
||||
+enum {
|
||||
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY =
|
||||
+ BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY),
|
||||
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC =
|
||||
+ BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_IPSEC),
|
||||
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER =
|
||||
+ BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_SAMPLER),
|
||||
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO =
|
||||
+ BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO),
|
||||
+};
|
||||
+
|
||||
+enum {
|
||||
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL =
|
||||
+ BIT_ULL(MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL - 0x40),
|
||||
+};
|
||||
+
|
||||
enum {
|
||||
MLX5_IPSEC_OBJECT_ICV_LEN_16B,
|
||||
};
|
||||
--
|
||||
2.50.1 (Apple Git-155)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user