127 lines
4.8 KiB
Diff
127 lines
4.8 KiB
Diff
From 87f9d3bc475e376926bcd033f4d3779713a2f0e3 Mon Sep 17 00:00:00 2001
|
|
From: Kamal Heib <kheib@redhat.com>
|
|
Date: Fri, 17 Apr 2026 11:27:00 -0400
|
|
Subject: [PATCH] net/mlx5: HWS, fix complex rules rehash error flow
|
|
|
|
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
|
|
|
commit 4a842b1bf18a32ee0c25dd6dd98728b786a76fe4
|
|
Author: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
|
Date: Sun Aug 17 23:23:19 2025 +0300
|
|
|
|
net/mlx5: HWS, fix complex rules rehash error flow
|
|
|
|
Moving rules from matcher to matcher should not fail.
|
|
However, if it does fail due to various reasons, the error flow
|
|
should allow the kernel to continue functioning (albeit with broken
|
|
steering rules) instead of going into a series of soft lock-ups or
|
|
some other problematic behaviour.
|
|
|
|
Similar to the simple rules, complex rules rehash logic suffers
|
|
from the same problems. This patch fixes the error flow for moving
|
|
complex rules:
|
|
- If new rule creation fails before it was even enqueued, do not
|
|
poll for completion
|
|
- If TIMEOUT happened while moving the rule, no point trying
|
|
to poll for completions for other rules. Something is broken,
|
|
completion won't come, just abort the rehash sequence.
|
|
- If some other completion with an error is received, don't give up.
|
|
Continue handling rest of the rules to minimize the damage.
|
|
- Make sure that the first error code that was received will
|
|
actually be returned to the caller instead of being replaced
|
|
with the generic error code.
|
|
|
|
All the aforementioned issues stem from the same bad error flow,
|
|
so no point fixing them one by one and leaving partially broken
|
|
code - fixing them in one patch.
|
|
|
|
Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers")
|
|
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
|
|
Reviewed-by: Vlad Dogaru <vdogaru@nvidia.com>
|
|
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
|
|
Link: https://patch.msgid.link/20250817202323.308604-4-mbloch@nvidia.com
|
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
|
|
|
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
|
index ca7501c57468..14e79579c719 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c
|
|
@@ -1328,11 +1328,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
|
|
{
|
|
struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx;
|
|
struct mlx5hws_matcher *matcher = bwc_matcher->matcher;
|
|
- bool move_error = false, poll_error = false;
|
|
u16 bwc_queues = mlx5hws_bwc_queues(ctx);
|
|
struct mlx5hws_bwc_rule *tmp_bwc_rule;
|
|
struct mlx5hws_rule_attr rule_attr;
|
|
struct mlx5hws_table *isolated_tbl;
|
|
+ int move_error = 0, poll_error = 0;
|
|
struct mlx5hws_rule *tmp_rule;
|
|
struct list_head *rules_list;
|
|
u32 expected_completions = 1;
|
|
@@ -1391,11 +1391,15 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
|
|
ret = mlx5hws_matcher_resize_rule_move(matcher,
|
|
tmp_rule,
|
|
&rule_attr);
|
|
- if (unlikely(ret && !move_error)) {
|
|
- mlx5hws_err(ctx,
|
|
- "Moving complex BWC rule failed (%d), attempting to move rest of the rules\n",
|
|
- ret);
|
|
- move_error = true;
|
|
+ if (unlikely(ret)) {
|
|
+ if (!move_error) {
|
|
+ mlx5hws_err(ctx,
|
|
+ "Moving complex BWC rule: move failed (%d), attempting to move rest of the rules\n",
|
|
+ ret);
|
|
+ move_error = ret;
|
|
+ }
|
|
+ /* Rule wasn't queued, no need to poll */
|
|
+ continue;
|
|
}
|
|
|
|
expected_completions = 1;
|
|
@@ -1403,11 +1407,19 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
|
|
rule_attr.queue_id,
|
|
&expected_completions,
|
|
true);
|
|
- if (unlikely(ret && !poll_error)) {
|
|
- mlx5hws_err(ctx,
|
|
- "Moving complex BWC rule: poll failed (%d), attempting to move rest of the rules\n",
|
|
- ret);
|
|
- poll_error = true;
|
|
+ if (unlikely(ret)) {
|
|
+ if (ret == -ETIMEDOUT) {
|
|
+ mlx5hws_err(ctx,
|
|
+ "Moving complex BWC rule: timeout polling for completions (%d), aborting rehash\n",
|
|
+ ret);
|
|
+ return ret;
|
|
+ }
|
|
+ if (!poll_error) {
|
|
+ mlx5hws_err(ctx,
|
|
+ "Moving complex BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n",
|
|
+ ret);
|
|
+ poll_error = ret;
|
|
+ }
|
|
}
|
|
|
|
/* Done moving the rule to the new matcher,
|
|
@@ -1422,8 +1434,11 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher)
|
|
}
|
|
}
|
|
|
|
- if (move_error || poll_error)
|
|
- ret = -EINVAL;
|
|
+ /* Return the first error that happened */
|
|
+ if (unlikely(move_error))
|
|
+ return move_error;
|
|
+ if (unlikely(poll_error))
|
|
+ return poll_error;
|
|
|
|
return ret;
|
|
}
|
|
--
|
|
2.50.1 (Apple Git-155)
|
|
|