209 lines
8.1 KiB
Diff
209 lines
8.1 KiB
Diff
From 377d452a9758e2011a101d4919bb498a14e4075b Mon Sep 17 00:00:00 2001
|
|
From: Kamal Heib <kheib@redhat.com>
|
|
Date: Mon, 20 Apr 2026 15:08:58 -0400
|
|
Subject: [PATCH] net/mlx5: Serialize firmware reset with devlink
|
|
|
|
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
|
|
|
commit 367e501f8b095eca08d2eb0ba4ccea5b5e82c169
|
|
Author: Shay Drory <shayd@nvidia.com>
|
|
Date: Tue Dec 9 14:56:13 2025 +0200
|
|
|
|
net/mlx5: Serialize firmware reset with devlink
|
|
|
|
The firmware reset mechanism can be triggered by asynchronous events,
|
|
which may race with other devlink operations like devlink reload or
|
|
devlink dev eswitch set, potentially leading to inconsistent states.
|
|
|
|
This patch addresses the race by using the devl_lock to serialize the
|
|
firmware reset against other devlink operations. When a reset is
|
|
requested, the driver attempts to acquire the lock. If successful, it
|
|
sets a flag to block devlink reload or eswitch changes, ACKs the reset
|
|
to firmware and then releases the lock. If the lock is already held by
|
|
another operation, the driver NACKs the firmware reset request,
|
|
indicating that the reset cannot proceed.
|
|
|
|
Firmware reset does not keep the devl_lock and instead uses an internal
|
|
firmware reset bit. This is because firmware resets can be triggered by
|
|
asynchronous events, and processed in different threads. It is illegal
|
|
and unsafe to acquire a lock in one thread and attempt to release it in
|
|
another, as lock ownership is intrinsically thread-specific.
|
|
|
|
This change ensures that firmware resets and other devlink operations
|
|
are mutually exclusive during the critical reset request phase,
|
|
preventing race conditions.
|
|
|
|
Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
|
|
Signed-off-by: Shay Drory <shayd@nvidia.com>
|
|
Reviewed-by: Mateusz Berezecki <mberezecki@nvidia.com>
|
|
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
|
|
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
|
Link: https://patch.msgid.link/1765284977-1363052-6-git-send-email-tariqt@nvidia.com
|
|
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
|
|
|
|
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
|
index c204c707b850..9fb39f42a670 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
|
|
@@ -197,6 +197,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
|
|
struct pci_dev *pdev = dev->pdev;
|
|
int ret = 0;
|
|
|
|
+ if (mlx5_fw_reset_in_progress(dev)) {
|
|
+ NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset");
|
|
+ return -EBUSY;
|
|
+ }
|
|
+
|
|
if (mlx5_dev_is_lightweight(dev)) {
|
|
if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
|
|
return -EOPNOTSUPP;
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
|
index 8ebca0d17f65..575b12079933 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
|
|
@@ -52,6 +52,7 @@
|
|
#include "devlink.h"
|
|
#include "lag/lag.h"
|
|
#include "en/tc/post_meter.h"
|
|
+#include "fw_reset.h"
|
|
|
|
/* There are two match-all miss flows, one for unicast dst mac and
|
|
* one for multicast.
|
|
@@ -3990,6 +3991,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
|
|
if (IS_ERR(esw))
|
|
return PTR_ERR(esw);
|
|
|
|
+ if (mlx5_fw_reset_in_progress(esw->dev)) {
|
|
+ NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset");
|
|
+ return -EBUSY;
|
|
+ }
|
|
+
|
|
if (esw_mode_from_devlink(mode, &mlx5_mode))
|
|
return -EINVAL;
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
index 33df0418e575..4544f1968f73 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
@@ -15,6 +15,7 @@ enum {
|
|
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
|
|
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED,
|
|
MLX5_FW_RESET_FLAGS_UNLOAD_EVENT,
|
|
+ MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS,
|
|
};
|
|
|
|
struct mlx5_fw_reset {
|
|
@@ -127,6 +128,16 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
|
|
return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
|
|
}
|
|
|
|
+bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev)
|
|
+{
|
|
+ struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
|
|
+
|
|
+ if (!fw_reset)
|
|
+ return false;
|
|
+
|
|
+ return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
|
|
+}
|
|
+
|
|
static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
|
|
u8 *reset_method)
|
|
{
|
|
@@ -242,6 +253,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
|
|
BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
|
|
devl_unlock(devlink);
|
|
}
|
|
+
|
|
+ clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
|
|
}
|
|
|
|
static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
|
|
@@ -461,27 +474,48 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
|
|
struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
|
|
reset_request_work);
|
|
struct mlx5_core_dev *dev = fw_reset->dev;
|
|
+ bool nack_request = false;
|
|
+ struct devlink *devlink;
|
|
int err;
|
|
|
|
err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
|
|
- if (err)
|
|
+ if (err) {
|
|
+ nack_request = true;
|
|
mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
|
|
+ } else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) ||
|
|
+ test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
|
|
+ &fw_reset->reset_flags)) {
|
|
+ nack_request = true;
|
|
+ }
|
|
|
|
- if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
|
|
- !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) {
|
|
+ devlink = priv_to_devlink(dev);
|
|
+ /* For external resets, try to acquire devl_lock. Skip if devlink reset is
|
|
+ * pending (lock already held)
|
|
+ */
|
|
+ if (nack_request ||
|
|
+ (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP,
|
|
+ &fw_reset->reset_flags) &&
|
|
+ !devl_trylock(devlink))) {
|
|
err = mlx5_fw_reset_set_reset_sync_nack(dev);
|
|
mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
|
|
err ? "Failed" : "Sent");
|
|
return;
|
|
}
|
|
+
|
|
if (mlx5_sync_reset_set_reset_requested(dev))
|
|
- return;
|
|
+ goto unlock;
|
|
+
|
|
+ set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
|
|
|
|
err = mlx5_fw_reset_set_reset_sync_ack(dev);
|
|
if (err)
|
|
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
|
|
else
|
|
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
|
|
+
|
|
+unlock:
|
|
+ if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags))
|
|
+ devl_unlock(devlink);
|
|
}
|
|
|
|
static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
|
|
@@ -721,6 +755,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
|
|
|
|
if (mlx5_sync_reset_clear_reset_requested(dev, true))
|
|
return;
|
|
+
|
|
+ clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
|
|
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
|
|
}
|
|
|
|
@@ -757,6 +793,7 @@ static void mlx5_sync_reset_timeout_work(struct work_struct *work)
|
|
|
|
if (mlx5_sync_reset_clear_reset_requested(dev, true))
|
|
return;
|
|
+ clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags);
|
|
mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
|
|
}
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
|
|
index d5b28525c960..2d96b2adc1cd 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
|
|
@@ -10,6 +10,7 @@ int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_ty
|
|
int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
|
|
struct netlink_ext_ack *extack);
|
|
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
|
|
+bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev);
|
|
|
|
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
|
|
void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked);
|
|
--
|
|
2.50.1 (Apple Git-155)
|
|
|