100 lines
3.7 KiB
Diff
100 lines
3.7 KiB
Diff
From 4f09ee86bf6f20ac7470200bfb81f70ef9f10779 Mon Sep 17 00:00:00 2001
|
|
From: Kamal Heib <kheib@redhat.com>
|
|
Date: Sun, 19 Apr 2026 18:24:30 -0400
|
|
Subject: [PATCH] net/mlx5: fw reset, add reset timeout work
|
|
|
|
JIRA: https://redhat.atlassian.net/browse/RHEL-169055
|
|
|
|
commit 5cfbe7ebfa42fd3c517a701dab5bd73524da9088
|
|
Author: Moshe Shemesh <moshe@nvidia.com>
|
|
Date: Mon Sep 29 00:02:09 2025 +0300
|
|
|
|
net/mlx5: fw reset, add reset timeout work
|
|
|
|
Add a sync reset timeout to stop poll_sync_reset in case no reset done
|
|
or abort event arrives within the timeout. Otherwise poll sync reset
|
|
will just continue, and in case of a fw fatal error no health reporting
|
|
will be done.
|
|
|
|
Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event")
|
|
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
|
|
Reviewed-by: Shay Drori <shayd@nvidia.com>
|
|
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
|
|
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
|
|
|
|
Signed-off-by: Kamal Heib <kheib@redhat.com>
|
|
|
|
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
index 22995131824a..89e399606877 100644
|
|
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
|
|
@@ -27,6 +27,7 @@ struct mlx5_fw_reset {
|
|
struct work_struct reset_reload_work;
|
|
struct work_struct reset_now_work;
|
|
struct work_struct reset_abort_work;
|
|
+ struct delayed_work reset_timeout_work;
|
|
unsigned long reset_flags;
|
|
u8 reset_method;
|
|
struct timer_list timer;
|
|
@@ -259,6 +260,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool
|
|
return -EALREADY;
|
|
}
|
|
|
|
+ if (current_work() != &fw_reset->reset_timeout_work.work)
|
|
+ cancel_delayed_work(&fw_reset->reset_timeout_work);
|
|
mlx5_stop_sync_reset_poll(dev);
|
|
if (poll_health)
|
|
mlx5_start_health_poll(dev);
|
|
@@ -330,6 +333,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
|
|
}
|
|
mlx5_stop_health_poll(dev, true);
|
|
mlx5_start_sync_reset_poll(dev);
|
|
+
|
|
+ if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
|
|
+ &fw_reset->reset_flags))
|
|
+ schedule_delayed_work(&fw_reset->reset_timeout_work,
|
|
+ msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)));
|
|
return 0;
|
|
}
|
|
|
|
@@ -739,6 +747,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
|
|
}
|
|
}
|
|
|
|
+static void mlx5_sync_reset_timeout_work(struct work_struct *work)
|
|
+{ /* delayed-work callback for fw_reset->reset_timeout_work */
|
|
+ struct delayed_work *dwork = container_of(work, struct delayed_work,
|
|
+ work); /* the delayed_work that embeds this work item */
|
|
+ struct mlx5_fw_reset *fw_reset =
|
|
+ container_of(dwork, struct mlx5_fw_reset, reset_timeout_work);
|
|
+ struct mlx5_core_dev *dev = fw_reset->dev;
|
|
+ /* Sync reset timed out: clear the request; poll_health=true resumes health poll. */
|
|
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
|
|
+ return; /* helper failed (it can return -EALREADY); skip the warning */
|
|
+ mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
|
|
+}
|
|
+
|
|
static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
|
|
{
|
|
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
|
|
@@ -822,6 +843,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
|
|
cancel_work_sync(&fw_reset->reset_reload_work);
|
|
cancel_work_sync(&fw_reset->reset_now_work);
|
|
cancel_work_sync(&fw_reset->reset_abort_work);
|
|
+ cancel_delayed_work(&fw_reset->reset_timeout_work);
|
|
}
|
|
|
|
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
|
|
@@ -865,6 +887,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
|
|
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
|
|
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
|
|
INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
|
|
+ INIT_DELAYED_WORK(&fw_reset->reset_timeout_work,
|
|
+ mlx5_sync_reset_timeout_work);
|
|
|
|
init_completion(&fw_reset->done);
|
|
return 0;
|
|
--
|
|
2.50.1 (Apple Git-155)
|
|
|