summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet
diff options
context:
space:
mode:
authorMoshe Shemesh <moshe@mellanox.com>2019-08-13 11:49:13 +0200
committerSaeed Mahameed <saeedm@mellanox.com>2019-08-22 22:38:47 +0200
commita6633e11e8732b9c000774746a2c1827a7e3c316 (patch)
tree88633cef3109fff24a2c90bb430d0c35af1c545c /drivers/net/ethernet
parentnet/mlx5: Fix crdump chunks print (diff)
downloadlinux-a6633e11e8732b9c000774746a2c1827a7e3c316.tar.xz
linux-a6633e11e8732b9c000774746a2c1827a7e3c316.zip
net/mlx5: Fix delay in fw fatal report handling due to fw report
When fw fatal error occurs, poll health() first detects and reports on a fw error. Afterwards, it detects and reports on the fw fatal error itself. That can cause a long delay in fw fatal error handling which waits in a queue for the fw error handling to be finished. The fw error handle will try asking for fw core dump command while fw in fatal state may not respond and driver will wait for command timeout. Changing the flow to detect and handle first fw fatal errors and only if no fatal error detected look for a fw error to handle. Fixes: d1bf0e2cc4a6 ("net/mlx5: Report devlink health on FW issues") Signed-off-by: Moshe Shemesh <moshe@mellanox.com> Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c19
1 files changed, 10 insertions, 9 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index cc5887f52679..d685122d9ff7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -701,6 +701,16 @@ static void poll_health(struct timer_list *t)
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto out;
+ fatal_error = check_fatal_sensors(dev);
+
+ if (fatal_error && !health->fatal_error) {
+ mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
+ dev->priv.health.fatal_error = fatal_error;
+ print_health_info(dev);
+ mlx5_trigger_health_work(dev);
+ goto out;
+ }
+
count = ioread32be(health->health_counter);
if (count == health->prev)
++health->miss_counter;
@@ -719,15 +729,6 @@ static void poll_health(struct timer_list *t)
if (health->synd && health->synd != prev_synd)
queue_work(health->wq, &health->report_work);
- fatal_error = check_fatal_sensors(dev);
-
- if (fatal_error && !health->fatal_error) {
- mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
- dev->priv.health.fatal_error = fatal_error;
- print_health_info(dev);
- mlx5_trigger_health_work(dev);
- }
-
out:
mod_timer(&health->timer, get_next_poll_jiffies());
}