summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-01-10 01:25:14 +0100
committerJakub Kicinski <kuba@kernel.org>2021-01-10 01:25:14 +0100
commit26c49f0d108fdc3645e75611f8148f3dd9809d6e (patch)
tree591f344b6d1cf6aa0ff76f0878e777db3a30f3e0
parenttipc: fix NULL deref in tipc_link_xmit() (diff)
parentmlxsw: core: Increase critical threshold for ASIC thermal zone (diff)
downloadlinux-26c49f0d108fdc3645e75611f8148f3dd9809d6e.tar.xz
linux-26c49f0d108fdc3645e75611f8148f3dd9809d6e.zip
Merge branch 'mlxsw-core-thermal-control-fixes'
Ido Schimmel says: ==================== mlxsw: core: Thermal control fixes This series includes two fixes for thermal control in mlxsw. Patch #1 validates that the alarm temperature threshold read from a transceiver is above the warning temperature threshold. If not, the current thresholds are maintained. It was observed that some transceiver might be unreliable and sometimes report a too low alarm temperature threshold which would result in thermal shutdown of the system. Patch #2 increases the temperature threshold above which thermal shutdown is triggered for the ASIC thermal zone. It is currently too low and might result in thermal shutdown under perfectly fine operational conditions. ==================== Link: https://lore.kernel.org/r/20210108145210.1229820-1-idosch@idosch.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c13
1 files changed, 8 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 8fa286ccdd6b..bf85ce9835d7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -19,7 +19,7 @@
#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */
-#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */
+#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
#define MLXSW_THERMAL_ZONE_MAX_NAME 16
@@ -176,6 +176,12 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
if (err)
return err;
+ if (crit_temp > emerg_temp) {
+ dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
+ tz->tzdev->type, crit_temp, emerg_temp);
+ return 0;
+ }
+
/* According to the system thermal requirements, the thermal zones are
* defined with four trip points. The critical and emergency
* temperature thresholds, provided by QSFP module are set as "active"
@@ -190,11 +196,8 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
- if (emerg_temp > crit_temp)
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
+ tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
MLXSW_THERMAL_MODULE_TEMP_SHIFT;
- else
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp;
return 0;
}