summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c7
-rw-r--r--drivers/misc/habanalabs/gaudi2/gaudi2.c7
2 files changed, 8 insertions, 6 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9f5e208701ba..ae78f838f987 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7584,7 +7584,7 @@ static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}
-static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
+static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
ktime_t zero_time = ktime_set(0, 0);
@@ -7612,6 +7612,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
@@ -7619,6 +7620,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -7887,8 +7889,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
break;
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
- event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
- gaudi_print_clk_change_info(hdev, event_type);
+ gaudi_print_clk_change_info(hdev, event_type, &event_mask);
hl_fw_unmask_irq(hdev, event_type);
break;
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index e793fb2bdcbe..c14b3bb16f96 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8603,7 +8603,7 @@ static void gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data
hbm_mc_spi[i].cause);
}
-static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
+static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
ktime_t zero_time = ktime_set(0, 0);
@@ -8629,12 +8629,14 @@ static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type)
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
break;
case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
break;
@@ -9085,8 +9087,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
- gaudi2_print_clk_change_info(hdev, event_type);
- event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
break;
case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: