summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/gaudi
diff options
context:
space:
mode:
authorOfir Bitton <obitton@habana.ai>2022-11-24 10:01:44 +0100
committerOded Gabbay <ogabbay@kernel.org>2023-01-26 09:56:20 +0100
commit6710444cfeb533f8c7817c7ec1adfb52206a6d00 (patch)
treeb959ad24dba7eecbbc4ead7c91926d384bfed629 /drivers/misc/habanalabs/gaudi
parenthabanalabs: abort waiting user threads upon error (diff)
downloadlinux-6710444cfeb533f8c7817c7ec1adfb52206a6d00.tar.xz
linux-6710444cfeb533f8c7817c7ec1adfb52206a6d00.zip
habanalabs: don't notify user about clk throttling due to power
As clock throttling due to high power consumption can happen very frequently and there is no real reason to notify the user about it, we skip this notification in all asics. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi')
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c7
1 files changed, 4 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9f5e208701ba..ae78f838f987 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7584,7 +7584,7 @@ static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}
-static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
+static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
ktime_t zero_time = ktime_set(0, 0);
@@ -7612,6 +7612,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev,
"Clock throttling due to overheating\n");
break;
@@ -7619,6 +7620,7 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
case GAUDI_EVENT_FIX_THERMAL_ENV_E:
hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
dev_info_ratelimited(hdev->dev,
"Thermal envelop is safe, back to optimal clock\n");
break;
@@ -7887,8 +7889,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
break;
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
- event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
- gaudi_print_clk_change_info(hdev, event_type);
+ gaudi_print_clk_change_info(hdev, event_type, &event_mask);
hl_fw_unmask_irq(hdev, event_type);
break;