summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
author Oded Gabbay <ogabbay@kernel.org> 2021-04-08 08:38:32 +0200
committer Oded Gabbay <ogabbay@kernel.org> 2021-04-09 13:10:32 +0200
commit b575a7673e3d0396992fc72fce850723d39264e3 (patch)
tree c7b0a84fcbb782bf16c3d1ad14fbcbcc516a6081 /drivers/misc/habanalabs
parent habanalabs: update to latest F/W communication header (diff)
download linux-b575a7673e3d0396992fc72fce850723d39264e3.tar.xz
linux-b575a7673e3d0396992fc72fce850723d39264e3.zip
habanalabs: print f/w boot unknown error
We need to print a message to the kernel log in case we encounter an unknown error in the f/w boot, to help the user understand what happened. In addition, we shouldn't print the unknown error message in case of known errors. Moreover, in case of warnings/info, we shouldn't return -EIO, which will fail the initialization and mark the device as disabled.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r-- drivers/misc/habanalabs/common/firmware_if.c 84
1 files changed, 68 insertions, 16 deletions
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 652571d3b8e6..832dd5c5bb06 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -293,6 +293,7 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
u32 cpu_security_boot_status_reg)
{
u32 err_val, security_val;
+ bool err_exists = false;
/* Some of the firmware status codes are deprecated in newer f/w
* versions. In those versions, the errors are reported
@@ -307,51 +308,102 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
if (!(err_val & CPU_BOOT_ERR0_ENABLED))
return 0;
- if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - DRAM initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
- if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
dev_err(hdev->dev,
"Device boot error - Thermal Sensor initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
+ }
if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
- if (hdev->bmc_enable)
- dev_warn(hdev->dev,
+ if (hdev->bmc_enable) {
+ dev_err(hdev->dev,
"Device boot error - Skipped waiting for BMC\n");
- else
+ err_exists = true;
+ } else {
+ dev_info(hdev->dev,
+ "Device boot message - Skipped waiting for BMC\n");
+ /* This is an info so we don't want it to disable the
+ * device
+ */
err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+ }
}
- if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not available\n");
- if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
dev_err(hdev->dev,
"Device boot error - NIC F/W initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
dev_warn(hdev->dev,
"Device boot warning - security not ready\n");
- if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
dev_err(hdev->dev, "Device boot error - security failure\n");
- if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
- if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
dev_err(hdev->dev, "Device boot error - PLL failure\n");
- if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL)
+ err_exists = true;
+ }
+
+ if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
dev_err(hdev->dev,
- "Device boot error - device unusable failure\n");
+ "Device boot error - device unusable\n");
+ err_exists = true;
+ }
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
- if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+ if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
+ dev_err(hdev->dev,
+ "Device boot error - unknown error 0x%08x\n",
+ err_val);
+ err_exists = true;
+ }
+
+ if (err_exists)
return -EIO;
return 0;