summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c77
-rw-r--r--drivers/misc/habanalabs/common/device.c19
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c65
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h5
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c1
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c9
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c5
-rw-r--r--drivers/misc/habanalabs/common/memory.c10
-rw-r--r--drivers/misc/habanalabs/common/mmu.c6
-rw-r--r--drivers/misc/habanalabs/common/mmu_v1.c12
-rw-r--r--drivers/misc/habanalabs/common/pci.c28
11 files changed, 177 insertions, 60 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index beb482310a58..b2b3d2b0f808 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cntr = &hdev->aggregated_cs_counters;
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
- if (!cs)
+ if (!cs) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
return -ENOMEM;
+ }
cs->ctx = ctx;
cs->submitted = false;
@@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
if (!cs_cmpl) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
rc = -ENOMEM;
goto free_cs;
}
@@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
if (!cs->jobs_in_queue_cnt) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&cntr->out_of_mem_drop_cnt);
rc = -ENOMEM;
goto free_fence;
}
@@ -562,7 +569,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]);
- /* Make sure we don't have leftovers in the H/W queues mirror list */
+ /* Make sure we don't have leftovers in the CS mirror list */
list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
cs_get(cs);
cs->aborted = true;
@@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
static int hl_cs_copy_chunk_array(struct hl_device *hdev,
struct hl_cs_chunk **cs_chunk_array,
- void __user *chunks, u32 num_chunks)
+ void __user *chunks, u32 num_chunks,
+ struct hl_ctx *ctx)
{
u32 size_to_copy;
if (num_chunks > HL_MAX_JOBS_PER_CS) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Number of chunks can NOT be larger than %d\n",
HL_MAX_JOBS_PER_CS);
@@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
GFP_ATOMIC);
- if (!*cs_chunk_array)
+ if (!*cs_chunk_array) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
return -ENOMEM;
+ }
size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
kfree(*cs_chunk_array);
return -EFAULT;
@@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_chunk *cs_chunk_array;
struct hl_cs_counters_atomic *cntr;
+ struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
@@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
cntr = &hdev->aggregated_cs_counters;
*cs_seq = ULLONG_MAX;
- rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+ hpriv->ctx);
if (rc)
goto out;
@@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
if (rc) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
goto free_cs_object;
}
@@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
atomic64_inc(
- &hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ &ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
rc = -EINVAL;
goto free_cs_object;
}
@@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
- atomic64_inc(
- &hpriv->ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
@@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = cs_parser(hpriv, job);
if (rc) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.parsing_drop_cnt);
atomic64_inc(&cntr->parsing_drop_cnt);
dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
@@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
if (int_queues_only) {
- atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt);
- atomic64_inc(&cntr->parsing_drop_cnt);
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence);
@@ -1042,7 +1058,7 @@ out:
}
static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
- struct hl_cs_chunk *chunk, u64 *signal_seq)
+ struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{
u64 *signal_seq_arr = NULL;
u32 size_to_copy, signal_seq_arr_len;
@@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
/* currently only one signal seq is supported */
if (signal_seq_arr_len != 1) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Wait for signal CS supports only one signal CS seq\n");
return -EINVAL;
@@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
signal_seq_arr = kmalloc_array(signal_seq_arr_len,
sizeof(*signal_seq_arr),
GFP_ATOMIC);
- if (!signal_seq_arr)
+ if (!signal_seq_arr) {
+ atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
return -ENOMEM;
+ }
size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
if (copy_from_user(signal_seq_arr,
u64_to_user_ptr(chunk->signal_seq_arr),
size_to_copy)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt);
dev_err(hdev->dev,
"Failed to copy signal seq array from user\n");
rc = -EFAULT;
@@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_device *hdev = hpriv->hdev;
struct hl_cs_compl *sig_waitcs_cmpl;
u32 q_idx, collective_engine_id = 0;
+ struct hl_cs_counters_atomic *cntr;
struct hl_fence *sig_fence = NULL;
struct hl_ctx *ctx = hpriv->ctx;
enum hl_queue_type q_type;
@@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
u64 signal_seq;
int rc;
+ cntr = &hdev->aggregated_cs_counters;
*cs_seq = ULLONG_MAX;
- rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+ rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+ ctx);
if (rc)
goto out;
@@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
chunk = &cs_chunk_array[0];
if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
rc = -EINVAL;
@@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
q_type = hw_queue_prop->type;
if (!hw_queue_prop->supports_sync_stream) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Queue index %d does not support sync stream operations\n",
q_idx);
@@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Queue index %d is invalid\n", q_idx);
rc = -EINVAL;
@@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
}
if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
- rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq);
+ rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
if (rc)
goto free_cs_chunk_array;
sig_fence = hl_ctx_get_fence(ctx, signal_seq);
if (IS_ERR(sig_fence)) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"Failed to get signal CS with seq 0x%llx\n",
signal_seq);
@@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
container_of(sig_fence, struct hl_cs_compl, base_fence);
if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
dev_err(hdev->dev,
"CS seq 0x%llx is not of a signal CS\n",
signal_seq);
@@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
cs, q_idx, collective_engine_id);
- else
+ else {
+ atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+ atomic64_inc(&cntr->validation_drop_cnt);
rc = -EINVAL;
+ }
if (rc)
goto free_cs_object;
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5871162a8442..69d04eca767f 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
- if (hdev->disabled)
- status = HL_DEVICE_STATUS_MALFUNCTION;
- else if (atomic_read(&hdev->in_reset))
+ if (atomic_read(&hdev->in_reset))
status = HL_DEVICE_STATUS_IN_RESET;
else if (hdev->needs_reset)
status = HL_DEVICE_STATUS_NEEDS_RESET;
+ else if (hdev->disabled)
+ status = HL_DEVICE_STATUS_MALFUNCTION;
else
status = HL_DEVICE_STATUS_OPERATIONAL;
@@ -1037,7 +1037,7 @@ kill_processes:
if (hard_reset) {
/* Release kernel context */
- if (hl_ctx_put(hdev->kernel_ctx) == 1)
+ if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
hdev->kernel_ctx = NULL;
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
@@ -1092,6 +1092,7 @@ kill_processes:
GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
+ hl_mmu_fini(hdev);
goto out_err;
}
@@ -1103,6 +1104,7 @@ kill_processes:
"failed to init kernel ctx in hard reset\n");
kfree(hdev->kernel_ctx);
hdev->kernel_ctx = NULL;
+ hl_mmu_fini(hdev);
goto out_err;
}
}
@@ -1485,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev)
}
}
+ /* Disable PCI access from device F/W so it won't send us additional
+ * interrupts. We disable MSI/MSI-X at the halt_engines function and we
+ * can't have the F/W sending us interrupts after that. We need to
+ * disable the access here because if the device is marked disable, the
+ * message won't be send. Also, in case of heartbeat, the device CPU is
+ * marked as disable so this message won't be sent
+ */
+ hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+
/* Mark device as disabled */
hdev->disabled = true;
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 0e1c629e9800..c9a12980218a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
}
counters->rx_throughput = result;
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
+ CPUCP_PKT_CTL_OPCODE_SHIFT);
+
/* Fetch PCI tx counter */
pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
counters->tx_throughput = result;
/* Fetch PCI replay counter */
+ memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
@@ -627,25 +632,38 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
security_status = RREG32(cpu_security_boot_status_reg);
/* We read security status multiple times during boot:
- * 1. preboot - we check if fw security feature is supported
- * 2. boot cpu - we get boot cpu security status
- * 3. FW application - we get FW application security status
+ * 1. preboot - a. Check whether the security status bits are valid
+ * b. Check whether fw security is enabled
+ * c. Check whether hard reset is done by preboot
+ * 2. boot cpu - a. Fetch boot cpu security status
+ * b. Check whether hard reset is done by boot cpu
+ * 3. FW application - a. Fetch fw application security status
+ * b. Check whether hard reset is done by fw app
*
* Preboot:
* Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
* check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
*/
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
- hdev->asic_prop.fw_security_status_valid = 1;
- prop->fw_security_disabled =
- !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN);
+ prop->fw_security_status_valid = 1;
+
+ if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
+ prop->fw_security_disabled = false;
+ else
+ prop->fw_security_disabled = true;
+
+ if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+ prop->hard_reset_done_by_fw = true;
} else {
- hdev->asic_prop.fw_security_status_valid = 0;
+ prop->fw_security_status_valid = 0;
prop->fw_security_disabled = true;
}
+ dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
dev_info(hdev->dev, "firmware-level security is %s\n",
- prop->fw_security_disabled ? "disabled" : "enabled");
+ prop->fw_security_disabled ? "disabled" : "enabled");
return 0;
}
@@ -655,6 +673,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 status;
int rc;
@@ -723,11 +742,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
/* Read U-Boot version now in case we will later fail */
hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ /* Clear reset status since we need to read it again from boot CPU */
+ prop->hard_reset_done_by_fw = false;
+
/* Read boot_cpu security bits */
- if (hdev->asic_prop.fw_security_status_valid)
- hdev->asic_prop.fw_boot_cpu_security_map =
+ if (prop->fw_security_status_valid) {
+ prop->fw_boot_cpu_security_map =
RREG32(cpu_security_boot_status_reg);
+ if (prop->fw_boot_cpu_security_map &
+ CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
+ prop->hard_reset_done_by_fw = true;
+ }
+
+ dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
+
if (rc) {
detect_cpu_boot_status(hdev, status);
rc = -EIO;
@@ -796,18 +826,21 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
goto out;
}
+ /* Clear reset status since we need to read again from app */
+ prop->hard_reset_done_by_fw = false;
+
/* Read FW application security bits */
- if (hdev->asic_prop.fw_security_status_valid) {
- hdev->asic_prop.fw_app_security_map =
+ if (prop->fw_security_status_valid) {
+ prop->fw_app_security_map =
RREG32(cpu_security_boot_status_reg);
- if (hdev->asic_prop.fw_app_security_map &
+ if (prop->fw_app_security_map &
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
- hdev->asic_prop.hard_reset_done_by_fw = true;
+ prop->hard_reset_done_by_fw = true;
}
- dev_dbg(hdev->dev, "Firmware hard-reset is %s\n",
- hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled");
+ dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
+ prop->hard_reset_done_by_fw ? "enabled" : "disabled");
dev_info(hdev->dev, "Successfully loaded firmware to device\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 571eda6ef5ab..60e16dc4bcac 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -944,7 +944,7 @@ struct hl_asic_funcs {
u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev);
u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
- u32 size);
+ u32 size, bool eb);
u32 (*gen_wait_cb)(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data);
@@ -1000,6 +1000,7 @@ struct hl_va_range {
* @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ * @validation_drop_cnt: dropped due to error in validation
*/
struct hl_cs_counters_atomic {
atomic64_t out_of_mem_drop_cnt;
@@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic {
atomic64_t queue_full_drop_cnt;
atomic64_t device_in_reset_drop_cnt;
atomic64_t max_cs_in_flight_drop_cnt;
+ atomic64_t validation_drop_cnt;
};
/**
@@ -2180,6 +2182,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops);
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 6bbb6bca6860..032d114f01ea 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = {
.id_table = ids,
.probe = hl_pci_probe,
.remove = hl_pci_remove,
+ .shutdown = hl_pci_remove,
.driver.pm = &hl_pm_ops,
.err_handler = &hl_pci_err_handler,
};
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 32e6af1db4e3..d25892d61ec9 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
&hw_idle.busy_engines_mask_ext, NULL);
+ hw_idle.busy_engines_mask =
+ lower_32_bits(hw_idle.busy_engines_mask_ext);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -335,6 +337,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
atomic64_read(&cntr->device_in_reset_drop_cnt);
cs_counters.total_max_cs_in_flight_drop_cnt =
atomic64_read(&cntr->max_cs_in_flight_drop_cnt);
+ cs_counters.total_validation_drop_cnt =
+ atomic64_read(&cntr->validation_drop_cnt);
if (hpriv->ctx) {
cs_counters.ctx_out_of_mem_drop_cnt =
@@ -352,6 +356,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
cs_counters.ctx_max_cs_in_flight_drop_cnt =
atomic64_read(
&hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt);
+ cs_counters.ctx_validation_drop_cnt =
+ atomic64_read(
+ &hpriv->ctx->cs_counters.validation_drop_cnt);
}
return copy_to_user(out, &cs_counters,
@@ -406,7 +413,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
- struct hl_pll_frequency_info freq_info = {0};
+ struct hl_pll_frequency_info freq_info = { {0} };
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
int rc;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 7caf868d1585..76217258780a 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev,
"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+ /* we set an EB since we must make sure all oeprations are done
+ * when sending the signal
+ */
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id, 0);
+ cs_cmpl->hw_sob->sob_id, 0, true);
kref_get(&hw_sob->kref);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index cbe9da4e0211..5d4fbdcaefe3 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -886,8 +886,10 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
{
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr, i;
+ bool is_host_addr;
u32 page_size;
+ is_host_addr = !hl_is_dram_va(hdev, vaddr);
page_size = phys_pg_pack->page_size;
next_vaddr = vaddr;
@@ -900,9 +902,13 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
/*
* unmapping on Palladium can be really long, so avoid a CPU
* soft lockup bug by sleeping a little between unmapping pages
+ *
+ * In addition, when unmapping host memory we pass through
+ * the Linux kernel to unpin the pages and that takes a long
+ * time. Therefore, sleep every 32K pages to avoid soft lockup
*/
- if (hdev->pldm)
- usleep_range(500, 1000);
+ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+ usleep_range(50, 200);
}
}
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index 33ae953d3a36..28a4638741d8 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -9,7 +9,7 @@
#include "habanalabs.h"
-static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -156,7 +156,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = is_dram_va(hdev, virt_addr);
+ is_dram_addr = hl_is_dram_va(hdev, virt_addr);
if (is_dram_addr)
mmu_prop = &prop->dmmu;
@@ -236,7 +236,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (!hdev->mmu_enable)
return 0;
- is_dram_addr = is_dram_va(hdev, virt_addr);
+ is_dram_addr = hl_is_dram_va(hdev, virt_addr);
if (is_dram_addr)
mmu_prop = &prop->dmmu;
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c
index 2ce6ea89d4fa..06d8a44dd5d4 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu_v1.c
@@ -467,8 +467,16 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
{
/* MMU H/W fini was already done in device hw_fini() */
- kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+ if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+ }
+
+ /* Make sure that if we arrive here again without init was called we
+ * won't cause kernel panic. This can happen for example if we fail
+ * during hard reset code at certain points
+ */
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}
/**
diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c
index 923b2606e29f..b4725e6101f6 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
return 0;
- if (val & PCI_CONFIG_ELBI_STS_ERR) {
- dev_err(hdev->dev, "Error writing to ELBI\n");
+ if (val & PCI_CONFIG_ELBI_STS_ERR)
return -EIO;
- }
if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
dev_err(hdev->dev, "ELBI write didn't finish in time\n");
@@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
dbi_offset = addr & 0xFFF;
- rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+ /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+
+ rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
data);
if (rc)
@@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+ /* Return the DBI window to the default location
+ * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
if (rc)
dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
@@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
/* Enable */
rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+ /* Return the DBI window to the default location
+ * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail
+ * in case the firmware security is enabled
+ */
+ hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
return rc;
}