summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2020-07-08 16:02:13 +0200
committer: Jens Axboe <axboe@kernel.dk> 2020-07-08 16:02:13 +0200
commit: 482c6b614a4750f71ed9c928bb5b2007a05dd694 (patch)
tree: 60f9140b0c24fb4b2b1059afcce8ac77ecc3d3c1 /drivers/misc/habanalabs
parent: block: remove a bogus warning in __submit_bio_noacct_mq (diff)
parent: Linux 5.8-rc4 (diff)
download: linux-482c6b614a4750f71ed9c928bb5b2007a05dd694.tar.xz
linux-482c6b614a4750f71ed9c928bb5b2007a05dd694.zip
Merge tag 'v5.8-rc4' into for-5.9/drivers
Merge in 5.8-rc4 for-5.9/block to set up for-5.9/drivers, to provide a clean base and make life easier for the NVMe changes. Signed-off-by: Jens Axboe <axboe@kernel.dk> * tag 'v5.8-rc4': (732 commits) Linux 5.8-rc4 x86/ldt: use "pr_info_once()" instead of open-coding it badly MIPS: Do not use smp_processor_id() in preemptible code MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen .gitignore: Do not track `defconfig` from `make savedefconfig` io_uring: fix regression with always ignoring signals in io_cqring_wait() x86/ldt: Disable 16-bit segments on Xen PV x86/entry/32: Fix #MC and #DB wiring on x86_32 x86/entry/xen: Route #DB correctly on Xen PV x86/entry, selftests: Further improve user entry sanity checks x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER i2c: mlxcpld: check correct size of maximum RECV_LEN packet i2c: add Kconfig help text for slave mode i2c: slave-eeprom: update documentation i2c: eg20t: Load module automatically if ID matches i2c: designware: platdrv: Set class based on DMI i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665 mm/page_alloc: fix documentation error vmalloc: fix the owner argument for the new __vmalloc_node_range callers mm/cma.c: use exact_nid true to fix possible per-numa cma leak ...
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r-- drivers/misc/habanalabs/command_submission.c | 13
-rw-r--r-- drivers/misc/habanalabs/debugfs.c | 4
-rw-r--r-- drivers/misc/habanalabs/gaudi/gaudi.c | 37
-rw-r--r-- drivers/misc/habanalabs/gaudi/gaudiP.h | 3
-rw-r--r-- drivers/misc/habanalabs/include/gaudi/gaudi_packets.h | 3
5 files changed, 56 insertions, 4 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index f82974a916c3..b0f62cbbdc87 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -62,6 +62,12 @@ static void hl_fence_release(struct dma_fence *fence)
container_of(fence, struct hl_cs_compl, base_fence);
struct hl_device *hdev = hl_cs_cmpl->hdev;
+ /* EBUSY means the CS was never submitted and hence we don't have
+ * an attached hw_sob object that we should handle here
+ */
+ if (fence->error == -EBUSY)
+ goto free;
+
if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
(hl_cs_cmpl->type == CS_TYPE_WAIT)) {
@@ -92,6 +98,7 @@ static void hl_fence_release(struct dma_fence *fence)
kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
}
+free:
kfree_rcu(hl_cs_cmpl, base_fence.rcu);
}
@@ -328,10 +335,16 @@ static void cs_do_release(struct kref *ref)
hl_ctx_put(cs->ctx);
+ /* Mark the fence with an error when the CS was never submitted, because
+ * in that case the dma fence release flow is different. Mainly, we don't
+ * need to handle hw_sob for signal/wait
+ */
if (cs->timedout)
dma_fence_set_error(cs->fence, -ETIMEDOUT);
else if (cs->aborted)
dma_fence_set_error(cs->fence, -EIO);
+ else if (!cs->submitted)
+ dma_fence_set_error(cs->fence, -EBUSY);
dma_fence_signal(cs->fence);
dma_fence_put(cs->fence);
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 3c8dcdfba20c..fc4372c18ce2 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -480,7 +480,7 @@ out:
return 0;
}
-static ssize_t mmu_write(struct file *file, const char __user *buf,
+static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
size_t count, loff_t *f_pos)
{
struct seq_file *s = file->private_data;
@@ -1125,7 +1125,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"command_submission_jobs", command_submission_jobs_show, NULL},
{"userptr", userptr_show, NULL},
{"vm", vm_show, NULL},
- {"mmu", mmu_show, mmu_write},
+ {"mmu", mmu_show, mmu_asid_va_write},
{"engines", engines_show, NULL}
};
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 61f88e9884ce..834470d10b46 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -96,7 +96,7 @@
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
-#define GAUDI_ARB_WDT_TIMEOUT 0x400000
+#define GAUDI_ARB_WDT_TIMEOUT 0x1000000
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
@@ -1893,6 +1893,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
+ WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
+
/* The following configuration is needed only once per QMAN */
if (qman_id == 0) {
/* Configure RAZWI IRQ */
@@ -2725,6 +2727,12 @@ static int gaudi_mmu_init(struct hl_device *hdev)
WREG32(mmSTLB_HOP_CONFIGURATION,
hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
+ /*
+ * The H/W expects the first PI after init to be 1. After wraparound
+ * we'll write 0.
+ */
+ gaudi->mmu_cache_inv_pi = 1;
+
gaudi->hw_cap_initialized |= HW_CAP_MMU;
return 0;
@@ -3790,6 +3798,25 @@ static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
src_in_host);
}
+static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
+ struct hl_cs_parser *parser,
+ struct packet_load_and_exe *user_pkt)
+{
+ u32 cfg;
+
+ cfg = le32_to_cpu(user_pkt->cfg);
+
+ if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
+ dev_err(hdev->dev,
+ "User not allowed to use Load and Execute\n");
+ return -EPERM;
+ }
+
+ parser->patched_cb_size += sizeof(struct packet_load_and_exe);
+
+ return 0;
+}
+
static int gaudi_validate_cb(struct hl_device *hdev,
struct hl_cs_parser *parser, bool is_mmu)
{
@@ -3838,6 +3865,11 @@ static int gaudi_validate_cb(struct hl_device *hdev,
rc = -EPERM;
break;
+ case PACKET_LOAD_AND_EXE:
+ rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
+ (struct packet_load_and_exe *) user_pkt);
+ break;
+
case PACKET_LIN_DMA:
parser->contains_dma_pkt = true;
if (is_mmu)
@@ -3855,7 +3887,6 @@ static int gaudi_validate_cb(struct hl_device *hdev,
case PACKET_FENCE:
case PACKET_NOP:
case PACKET_ARB_POINT:
- case PACKET_LOAD_AND_EXE:
parser->patched_cb_size += pkt_size;
break;
@@ -5994,6 +6025,8 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
mutex_lock(&hdev->mmu_cache_lock);
/* L0 & L1 invalidation */
+ WREG32(mmSTLB_INV_PS, 3);
+ WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
WREG32(mmSTLB_INV_PS, 2);
rc = hl_poll_timeout(
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index a46530d375fa..41a8d9bff6bf 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -229,6 +229,8 @@ struct gaudi_internal_qman_info {
* @multi_msi_mode: whether we are working in multi MSI or single MSI mode.
* Multi MSI is possible only with IOMMU enabled.
* @ext_queue_idx: helper index for external queues initialization.
+ * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
+ * 8-bit value so use u8.
*/
struct gaudi_device {
int (*armcp_info_get)(struct hl_device *hdev);
@@ -248,6 +250,7 @@ struct gaudi_device {
u32 hw_cap_initialized;
u8 multi_msi_mode;
u8 ext_queue_idx;
+ u8 mmu_cache_inv_pi;
};
void gaudi_init_security(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
index 9a5800b0086b..0f0cd067bb43 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
@@ -197,6 +197,9 @@ struct packet_wait {
__le32 ctl;
};
+#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_SHIFT 0
+#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK 0x00000001
+
struct packet_load_and_exe {
__le32 cfg;
__le32 ctl;