From 0bbeb01a4fafbf8422e5c8882d461d6ac4f71e15 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 10 Dec 2018 09:58:56 +0800
Subject: iommu/vt-d: Manage scalalble mode PASID tables

In scalable mode, pasid structure is a two level table with
a pasid directory table and a pasid table. Any pasid entry
can be identified by a pasid value in below way.

   1
   9                       6 5      0
    .-----------------------.-------.
    |              PASID    |       |
    '-----------------------'-------'    .-------------.
             |                    |      |             |
             |                    |      |             |
             |                    |      |             |
             |     .-----------.  |      .-------------.
             |     |           |  |----->| PASID Entry |
             |     |           |  |      '-------------'
             |     |           |  |Plus  |             |
             |     .-----------.  |      |             |
             |---->| DIR Entry |-------->|             |
             |     '-----------'         '-------------'
.---------.  |Plus |           |
| Context |  |     |           |
|  Entry  |------->|           |
'---------'        '-----------'

This changes the pasid table APIs to support scalable mode
PASID directory and PASID table. It also adds a helper to
get the PASID table entry according to the pasid value.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'drivers/iommu/intel-svm.c')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index db301efe126d..d6c99935d5d9 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -65,8 +65,6 @@ int intel_svm_init(struct intel_iommu *iommu)
 
 	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 	if (ecap_dis(iommu->ecap)) {
-		/* Just making it explicit... */
-		BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
 		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
 		if (pages)
 			iommu->pasid_state_table = page_address(pages);
@@ -405,9 +403,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 			pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
 
 		entry = intel_pasid_get_entry(dev, svm->pasid);
-		entry->val = pasid_entry_val;
-
-		wmb();
+		WRITE_ONCE(entry->val[0], pasid_entry_val);
 
 		/*
 		 * Flush PASID cache when a PASID table entry becomes
-- 
cgit v1.2.3


From 5d308fc1ecf5351418a4f003ccb74dc91b424bd1 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 10 Dec 2018 09:58:58 +0800
Subject: iommu/vt-d: Add 256-bit invalidation descriptor support

Intel vt-d spec rev3.0 requires software to use 256-bit
descriptors in invalidation queue. As the spec reads in
section 6.5.2:

Remapping hardware supporting Scalable Mode Translations
(ECAP_REG.SMTS=1) allow software to additionally program
the width of the descriptors (128-bits or 256-bits) that
will be written into the Queue. Software should setup the
Invalidation Queue for 256-bit descriptors before progra-
mming remapping hardware for scalable-mode translation as
128-bit descriptors are treated as invalid descriptors
(see Table 21 in Section 6.5.2.10) in scalable-mode.

This patch adds 256-bit invalidation descriptor support
if the hardware presents scalable mode capability.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c                | 91 +++++++++++++++++++++++++------------
 drivers/iommu/intel-svm.c           | 76 ++++++++++++++++++++-----------
 drivers/iommu/intel_irq_remapping.c |  6 ++-
 include/linux/intel-iommu.h         |  9 +++-
 4 files changed, 121 insertions(+), 61 deletions(-)

(limited to 'drivers/iommu/intel-svm.c')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index d9c748b6f9e4..9511f9aeb77c 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1160,6 +1160,7 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	int head, tail;
 	struct q_inval *qi = iommu->qi;
 	int wait_index = (index + 1) % QI_LENGTH;
+	int shift = qi_shift(iommu);
 
 	if (qi->desc_status[wait_index] == QI_ABORT)
 		return -EAGAIN;
@@ -1173,13 +1174,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	 */
 	if (fault & DMA_FSTS_IQE) {
 		head = readl(iommu->reg + DMAR_IQH_REG);
-		if ((head >> DMAR_IQ_SHIFT) == index) {
-			pr_err("VT-d detected invalid descriptor: "
-				"low=%llx, high=%llx\n",
-				(unsigned long long)qi->desc[index].low,
-				(unsigned long long)qi->desc[index].high);
-			memcpy(&qi->desc[index], &qi->desc[wait_index],
-					sizeof(struct qi_desc));
+		if ((head >> shift) == index) {
+			struct qi_desc *desc = qi->desc + head;
+
+			/*
+			 * desc->qw2 and desc->qw3 are either reserved or
+			 * used by software as private data. We won't print
+			 * out these two qw's for security consideration.
+			 */
+			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
+			       (unsigned long long)desc->qw0,
+			       (unsigned long long)desc->qw1);
+			memcpy(desc, qi->desc + (wait_index << shift),
+			       1 << shift);
 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
 			return -EINVAL;
 		}
@@ -1191,10 +1198,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index)
 	 */
 	if (fault & DMA_FSTS_ITE) {
 		head = readl(iommu->reg + DMAR_IQH_REG);
-		head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
 		head |= 1;
 		tail = readl(iommu->reg + DMAR_IQT_REG);
-		tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
+		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
 
 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
 
@@ -1222,15 +1229,14 @@ int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 {
 	int rc;
 	struct q_inval *qi = iommu->qi;
-	struct qi_desc *hw, wait_desc;
+	int offset, shift, length;
+	struct qi_desc wait_desc;
 	int wait_index, index;
 	unsigned long flags;
 
 	if (!qi)
 		return 0;
 
-	hw = qi->desc;
-
 restart:
 	rc = 0;
 
@@ -1243,16 +1249,21 @@ restart:
 
 	index = qi->free_head;
 	wait_index = (index + 1) % QI_LENGTH;
+	shift = qi_shift(iommu);
+	length = 1 << shift;
 
 	qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
 
-	hw[index] = *desc;
-
-	wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
+	offset = index << shift;
+	memcpy(qi->desc + offset, desc, length);
+	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
-	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
+	wait_desc.qw2 = 0;
+	wait_desc.qw3 = 0;
 
-	hw[wait_index] = wait_desc;
+	offset = wait_index << shift;
+	memcpy(qi->desc + offset, &wait_desc, length);
 
 	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 	qi->free_cnt -= 2;
@@ -1261,7 +1272,7 @@ restart:
 	 * update the HW tail register indicating the presence of
 	 * new descriptors.
 	 */
-	writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
+	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
 
 	while (qi->desc_status[wait_index] != QI_DONE) {
 		/*
@@ -1298,8 +1309,10 @@ void qi_global_iec(struct intel_iommu *iommu)
 {
 	struct qi_desc desc;
 
-	desc.low = QI_IEC_TYPE;
-	desc.high = 0;
+	desc.qw0 = QI_IEC_TYPE;
+	desc.qw1 = 0;
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	/* should never fail */
 	qi_submit_sync(&desc, iommu);
@@ -1310,9 +1323,11 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
 {
 	struct qi_desc desc;
 
-	desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
+	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
 			| QI_CC_GRAN(type) | QI_CC_TYPE;
-	desc.high = 0;
+	desc.qw1 = 0;
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	qi_submit_sync(&desc, iommu);
 }
@@ -1331,10 +1346,12 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 	if (cap_read_drain(iommu->cap))
 		dr = 1;
 
-	desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
+	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
-	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
+	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
 		| QI_IOTLB_AM(size_order);
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	qi_submit_sync(&desc, iommu);
 }
@@ -1347,15 +1364,17 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 	if (mask) {
 		WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1));
 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
-		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
+		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
 	} else
-		desc.high = QI_DEV_IOTLB_ADDR(addr);
+		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
 
 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
 		qdep = 0;
 
-	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
+	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	qi_submit_sync(&desc, iommu);
 }
@@ -1403,16 +1422,24 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
 	u32 sts;
 	unsigned long flags;
 	struct q_inval *qi = iommu->qi;
+	u64 val = virt_to_phys(qi->desc);
 
 	qi->free_head = qi->free_tail = 0;
 	qi->free_cnt = QI_LENGTH;
 
+	/*
+	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
+	 * is present.
+	 */
+	if (ecap_smts(iommu->ecap))
+		val |= (1 << 11) | 1;
+
 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
 
 	/* write zero to the tail reg */
 	writel(0, iommu->reg + DMAR_IQT_REG);
 
-	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
 
 	iommu->gcmd |= DMA_GCMD_QIE;
 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -1448,8 +1475,12 @@ int dmar_enable_qi(struct intel_iommu *iommu)
 
 	qi = iommu->qi;
 
-
-	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
+	/*
+	 * Need two pages to accommodate 256 descriptors of 256 bits each
+	 * if the remapping hardware supports scalable mode translation.
+	 */
+	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+				     !!ecap_smts(iommu->ecap));
 	if (!desc_page) {
 		kfree(qi);
 		iommu->qi = NULL;
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index d6c99935d5d9..b7f1d12e24b0 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -161,27 +161,40 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 		 * because that's the only option the hardware gives us. Despite
 		 * the fact that they are actually only accessible through one. */
 		if (gl)
-			desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
-				QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
+			desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+					QI_EIOTLB_DID(sdev->did) |
+					QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) |
+					QI_EIOTLB_TYPE;
 		else
-			desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
-				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
-		desc.high = 0;
+			desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+					QI_EIOTLB_DID(sdev->did) |
+					QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+					QI_EIOTLB_TYPE;
+		desc.qw1 = 0;
 	} else {
 		int mask = ilog2(__roundup_pow_of_two(pages));
 
-		desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
-			QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
-		desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
-			QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
+		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+				QI_EIOTLB_DID(sdev->did) |
+				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
+				QI_EIOTLB_TYPE;
+		desc.qw1 = QI_EIOTLB_ADDR(address) |
+				QI_EIOTLB_GL(gl) |
+				QI_EIOTLB_IH(ih) |
+				QI_EIOTLB_AM(mask);
 	}
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 	qi_submit_sync(&desc, svm->iommu);
 
 	if (sdev->dev_iotlb) {
-		desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
-			QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
+		desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
+				QI_DEV_EIOTLB_SID(sdev->sid) |
+				QI_DEV_EIOTLB_QDEP(sdev->qdep) |
+				QI_DEIOTLB_TYPE;
 		if (pages == -1) {
-			desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
+			desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
+					QI_DEV_EIOTLB_SIZE;
 		} else if (pages > 1) {
 			/* The least significant zero bit indicates the size. So,
 			 * for example, an "address" value of 0x12345f000 will
@@ -189,10 +202,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 			unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
 			unsigned long mask = __rounddown_pow_of_two(address ^ last);
 
-			desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
+			desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
+					(mask - 1)) | QI_DEV_EIOTLB_SIZE;
 		} else {
-			desc.high = QI_DEV_EIOTLB_ADDR(address);
+			desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
 		}
+		desc.qw2 = 0;
+		desc.qw3 = 0;
 		qi_submit_sync(&desc, svm->iommu);
 	}
 }
@@ -237,8 +253,11 @@ static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *s
 {
 	struct qi_desc desc;
 
-	desc.high = 0;
-	desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+	desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) |
+			QI_PC_PASID_SEL | QI_PC_PASID(pasid);
+	desc.qw1 = 0;
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	qi_submit_sync(&desc, svm->iommu);
 }
@@ -667,24 +686,27 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 	no_pasid:
 		if (req->lpig) {
 			/* Page Group Response */
-			resp.low = QI_PGRP_PASID(req->pasid) |
+			resp.qw0 = QI_PGRP_PASID(req->pasid) |
 				QI_PGRP_DID((req->bus << 8) | req->devfn) |
 				QI_PGRP_PASID_P(req->pasid_present) |
 				QI_PGRP_RESP_TYPE;
-			resp.high = QI_PGRP_IDX(req->prg_index) |
-				QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
-
-			qi_submit_sync(&resp, iommu);
+			resp.qw1 = QI_PGRP_IDX(req->prg_index) |
+				QI_PGRP_PRIV(req->private) |
+				QI_PGRP_RESP_CODE(result);
 		} else if (req->srr) {
 			/* Page Stream Response */
-			resp.low = QI_PSTRM_IDX(req->prg_index) |
-				QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
-				QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
-			resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
+			resp.qw0 = QI_PSTRM_IDX(req->prg_index) |
+				QI_PSTRM_PRIV(req->private) |
+				QI_PSTRM_BUS(req->bus) |
+				QI_PSTRM_PASID(req->pasid) |
+				QI_PSTRM_RESP_TYPE;
+			resp.qw1 = QI_PSTRM_ADDR(address) |
+				QI_PSTRM_DEVFN(req->devfn) |
 				QI_PSTRM_RESP_CODE(result);
-
-			qi_submit_sync(&resp, iommu);
 		}
+		resp.qw2 = 0;
+		resp.qw3 = 0;
+		qi_submit_sync(&resp, iommu);
 
 		head = (head + sizeof(*req)) & PRQ_RING_MASK;
 	}
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index c2d6c11431de..24d45b07f425 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -145,9 +145,11 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 {
 	struct qi_desc desc;
 
-	desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
+	desc.qw0 = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
 		   | QI_IEC_SELECTIVE;
-	desc.high = 0;
+	desc.qw1 = 0;
+	desc.qw2 = 0;
+	desc.qw3 = 0;
 
 	return qi_submit_sync(&desc, iommu);
 }
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index b4da61385ebf..08ff588a4df7 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -401,13 +401,18 @@ enum {
 #define QI_GRAN_NONG_PASID		2
 #define QI_GRAN_PSI_PASID		3
 
+#define qi_shift(iommu)		(DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap))
+
 struct qi_desc {
-	u64 low, high;
+	u64 qw0;
+	u64 qw1;
+	u64 qw2;
+	u64 qw3;
 };
 
 struct q_inval {
 	raw_spinlock_t  q_lock;
-	struct qi_desc  *desc;          /* invalidation queue */
+	void		*desc;          /* invalidation queue */
 	int             *desc_status;   /* desc status */
 	int             free_head;      /* first free entry */
 	int             free_tail;      /* last free entry */
-- 
cgit v1.2.3


From 1c4f88b7f1f9298b56c7dac18c0bcd8d2f75059a Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 10 Dec 2018 09:59:05 +0800
Subject: iommu/vt-d: Shared virtual address in scalable mode

This patch enables the current SVA (Shared Virtual Address)
implementation to work in the scalable mode.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Sanjay Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 38 -----------------------------
 drivers/iommu/intel-pasid.c |  2 +-
 drivers/iommu/intel-pasid.h |  1 -
 drivers/iommu/intel-svm.c   | 58 ++++++++++++++-------------------------------
 include/linux/intel-iommu.h |  9 +------
 5 files changed, 20 insertions(+), 88 deletions(-)

(limited to 'drivers/iommu/intel-svm.c')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 21a6853290cc..cec88df671a6 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5257,18 +5257,6 @@ static void intel_iommu_put_resv_regions(struct device *dev,
 }
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
-static inline unsigned long intel_iommu_get_pts(struct device *dev)
-{
-	int pts, max_pasid;
-
-	max_pasid = intel_pasid_get_dev_max_id(dev);
-	pts = find_first_bit((unsigned long *)&max_pasid, MAX_NR_PASID_BITS);
-	if (pts < 5)
-		return 0;
-
-	return pts - 5;
-}
-
 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
 {
 	struct device_domain_info *info;
@@ -5300,33 +5288,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
 	sdev->sid = PCI_DEVID(info->bus, info->devfn);
 
 	if (!(ctx_lo & CONTEXT_PASIDE)) {
-		if (iommu->pasid_state_table)
-			context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
-		context[1].lo = (u64)virt_to_phys(info->pasid_table->table) |
-			intel_iommu_get_pts(sdev->dev);
-
-		wmb();
-		/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
-		 * extended to permit requests-with-PASID if the PASIDE bit
-		 * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
-		 * however, the PASIDE bit is ignored and requests-with-PASID
-		 * are unconditionally blocked. Which makes less sense.
-		 * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
-		 * "guest mode" translation types depending on whether ATS
-		 * is available or not. Annoyingly, we can't use the new
-		 * modes *unless* PASIDE is set. */
-		if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
-			ctx_lo &= ~CONTEXT_TT_MASK;
-			if (info->ats_supported)
-				ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
-			else
-				ctx_lo |= CONTEXT_TT_PT_PASID << 2;
-		}
 		ctx_lo |= CONTEXT_PASIDE;
-		if (iommu->pasid_state_table)
-			ctx_lo |= CONTEXT_DINVE;
-		if (info->pri_supported)
-			ctx_lo |= CONTEXT_PRS;
 		context[0].lo = ctx_lo;
 		wmb();
 		iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index c3dcf4dc2496..53fe5248d8f1 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -286,7 +286,7 @@ static inline void pasid_clear_entry(struct pasid_entry *pe)
 	WRITE_ONCE(pe->val[7], 0);
 }
 
-void intel_pasid_clear_entry(struct device *dev, int pasid)
+static void intel_pasid_clear_entry(struct device *dev, int pasid)
 {
 	struct pasid_entry *pe;
 
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index 512c63ec8a22..23537b3f34e3 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -58,7 +58,6 @@ void intel_pasid_free_table(struct device *dev);
 struct pasid_table *intel_pasid_get_table(struct device *dev);
 int intel_pasid_get_dev_max_id(struct device *dev);
 struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
-void intel_pasid_clear_entry(struct device *dev, int pasid);
 int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 				  struct device *dev, pgd_t *pgd,
 				  int pasid, u16 did, int flags);
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index b7f1d12e24b0..04d6bdb51404 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -29,10 +29,6 @@
 
 #include "intel-pasid.h"
 
-#define PASID_ENTRY_P		BIT_ULL(0)
-#define PASID_ENTRY_FLPM_5LP	BIT_ULL(9)
-#define PASID_ENTRY_SRE		BIT_ULL(11)
-
 static irqreturn_t prq_event_thread(int irq, void *d);
 
 struct pasid_state_entry {
@@ -248,20 +244,6 @@ static void intel_invalidate_range(struct mmu_notifier *mn,
 			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
 }
 
-
-static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
-{
-	struct qi_desc desc;
-
-	desc.qw0 = QI_PC_TYPE | QI_PC_DID(sdev->did) |
-			QI_PC_PASID_SEL | QI_PC_PASID(pasid);
-	desc.qw1 = 0;
-	desc.qw2 = 0;
-	desc.qw3 = 0;
-
-	qi_submit_sync(&desc, svm->iommu);
-}
-
 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
@@ -281,8 +263,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	 */
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdev, &svm->devs, list) {
-		intel_pasid_clear_entry(sdev->dev, svm->pasid);
-		intel_flush_pasid_dev(svm, sdev, svm->pasid);
+		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
 		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
 	}
 	rcu_read_unlock();
@@ -301,11 +282,9 @@ static LIST_HEAD(global_svm_list);
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
 {
 	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
-	struct pasid_entry *entry;
 	struct intel_svm_dev *sdev;
 	struct intel_svm *svm = NULL;
 	struct mm_struct *mm = NULL;
-	u64 pasid_entry_val;
 	int pasid_max;
 	int ret;
 
@@ -414,22 +393,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 				kfree(sdev);
 				goto out;
 			}
-			pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P;
-		} else
-			pasid_entry_val = (u64)__pa(init_mm.pgd) |
-					  PASID_ENTRY_P | PASID_ENTRY_SRE;
-		if (cpu_feature_enabled(X86_FEATURE_LA57))
-			pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
-
-		entry = intel_pasid_get_entry(dev, svm->pasid);
-		WRITE_ONCE(entry->val[0], pasid_entry_val);
-
-		/*
-		 * Flush PASID cache when a PASID table entry becomes
-		 * present.
-		 */
-		if (cap_caching_mode(iommu->cap))
-			intel_flush_pasid_dev(svm, sdev, svm->pasid);
+		}
+
+		spin_lock(&iommu->lock);
+		ret = intel_pasid_setup_first_level(iommu, dev,
+				mm ? mm->pgd : init_mm.pgd,
+				svm->pasid, FLPT_DEFAULT_DID,
+				mm ? 0 : PASID_FLAG_SUPERVISOR_MODE);
+		spin_unlock(&iommu->lock);
+		if (ret) {
+			if (mm)
+				mmu_notifier_unregister(&svm->notifier, mm);
+			intel_pasid_free_id(svm->pasid);
+			kfree(svm);
+			kfree(sdev);
+			goto out;
+		}
 
 		list_add_tail(&svm->list, &global_svm_list);
 	}
@@ -475,10 +454,9 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 				 * to use. We have a *shared* PASID table, because it's
 				 * large and has to be physically contiguous. So it's
 				 * hard to be as defensive as we might like. */
-				intel_flush_pasid_dev(svm, sdev, svm->pasid);
+				intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
 				intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
 				kfree_rcu(sdev, rcu);
-				intel_pasid_clear_entry(dev, svm->pasid);
 
 				if (list_empty(&svm->devs)) {
 					intel_pasid_free_id(svm->pasid);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4ad62396e81e..cfcf9c1e1872 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -54,14 +54,7 @@
 #define CONTEXT_TT_MULTI_LEVEL	0
 #define CONTEXT_TT_DEV_IOTLB	1
 #define CONTEXT_TT_PASS_THROUGH 2
-/* Extended context entry types */
-#define CONTEXT_TT_PT_PASID	4
-#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5
-#define CONTEXT_TT_MASK (7ULL << 2)
-
-#define CONTEXT_DINVE		(1ULL << 8)
-#define CONTEXT_PRS		(1ULL << 9)
-#define CONTEXT_PASIDE		(1ULL << 11)
+#define CONTEXT_PASIDE		BIT_ULL(3)
 
 /*
  * Intel IOMMU register specification per version 1.0 public spec.
-- 
cgit v1.2.3


From 6d68b88e0993d67e9ebb1240f84240b712fbc8a4 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 10 Dec 2018 09:59:06 +0800
Subject: iommu/vt-d: Remove deferred invalidation

Deferred invalidation is an ECS specific feature. It will not be
supported when IOMMU works in scalable mode. As we deprecated the
ECS support, remove deferred invalidation and cleanup the code.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Cc: Sanjay Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |  1 -
 drivers/iommu/intel-svm.c   | 45 ---------------------------------------------
 include/linux/intel-iommu.h |  8 --------
 3 files changed, 54 deletions(-)

(limited to 'drivers/iommu/intel-svm.c')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index cec88df671a6..9043e1e9b2be 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1700,7 +1700,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 	if (pasid_supported(iommu)) {
 		if (ecap_prs(iommu->ecap))
 			intel_svm_finish_prq(iommu);
-		intel_svm_exit(iommu);
 	}
 #endif
 }
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 04d6bdb51404..5b2e3b2d593b 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -31,15 +31,8 @@
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
-struct pasid_state_entry {
-	u64 val;
-};
-
 int intel_svm_init(struct intel_iommu *iommu)
 {
-	struct page *pages;
-	int order;
-
 	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
 			!cap_fl1gp_support(iommu->cap))
 		return -EINVAL;
@@ -48,39 +41,6 @@ int intel_svm_init(struct intel_iommu *iommu)
 			!cap_5lp_support(iommu->cap))
 		return -EINVAL;
 
-	/* Start at 2 because it's defined as 2^(1+PSS) */
-	iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
-
-	/* Eventually I'm promised we will get a multi-level PASID table
-	 * and it won't have to be physically contiguous. Until then,
-	 * limit the size because 8MiB contiguous allocations can be hard
-	 * to come by. The limit of 0x20000, which is 1MiB for each of
-	 * the PASID and PASID-state tables, is somewhat arbitrary. */
-	if (iommu->pasid_max > 0x20000)
-		iommu->pasid_max = 0x20000;
-
-	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-	if (ecap_dis(iommu->ecap)) {
-		pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
-		if (pages)
-			iommu->pasid_state_table = page_address(pages);
-		else
-			pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
-				iommu->name);
-	}
-
-	return 0;
-}
-
-int intel_svm_exit(struct intel_iommu *iommu)
-{
-	int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-
-	if (iommu->pasid_state_table) {
-		free_pages((unsigned long)iommu->pasid_state_table, order);
-		iommu->pasid_state_table = NULL;
-	}
-
 	return 0;
 }
 
@@ -214,11 +174,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
 {
 	struct intel_svm_dev *sdev;
 
-	/* Try deferred invalidate if available */
-	if (svm->iommu->pasid_state_table &&
-	    !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
-		return;
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdev, &svm->devs, list)
 		intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index cfcf9c1e1872..0605f3bf6e79 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -541,15 +541,8 @@ struct intel_iommu {
 	struct iommu_flush flush;
 #endif
 #ifdef CONFIG_INTEL_IOMMU_SVM
-	/* These are large and need to be contiguous, so we allocate just
-	 * one for now. We'll maybe want to rethink that if we truly give
-	 * devices away to userspace processes (e.g. for DPDK) and don't
-	 * want to trust that userspace will use *only* the PASID it was
-	 * told to. But while it's all driver-arbitrated, we're fine. */
-	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
-	u32 pasid_max;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
 	u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@@ -663,7 +656,6 @@ void iommu_flush_write_buffer(struct intel_iommu *iommu);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 int intel_svm_init(struct intel_iommu *iommu);
-int intel_svm_exit(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
 
-- 
cgit v1.2.3