author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2016-10-14 02:25:40 +0200
---|---|---
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2016-10-14 02:25:40 +0200
commit | 4a7126a25b4dfd07d61c699f724118275acc0c25 | (patch)
tree | c78b82bfaa96f330d412ad63e355906f963c3faf | /drivers/iommu
parent | Input: i8042 - skip selftest on ASUS laptops | (diff)
parent | Linux 4.8 | (diff)
Merge tag 'v4.8' into next
Sync up with mainline to bring in I2C host notify changes and other
updates.
Diffstat (limited to 'drivers/iommu')
30 files changed, 3608 insertions, 2228 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dd1dc39f84ff..d432ca828472 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -76,8 +76,7 @@ config IOMMU_DMA config FSL_PAMU bool "Freescale IOMMU support" - depends on PPC32 - depends on PPC_E500MC || COMPILE_TEST + depends on PPC_E500MC || (COMPILE_TEST && PPC) select IOMMU_API select GENERIC_ALLOCATOR help @@ -90,8 +89,8 @@ config MSM_IOMMU bool "MSM IOMMU Support" depends on ARM depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST - depends on BROKEN select IOMMU_API + select IOMMU_IO_PGTABLE_ARMV7S help Support for the IOMMUs found on certain Qualcomm SOCs. These IOMMUs allow virtualization of the address space used by most @@ -112,6 +111,7 @@ config AMD_IOMMU select PCI_PRI select PCI_PASID select IOMMU_API + select IOMMU_IOVA depends on X86_64 && PCI && ACPI ---help--- With this option you can enable support for AMD IOMMU hardware in @@ -124,16 +124,6 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. -config AMD_IOMMU_STATS - bool "Export AMD IOMMU statistics to debugfs" - depends on AMD_IOMMU - select DEBUG_FS - ---help--- - This option enables code in the AMD IOMMU driver to collect various - statistics about whats happening in the driver and exports that - information to userspace via debugfs. - If unsure, say N. - config AMD_IOMMU_V2 tristate "AMD IOMMU Version 2 driver" depends on AMD_IOMMU @@ -354,4 +344,22 @@ config MTK_IOMMU If unsure, say N here. +config MTK_IOMMU_V1 + bool "MTK IOMMU Version 1 (M4U gen1) Support" + depends on ARM + depends on ARCH_MEDIATEK || COMPILE_TEST + select ARM_DMA_USE_IOMMU + select IOMMU_API + select MEMORY + select MTK_SMI + select COMMON_CLK_MT2701_MMSYS + select COMMON_CLK_MT2701_IMGSYS + select COMMON_CLK_MT2701_VDECSYS + help + Support for the M4U on certain Mediatek SoCs. M4U generation 1 HW is + Multimedia Memory Managememt Unit. This option enables remapping of + DMA memory accesses for the multimedia subsystem. + + if unsure, say N here. 
+ endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6edb31bf8c6..195f7b997d8e 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o -obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o +obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o @@ -18,6 +18,7 @@ obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o +obj-$(CONFIG_MTK_IOMMU_V1) += mtk_iommu_v1.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5efadad4615b..96de97a46079 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -19,6 +19,9 @@ #include <linux/ratelimit.h> #include <linux/pci.h> +#include <linux/acpi.h> +#include <linux/amba/bus.h> +#include <linux/platform_device.h> #include <linux/pci-ats.h> #include <linux/bitmap.h> #include <linux/slab.h> @@ -36,6 +39,7 @@ #include <linux/dma-contiguous.h> #include <linux/irqdomain.h> #include <linux/percpu.h> +#include <linux/iova.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> @@ -54,6 +58,17 @@ #define LOOP_TIMEOUT 100000 +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) + +/* Reserved IOVA ranges */ +#define MSI_RANGE_START (0xfee00000) +#define MSI_RANGE_END (0xfeefffff) +#define HT_RANGE_START (0xfd00000000ULL) +#define HT_RANGE_END (0xffffffffffULL) + /* * This bitmap is used to advertise the page sizes our hardware support * to the IOMMU core, which will then use this information to split @@ -72,6 +87,26 @@ static DEFINE_SPINLOCK(dev_data_list_lock); LIST_HEAD(ioapic_map); LIST_HEAD(hpet_map); +LIST_HEAD(acpihid_map); + +#define FLUSH_QUEUE_SIZE 256 + +struct flush_queue_entry { + unsigned long iova_pfn; + unsigned long pages; + struct dma_ops_domain *dma_dom; +}; + +struct flush_queue { + spinlock_t lock; + unsigned next; + struct flush_queue_entry *entries; +}; + +DEFINE_PER_CPU(struct flush_queue, flush_queue); + +static atomic_t queue_timer_on; +static struct timer_list queue_timer; /* * Domain for untranslated devices - only allocated @@ -118,62 +153,90 @@ static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); /* - * For dynamic growth the aperture size is split into ranges of 128MB of - * DMA address space each. This struct represents one such range. - */ -struct aperture_range { - - spinlock_t bitmap_lock; - - /* address allocation bitmap */ - unsigned long *bitmap; - unsigned long offset; - unsigned long next_bit; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. 
- */ - u64 *pte_pages[64]; -}; - -/* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { /* generic protection domain information */ struct protection_domain domain; - /* size of the aperture for the mappings */ - unsigned long aperture_size; - - /* aperture index we start searching for free addresses */ - u32 __percpu *next_index; - - /* address space relevant data */ - struct aperture_range *aperture[APERTURE_MAX_RANGES]; + /* IOVA RB-Tree */ + struct iova_domain iovad; }; +static struct iova_domain reserved_iova_ranges; +static struct lock_class_key reserved_rbtree_key; + /**************************************************************************** * * Helper functions * ****************************************************************************/ -static struct protection_domain *to_pdomain(struct iommu_domain *dom) +static inline int match_hid_uid(struct device *dev, + struct acpihid_map_entry *entry) { - return container_of(dom, struct protection_domain, domain); + const char *hid, *uid; + + hid = acpi_device_hid(ACPI_COMPANION(dev)); + uid = acpi_device_uid(ACPI_COMPANION(dev)); + + if (!hid || !(*hid)) + return -ENODEV; + + if (!uid || !(*uid)) + return strcmp(hid, entry->hid); + + if (!(*entry->uid)) + return strcmp(hid, entry->hid); + + return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid)); } -static inline u16 get_device_id(struct device *dev) +static inline u16 get_pci_device_id(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); return PCI_DEVID(pdev->bus->number, pdev->devfn); } +static inline int get_acpihid_device_id(struct device *dev, + struct acpihid_map_entry **entry) +{ + struct acpihid_map_entry *p; + + list_for_each_entry(p, &acpihid_map, list) { + if (!match_hid_uid(dev, p)) { + if (entry) + *entry = p; + return p->devid; + } + } + return -EINVAL; +} + +static inline int get_device_id(struct device *dev) +{ + int devid; + + if (dev_is_pci(dev)) + devid = get_pci_device_id(dev); + else + devid = get_acpihid_device_id(dev, NULL); + + return devid; +} + +static struct protection_domain *to_pdomain(struct iommu_domain *dom) +{ + return container_of(dom, struct protection_domain, domain); +} + +static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain) +{ + BUG_ON(domain->flags != PD_DMA_OPS_MASK); + return container_of(domain, struct dma_ops_domain, domain); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -222,6 +285,7 @@ static u16 get_alias(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); u16 devid, ivrs_alias, pci_alias; + /* The callers make sure that get_device_id() does not fail here */ devid = get_device_id(dev); ivrs_alias = amd_iommu_alias_table[devid]; pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); @@ -263,8 +327,7 @@ static u16 get_alias(struct device *dev) */ if (pci_alias == devid && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; - pdev->dma_alias_devfn = ivrs_alias & 0xff; + pci_add_dma_alias(pdev, ivrs_alias & 0xff); pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), dev_name(dev)); @@ -290,6 +353,29 @@ static struct iommu_dev_data *get_dev_data(struct device *dev) return dev->archdata.iommu; } +/* +* Find or create an IOMMU group for a acpihid device. 
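The hunk above replaces the PCI-only get_device_id() with a lookup that also covers ACPI-HID (AMBA/platform) devices: match_hid_uid() compares the device's ACPI companion HID/UID strings against the IVRS-derived acpihid_map, and get_device_id() now returns a negative errno when nothing matches. A minimal userspace sketch of that matching rule follows; the struct layout and sample HID values are made up for illustration, only the comparison logic mirrors the hunk.

```c
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* Illustrative stand-in for the driver's acpihid_map entries. */
struct hid_entry {
	const char *hid;
	const char *uid;
	int devid;
};

/* Mirrors match_hid_uid(): the HID must match; the UID is only compared
 * when both the device and the table entry actually carry one.
 * Returns 0 on match, non-zero (or -ENODEV) otherwise. */
static int match_hid_uid(const char *hid, const char *uid,
			 const struct hid_entry *e)
{
	if (!hid || !*hid)
		return -ENODEV;
	if (!uid || !*uid || !*e->uid)
		return strcmp(hid, e->hid);
	return strcmp(hid, e->hid) || strcmp(uid, e->uid);
}

/* Walk the table and return the devid of the first match, or -EINVAL. */
static int get_acpihid_device_id(const char *hid, const char *uid,
				 const struct hid_entry *map, int n)
{
	for (int i = 0; i < n; i++)
		if (!match_hid_uid(hid, uid, &map[i]))
			return map[i].devid;
	return -EINVAL;
}

int main(void)
{
	const struct hid_entry map[] = {
		{ "AMDI0020", "0", 0x98 },	/* sample values only */
		{ "AMDI0020", "1", 0x99 },
	};

	printf("%d\n", get_acpihid_device_id("AMDI0020", "1", map, 2)); /* 153 */
	printf("%d\n", get_acpihid_device_id("AMDI0040", "0", map, 2)); /* -EINVAL */
	return 0;
}
```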
+*/ +static struct iommu_group *acpihid_device_group(struct device *dev) +{ + struct acpihid_map_entry *p, *entry = NULL; + int devid; + + devid = get_acpihid_device_id(dev, &entry); + if (devid < 0) + return ERR_PTR(devid); + + list_for_each_entry(p, &acpihid_map, list) { + if ((devid == p->devid) && p->group) + entry->group = p->group; + } + + if (!entry->group) + entry->group = generic_device_group(dev); + + return entry->group; +} + static bool pci_iommuv2_capable(struct pci_dev *pdev) { static const int caps[] = { @@ -318,56 +404,19 @@ static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) } /* - * This function actually applies the mapping to the page table of the - * dma_ops domain. - */ -static void alloc_unity_mapping(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e) -{ - u64 addr; - - for (addr = e->address_start; addr < e->address_end; - addr += PAGE_SIZE) { - if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, - dma_dom->aperture[0]->bitmap); - } -} - -/* - * Inits the unity mappings required for a specific device - */ -static void init_unity_mappings_for_device(struct device *dev, - struct dma_ops_domain *dma_dom) -{ - struct unity_map_entry *e; - u16 devid; - - devid = get_device_id(dev); - - list_for_each_entry(e, &amd_iommu_unity_map, list) { - if (!(devid >= e->devid_start && devid <= e->devid_end)) - continue; - alloc_unity_mapping(dma_dom, e); - } -} - -/* * This function checks if the driver got a valid device from the caller to * avoid dereferencing invalid pointers. */ static bool check_device(struct device *dev) { - u16 devid; + int devid; if (!dev || !dev->dma_mask) return false; - /* No PCI device */ - if (!dev_is_pci(dev)) - return false; - devid = get_device_id(dev); + if (devid < 0) + return false; /* Out of our scope? 
*/ if (devid > amd_iommu_last_bdf) @@ -381,43 +430,37 @@ static bool check_device(struct device *dev) static void init_iommu_group(struct device *dev) { - struct dma_ops_domain *dma_domain; - struct iommu_domain *domain; struct iommu_group *group; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return; - domain = iommu_group_default_domain(group); - if (!domain) - goto out; - - dma_domain = to_pdomain(domain)->priv; - - init_unity_mappings_for_device(dev, dma_domain); -out: iommu_group_put(group); } static int iommu_init_device(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; + int devid; if (dev->archdata.iommu) return 0; - dev_data = find_dev_data(get_device_id(dev)); + devid = get_device_id(dev); + if (devid < 0) + return devid; + + dev_data = find_dev_data(devid); if (!dev_data) return -ENOMEM; dev_data->alias = get_alias(dev); - if (pci_iommuv2_capable(pdev)) { + if (dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { struct amd_iommu *iommu; - iommu = amd_iommu_rlookup_table[dev_data->devid]; + iommu = amd_iommu_rlookup_table[dev_data->devid]; dev_data->iommu_v2 = iommu->is_iommu_v2; } @@ -431,9 +474,13 @@ static int iommu_init_device(struct device *dev) static void iommu_ignore_device(struct device *dev) { - u16 devid, alias; + u16 alias; + int devid; devid = get_device_id(dev); + if (devid < 0) + return; + alias = get_alias(dev); memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); @@ -445,8 +492,14 @@ static void iommu_ignore_device(struct device *dev) static void iommu_uninit_device(struct device *dev) { - struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev)); + int devid; + struct iommu_dev_data *dev_data; + devid = get_device_id(dev); + if (devid < 0) + return; + + dev_data = search_dev_data(devid); if (!dev_data) return; @@ -467,70 +520,6 @@ static void iommu_uninit_device(struct device *dev) */ } -#ifdef CONFIG_AMD_IOMMU_STATS - -/* - * Initialization code for statistics collection - */ - -DECLARE_STATS_COUNTER(compl_wait); -DECLARE_STATS_COUNTER(cnt_map_single); -DECLARE_STATS_COUNTER(cnt_unmap_single); -DECLARE_STATS_COUNTER(cnt_map_sg); -DECLARE_STATS_COUNTER(cnt_unmap_sg); -DECLARE_STATS_COUNTER(cnt_alloc_coherent); -DECLARE_STATS_COUNTER(cnt_free_coherent); -DECLARE_STATS_COUNTER(cross_page); -DECLARE_STATS_COUNTER(domain_flush_single); -DECLARE_STATS_COUNTER(domain_flush_all); -DECLARE_STATS_COUNTER(alloced_io_mem); -DECLARE_STATS_COUNTER(total_map_requests); -DECLARE_STATS_COUNTER(complete_ppr); -DECLARE_STATS_COUNTER(invalidate_iotlb); -DECLARE_STATS_COUNTER(invalidate_iotlb_all); -DECLARE_STATS_COUNTER(pri_requests); - -static struct dentry *stats_dir; -static struct dentry *de_fflush; - -static void amd_iommu_stats_add(struct __iommu_counter *cnt) -{ - if (stats_dir == NULL) - return; - - cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, - &cnt->value); -} - -static void amd_iommu_stats_init(void) -{ - stats_dir = debugfs_create_dir("amd-iommu", NULL); - if (stats_dir == NULL) - return; - - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, - &amd_iommu_unmap_flush); - - amd_iommu_stats_add(&compl_wait); - amd_iommu_stats_add(&cnt_map_single); - amd_iommu_stats_add(&cnt_unmap_single); - amd_iommu_stats_add(&cnt_map_sg); - amd_iommu_stats_add(&cnt_unmap_sg); - amd_iommu_stats_add(&cnt_alloc_coherent); - amd_iommu_stats_add(&cnt_free_coherent); - amd_iommu_stats_add(&cross_page); - amd_iommu_stats_add(&domain_flush_single); - 
amd_iommu_stats_add(&domain_flush_all); - amd_iommu_stats_add(&alloced_io_mem); - amd_iommu_stats_add(&total_map_requests); - amd_iommu_stats_add(&complete_ppr); - amd_iommu_stats_add(&invalidate_iotlb); - amd_iommu_stats_add(&invalidate_iotlb_all); - amd_iommu_stats_add(&pri_requests); -} - -#endif - /**************************************************************************** * * Interrupt handling functions @@ -653,8 +642,6 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) { struct amd_iommu_fault fault; - INC_STATS_COUNTER(pri_requests); - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); return; @@ -1199,7 +1186,7 @@ static void domain_flush_complete(struct protection_domain *domain) int i; for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) + if (domain && !domain->dev_iommu[i]) continue; /* @@ -1376,8 +1363,9 @@ static u64 *fetch_pte(struct protection_domain *domain, static int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, + unsigned long page_size, int prot, - unsigned long page_size) + gfp_t gfp) { u64 __pte, *pte; int i, count; @@ -1389,7 +1377,7 @@ static int iommu_map_page(struct protection_domain *dom, return -EINVAL; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); if (!pte) return -ENOMEM; @@ -1453,320 +1441,37 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /**************************************************************************** * * The next functions belong to the address allocator for the dma_ops - * interface functions. They work like the allocators in the other IOMMU - * drivers. Its basically a bitmap which marks the allocated pages in - * the aperture. Maybe it could be enhanced in the future to a more - * efficient allocator. + * interface functions. * ****************************************************************************/ -/* - * The address allocator core functions. - * - * called with domain->lock held - */ - -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - -/* - * This function is used to add a new aperture range to an existing - * aperture in case of dma_ops domain allocation or address allocation - * failure. 
- */ -static int alloc_new_range(struct dma_ops_domain *dma_dom, - bool populate, gfp_t gfp) +static unsigned long dma_ops_alloc_iova(struct device *dev, + struct dma_ops_domain *dma_dom, + unsigned int pages, u64 dma_mask) { - int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - unsigned long i, old_size, pte_pgsize; - struct aperture_range *range; - struct amd_iommu *iommu; - unsigned long flags; - -#ifdef CONFIG_IOMMU_STRESS - populate = false; -#endif - - if (index >= APERTURE_MAX_RANGES) - return -ENOMEM; - - range = kzalloc(sizeof(struct aperture_range), gfp); - if (!range) - return -ENOMEM; - - range->bitmap = (void *)get_zeroed_page(gfp); - if (!range->bitmap) - goto out_free; - - range->offset = dma_dom->aperture_size; - - spin_lock_init(&range->bitmap_lock); - - if (populate) { - unsigned long address = dma_dom->aperture_size; - int i, num_ptes = APERTURE_RANGE_PAGES / 512; - u64 *pte, *pte_page; - - for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, - &pte_page, gfp); - if (!pte) - goto out_free; - - range->pte_pages[i] = pte_page; - - address += APERTURE_RANGE_SIZE / 64; - } - } - - spin_lock_irqsave(&dma_dom->domain.lock, flags); - - /* First take the bitmap_lock and then publish the range */ - spin_lock(&range->bitmap_lock); - - old_size = dma_dom->aperture_size; - dma_dom->aperture[index] = range; - dma_dom->aperture_size += APERTURE_RANGE_SIZE; - - /* Reserve address range used for MSI messages */ - if (old_size < MSI_ADDR_BASE_LO && - dma_dom->aperture_size > MSI_ADDR_BASE_LO) { - unsigned long spage; - int pages; - - pages = iommu_num_pages(MSI_ADDR_BASE_LO, 0x10000, PAGE_SIZE); - spage = MSI_ADDR_BASE_LO >> PAGE_SHIFT; - - dma_ops_reserve_addresses(dma_dom, spage, pages); - } - - /* Initialize the exclusion range if necessary */ - for_each_iommu(iommu) { - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset - && iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - startpage = iommu->exclusion_start >> PAGE_SHIFT; - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } - } + unsigned long pfn = 0; - /* - * Check for areas already mapped as present in the new aperture - * range and mark those pages as reserved in the allocator. Such - * mappings may already exist as a result of requested unity - * mappings for devices. 
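dma_ops_alloc_iova() and dma_ops_free_iova() above swap the home-grown aperture bitmaps for the generic IOVA allocator: the requested page count is rounded up to a power of two, an IOVA below 4 GiB is tried first, and the device's full DMA mask is used only if that window is exhausted. A rough userspace model of that policy follows; alloc_iova_fast() is a real kernel API, so it is stubbed here with a toy allocator, and only the round-up and two-step limit logic mirror the hunk.

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define IOVA_PFN(addr)	((addr) >> PAGE_SHIFT)
#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* Toy stand-in for alloc_iova_fast(): hand out descending PFNs below the
 * given limit. The real allocator uses a per-domain rcache and rbtree. */
static unsigned long fake_alloc_iova(unsigned long pages,
				     unsigned long limit_pfn)
{
	static unsigned long next = 0xffff0;	/* arbitrary demo cursor */

	if (next > limit_pfn)
		next = limit_pfn;
	next -= pages;
	return next;
}

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

/* Mirrors dma_ops_alloc_iova(): prefer an IOVA below 4 GiB, fall back to
 * the device's full DMA mask, and return a byte address. */
static uint64_t dma_ops_alloc_iova(unsigned long pages, uint64_t dma_mask)
{
	unsigned long pfn = 0;

	pages = roundup_pow_of_two(pages);

	if (dma_mask > DMA_BIT_MASK(32))
		pfn = fake_alloc_iova(pages, IOVA_PFN(DMA_BIT_MASK(32)));
	if (!pfn)
		pfn = fake_alloc_iova(pages, IOVA_PFN(dma_mask));

	return (uint64_t)pfn << PAGE_SHIFT;
}

int main(void)
{
	printf("0x%llx\n",
	       (unsigned long long)dma_ops_alloc_iova(3, DMA_BIT_MASK(64)));
	return 0;
}
```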
- */ - for (i = dma_dom->aperture[index]->offset; - i < dma_dom->aperture_size; - i += pte_pgsize) { - u64 *pte = fetch_pte(&dma_dom->domain, i, &pte_pgsize); - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - continue; - - dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, - pte_pgsize >> 12); - } + pages = __roundup_pow_of_two(pages); - update_domain(&dma_dom->domain); + if (dma_mask > DMA_BIT_MASK(32)) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(DMA_BIT_MASK(32))); - spin_unlock(&range->bitmap_lock); + if (!pfn) + pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); - spin_unlock_irqrestore(&dma_dom->domain.lock, flags); - - return 0; - -out_free: - update_domain(&dma_dom->domain); - - free_page((unsigned long)range->bitmap); - - kfree(range); - - return -ENOMEM; + return (pfn << PAGE_SHIFT); } -static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, - struct aperture_range *range, - unsigned long pages, - unsigned long dma_mask, - unsigned long boundary_size, - unsigned long align_mask, - bool trylock) +static void dma_ops_free_iova(struct dma_ops_domain *dma_dom, + unsigned long address, + unsigned int pages) { - unsigned long offset, limit, flags; - dma_addr_t address; - bool flush = false; - - offset = range->offset >> PAGE_SHIFT; - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, - dma_mask >> PAGE_SHIFT); - - if (trylock) { - if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) - return -1; - } else { - spin_lock_irqsave(&range->bitmap_lock, flags); - } - - address = iommu_area_alloc(range->bitmap, limit, range->next_bit, - pages, offset, boundary_size, align_mask); - if (address == -1) { - /* Nothing found, retry one time */ - address = iommu_area_alloc(range->bitmap, limit, - 0, pages, offset, boundary_size, - align_mask); - flush = true; - } - - if (address != -1) - range->next_bit = address + pages; - - spin_unlock_irqrestore(&range->bitmap_lock, flags); - - if (flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - return address; -} - -static unsigned long dma_ops_area_alloc(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) -{ - unsigned long boundary_size, mask; - unsigned long address = -1; - bool first = true; - u32 start, i; - - preempt_disable(); - - mask = dma_get_seg_boundary(dev); - -again: - start = this_cpu_read(*dom->next_index); - - /* Sanity check - is it really necessary? */ - if (unlikely(start > APERTURE_MAX_RANGES)) { - start = 0; - this_cpu_write(*dom->next_index, 0); - } - - boundary_size = mask + 1 ? 
ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : - 1UL << (BITS_PER_LONG - PAGE_SHIFT); - - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - struct aperture_range *range; - int index; - - index = (start + i) % APERTURE_MAX_RANGES; - - range = dom->aperture[index]; - - if (!range || range->offset >= dma_mask) - continue; - - address = dma_ops_aperture_alloc(dom, range, pages, - dma_mask, boundary_size, - align_mask, first); - if (address != -1) { - address = range->offset + (address << PAGE_SHIFT); - this_cpu_write(*dom->next_index, index); - break; - } - } - - if (address == -1 && first) { - first = false; - goto again; - } - - preempt_enable(); - - return address; -} - -static unsigned long dma_ops_alloc_addresses(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) -{ - unsigned long address = -1; - - while (address == -1) { - address = dma_ops_area_alloc(dev, dom, pages, - align_mask, dma_mask); - - if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) - break; - } - - if (unlikely(address == -1)) - address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - - return address; -} - -/* - * The address free function. - * - * called with domain->lock held - */ -static void dma_ops_free_addresses(struct dma_ops_domain *dom, - unsigned long address, - unsigned int pages) -{ - unsigned i = address >> APERTURE_RANGE_SHIFT; - struct aperture_range *range = dom->aperture[i]; - unsigned long flags; - - BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - -#ifdef CONFIG_IOMMU_STRESS - if (i < 4) - return; -#endif - - if (amd_iommu_unmap_flush) { - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); - } - - address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; - - spin_lock_irqsave(&range->bitmap_lock, flags); - if (address + pages > range->next_bit) - range->next_bit = address + pages; - bitmap_clear(range->bitmap, address, pages); - spin_unlock_irqrestore(&range->bitmap_lock, flags); + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + free_iova_fast(&dma_dom->iovad, address, pages); } /**************************************************************************** @@ -1940,44 +1645,18 @@ static void free_gcr3_table(struct protection_domain *domain) */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { - int i; - if (!dom) return; - free_percpu(dom->next_index); - del_domain_from_list(&dom->domain); - free_pagetable(&dom->domain); + put_iova_domain(&dom->iovad); - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - if (!dom->aperture[i]) - continue; - free_page((unsigned long)dom->aperture[i]->bitmap); - kfree(dom->aperture[i]); - } + free_pagetable(&dom->domain); kfree(dom); } -static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, - int max_apertures) -{ - int ret, i, apertures; - - apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - ret = 0; - - for (i = apertures; i < max_apertures; ++i) { - ret = alloc_new_range(dma_dom, false, GFP_KERNEL); - if (ret) - break; - } - - return ret; -} - /* * Allocates a new protection domain usable for the dma_ops functions. 
* It also initializes the page table and the address allocator data @@ -1986,7 +1665,6 @@ static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; - int cpu; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) @@ -1995,30 +1673,19 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; - dma_dom->next_index = alloc_percpu(u32); - if (!dma_dom->next_index) - goto free_dma_dom; - - dma_dom->domain.mode = PAGE_MODE_2_LEVEL; + dma_dom->domain.mode = PAGE_MODE_3_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; - dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; - add_domain_to_list(&dma_dom->domain); - - if (alloc_new_range(dma_dom, true, GFP_KERNEL)) - goto free_dma_dom; + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, + IOVA_START_PFN, DMA_32BIT_PFN); - /* - * mark the first page as allocated so we never return 0 as - * a valid dma-address. So we can use 0 as error value - */ - dma_dom->aperture[0]->bitmap[0] = 1; + /* Initialize reserved ranges */ + copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); - for_each_possible_cpu(cpu) - *per_cpu_ptr(dma_dom->next_index, cpu) = 0; + add_domain_to_list(&dma_dom->domain); return dma_dom; @@ -2284,13 +1951,17 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) static int attach_device(struct device *dev, struct protection_domain *domain) { - struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *pdev; struct iommu_dev_data *dev_data; unsigned long flags; int ret; dev_data = get_dev_data(dev); + if (!dev_is_pci(dev)) + goto skip_ats_check; + + pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { if (!dev_data->passthrough) return -EINVAL; @@ -2309,6 +1980,7 @@ static int attach_device(struct device *dev, dev_data->ats.qdep = pci_ats_queue_depth(pdev); } +skip_ats_check: write_lock_irqsave(&amd_iommu_devtable_lock, flags); ret = __attach_device(dev_data, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -2365,6 +2037,9 @@ static void detach_device(struct device *dev) __detach_device(dev_data); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + if (!dev_is_pci(dev)) + return; + if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); else if (dev_data->ats.enabled) @@ -2378,13 +2053,15 @@ static int amd_iommu_add_device(struct device *dev) struct iommu_dev_data *dev_data; struct iommu_domain *domain; struct amd_iommu *iommu; - u16 devid; - int ret; + int ret, devid; if (!check_device(dev) || get_dev_data(dev)) return 0; devid = get_device_id(dev); + if (devid < 0) + return devid; + iommu = amd_iommu_rlookup_table[devid]; ret = iommu_init_device(dev); @@ -2422,24 +2099,121 @@ out: static void amd_iommu_remove_device(struct device *dev) { struct amd_iommu *iommu; - u16 devid; + int devid; if (!check_device(dev)) return; devid = get_device_id(dev); + if (devid < 0) + return; + iommu = amd_iommu_rlookup_table[devid]; iommu_uninit_device(dev); iommu_completion_wait(iommu); } +static struct iommu_group *amd_iommu_device_group(struct device *dev) +{ + if (dev_is_pci(dev)) + return pci_device_group(dev); + + return acpihid_device_group(dev); +} + /***************************************************************************** * * The next functions belong to the dma_ops 
mapping/unmapping code. * *****************************************************************************/ +static void __queue_flush(struct flush_queue *queue) +{ + struct protection_domain *domain; + unsigned long flags; + int idx; + + /* First flush TLB of all known domains */ + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_for_each_entry(domain, &amd_iommu_pd_list, list) + domain_flush_tlb(domain); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); + + /* Wait until flushes have completed */ + domain_flush_complete(NULL); + + for (idx = 0; idx < queue->next; ++idx) { + struct flush_queue_entry *entry; + + entry = queue->entries + idx; + + free_iova_fast(&entry->dma_dom->iovad, + entry->iova_pfn, + entry->pages); + + /* Not really necessary, just to make sure we catch any bugs */ + entry->dma_dom = NULL; + } + + queue->next = 0; +} + +static void queue_flush_all(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct flush_queue *queue; + unsigned long flags; + + queue = per_cpu_ptr(&flush_queue, cpu); + spin_lock_irqsave(&queue->lock, flags); + if (queue->next > 0) + __queue_flush(queue); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + +static void queue_flush_timeout(unsigned long unsused) +{ + atomic_set(&queue_timer_on, 0); + queue_flush_all(); +} + +static void queue_add(struct dma_ops_domain *dma_dom, + unsigned long address, unsigned long pages) +{ + struct flush_queue_entry *entry; + struct flush_queue *queue; + unsigned long flags; + int idx; + + pages = __roundup_pow_of_two(pages); + address >>= PAGE_SHIFT; + + queue = get_cpu_ptr(&flush_queue); + spin_lock_irqsave(&queue->lock, flags); + + if (queue->next == FLUSH_QUEUE_SIZE) + __queue_flush(queue); + + idx = queue->next++; + entry = queue->entries + idx; + + entry->iova_pfn = address; + entry->pages = pages; + entry->dma_dom = dma_dom; + + spin_unlock_irqrestore(&queue->lock, flags); + + if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0) + mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10)); + + put_cpu_ptr(&flush_queue); +} + + /* * In the dma_ops path we only have the struct device. 
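queue_add() and __queue_flush() above introduce lazy unmapping: freed IOVAs are parked in a per-CPU flush queue and only handed back to the allocator after the domain TLBs have been flushed, which happens when a queue fills up or the 10 ms queue_timer fires. A simplified single-threaded model of that batching, with no per-CPU data, locking, or timer:

```c
#include <stdio.h>

#define FLUSH_QUEUE_SIZE 4	/* tiny for the demo; the driver uses 256 */

struct flush_entry {
	unsigned long iova_pfn;
	unsigned long pages;
};

static struct flush_entry queue[FLUSH_QUEUE_SIZE];
static unsigned int queue_next;

/* Stand-ins for domain_flush_tlb()/free_iova_fast(). */
static void flush_tlb_all_domains(void)
{
	printf("-- IOTLB flush --\n");
}

static void free_iova(unsigned long pfn, unsigned long pages)
{
	printf("free pfn 0x%lx (%lu pages)\n", pfn, pages);
}

/* Mirrors __queue_flush(): flush once, then return all queued IOVAs. */
static void queue_flush(void)
{
	flush_tlb_all_domains();
	for (unsigned int i = 0; i < queue_next; i++)
		free_iova(queue[i].iova_pfn, queue[i].pages);
	queue_next = 0;
}

/* Mirrors queue_add(): defer the free; flush first if the queue is full.
 * The driver additionally arms a 10 ms timer so entries never linger. */
static void queue_add(unsigned long iova_pfn, unsigned long pages)
{
	if (queue_next == FLUSH_QUEUE_SIZE)
		queue_flush();
	queue[queue_next].iova_pfn = iova_pfn;
	queue[queue_next].pages = pages;
	queue_next++;
}

int main(void)
{
	for (unsigned long pfn = 0x100; pfn < 0x106; pfn++)
		queue_add(pfn, 1);
	queue_flush();		/* what the timer callback would do */
	return 0;
}
```

In the actual driver this path is only taken in the lazy-flush mode; when amd_iommu_unmap_flush is set, __unmap_single() frees the IOVA immediately after flushing instead of queueing it.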
This function * finds the corresponding IOMMU, the protection domain and the @@ -2450,16 +2224,11 @@ static void amd_iommu_remove_device(struct device *dev) static struct protection_domain *get_domain(struct device *dev) { struct protection_domain *domain; - struct iommu_domain *io_domain; if (!check_device(dev)) return ERR_PTR(-EINVAL); - io_domain = iommu_get_domain_for_dev(dev); - if (!io_domain) - return NULL; - - domain = to_pdomain(io_domain); + domain = get_dev_data(dev)->domain; if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); @@ -2470,8 +2239,15 @@ static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; - list_for_each_entry(dev_data, &domain->dev_list, list) + list_for_each_entry(dev_data, &domain->dev_list, list) { set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled); + + if (dev_data->devid == dev_data->alias) + continue; + + /* There is an alias, update device table entry for it */ + set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled); + } } static void update_domain(struct protection_domain *domain) @@ -2487,94 +2263,17 @@ static void update_domain(struct protection_domain *domain) domain->updated = false; } -/* - * This function fetches the PTE for a given address in the aperture - */ -static u64* dma_ops_get_pte(struct dma_ops_domain *dom, - unsigned long address) +static int dir2prot(enum dma_data_direction direction) { - struct aperture_range *aperture; - u64 *pte, *pte_page; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return NULL; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) { - pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, - GFP_ATOMIC); - aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; - } else - pte += PM_LEVEL_INDEX(0, address); - - update_domain(&dom->domain); - - return pte; -} - -/* - * This is the generic map function. It maps one 4kb page at paddr to - * the given address in the DMA address space for the domain. - */ -static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, - unsigned long address, - phys_addr_t paddr, - int direction) -{ - u64 *pte, __pte; - - WARN_ON(address > dom->aperture_size); - - paddr &= PAGE_MASK; - - pte = dma_ops_get_pte(dom, address); - if (!pte) - return DMA_ERROR_CODE; - - __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; - if (direction == DMA_TO_DEVICE) - __pte |= IOMMU_PTE_IR; + return IOMMU_PROT_IR; else if (direction == DMA_FROM_DEVICE) - __pte |= IOMMU_PTE_IW; + return IOMMU_PROT_IW; else if (direction == DMA_BIDIRECTIONAL) - __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; - - WARN_ON_ONCE(*pte); - - *pte = __pte; - - return (dma_addr_t)address; -} - -/* - * The generic unmapping function for on page in the DMA address space. - */ -static void dma_ops_domain_unmap(struct dma_ops_domain *dom, - unsigned long address) -{ - struct aperture_range *aperture; - u64 *pte; - - if (address >= dom->aperture_size) - return; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) - return; - - pte += PM_LEVEL_INDEX(0, address); - - WARN_ON_ONCE(!*pte); - - *pte = 0ULL; + return IOMMU_PROT_IW | IOMMU_PROT_IR; + else + return 0; } - /* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. 
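The new dir2prot() helper above centralizes the translation from DMA direction to IOMMU protection bits that the old dma_ops_domain_map() open-coded: device reads for DMA_TO_DEVICE, device writes for DMA_FROM_DEVICE, and both for bidirectional transfers. A compact standalone model, with flag values as defined in the driver's header:

```c
#include <stdio.h>

/* Mirrors enum dma_data_direction and the driver's IOMMU_PROT_* bits. */
enum dma_dir { DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE, DMA_NONE };

#define IOMMU_PROT_IR (1 << 0)	/* device may read  */
#define IOMMU_PROT_IW (1 << 1)	/* device may write */

static int dir2prot(enum dma_dir dir)
{
	if (dir == DMA_TO_DEVICE)
		return IOMMU_PROT_IR;
	else if (dir == DMA_FROM_DEVICE)
		return IOMMU_PROT_IW;
	else if (dir == DMA_BIDIRECTIONAL)
		return IOMMU_PROT_IR | IOMMU_PROT_IW;
	else
		return 0;
}

int main(void)
{
	printf("TO_DEVICE     -> 0x%x\n", dir2prot(DMA_TO_DEVICE));
	printf("FROM_DEVICE   -> 0x%x\n", dir2prot(DMA_FROM_DEVICE));
	printf("BIDIRECTIONAL -> 0x%x\n", dir2prot(DMA_BIDIRECTIONAL));
	return 0;
}
```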
It is used by all @@ -2585,37 +2284,29 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int dir, - bool align, + enum dma_data_direction direction, u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start, ret; unsigned int pages; - unsigned long align_mask = 0; + int prot = 0; int i; pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; - INC_STATS_COUNTER(total_map_requests); - - if (pages > 1) - INC_STATS_COUNTER(cross_page); - - if (align) - align_mask = (1UL << get_order(size)) - 1; - - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, - dma_mask); - + address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); if (address == DMA_ERROR_CODE) goto out; + prot = dir2prot(direction); + start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(dma_dom, start, paddr, dir); - if (ret == DMA_ERROR_CODE) + ret = iommu_map_page(&dma_dom->domain, start, paddr, + PAGE_SIZE, prot, GFP_ATOMIC); + if (ret) goto out_unmap; paddr += PAGE_SIZE; @@ -2623,8 +2314,6 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - ADD_STATS_COUNTER(alloced_io_mem, size); - if (unlikely(amd_iommu_np_cache)) { domain_flush_pages(&dma_dom->domain, address, size); domain_flush_complete(&dma_dom->domain); @@ -2637,10 +2326,13 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(dma_dom, start); + iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); } - dma_ops_free_addresses(dma_dom, address, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); + + dma_ops_free_iova(dma_dom, address, pages); return DMA_ERROR_CODE; } @@ -2658,23 +2350,23 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == DMA_ERROR_CODE) || - (dma_addr + size > dma_dom->aperture_size)) - return; - flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(dma_dom, start); + iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); start += PAGE_SIZE; } - SUB_STATS_COUNTER(alloced_io_mem, size); - - dma_ops_free_addresses(dma_dom, dma_addr, pages); + if (amd_iommu_unmap_flush) { + dma_ops_free_iova(dma_dom, dma_addr, pages); + domain_flush_tlb(&dma_dom->domain); + domain_flush_complete(&dma_dom->domain); + } else { + queue_add(dma_dom, dma_addr, pages); + } } /* @@ -2683,14 +2375,13 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, static dma_addr_t map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; + struct dma_ops_domain *dma_dom; u64 dma_mask; - INC_STATS_COUNTER(cnt_map_single); - domain = get_domain(dev); if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; @@ -2698,26 +2389,53 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; + dma_dom = to_dma_ops_domain(domain); - return __map_single(dev, domain->priv, paddr, size, dir, false, - dma_mask); + return __map_single(dev, dma_dom, paddr, size, dir, dma_mask); } /* * The exported unmap_single function for dma_ops. 
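__map_single() above now grabs one IOVA range for the whole request and maps it page by page with iommu_map_page(), unwinding and flushing on failure. The page count comes from iommu_num_pages(), which has to fold the sub-page offset of the physical start address into the span; a small model of that arithmetic, assuming 4 KiB pages:

```c
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/* Same arithmetic as the kernel's iommu_num_pages(): the sub-page offset
 * of the start address counts against the first page. */
static unsigned long iommu_num_pages(unsigned long paddr, unsigned long size)
{
	unsigned long span = (paddr & ~PAGE_MASK) + size;

	return (span + PAGE_SIZE - 1) / PAGE_SIZE;
}

int main(void)
{
	/* An 8 KiB buffer starting 256 bytes into a page spans 3 pages. */
	unsigned long paddr = 0x1000100, size = 8192;
	unsigned long offset = paddr & ~PAGE_MASK;

	printf("pages to map: %lu\n", iommu_num_pages(paddr, size));
	printf("dma_addr = iova + 0x%lx (the sub-page offset)\n", offset);
	return 0;
}
```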
*/ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { struct protection_domain *domain; - - INC_STATS_COUNTER(cnt_unmap_single); + struct dma_ops_domain *dma_dom; domain = get_domain(dev); if (IS_ERR(domain)) return; - __unmap_single(domain->priv, dma_addr, size, dir); + dma_dom = to_dma_ops_domain(domain); + + __unmap_single(dma_dom, dma_addr, size, dir); +} + +static int sg_num_pages(struct device *dev, + struct scatterlist *sglist, + int nelems) +{ + unsigned long mask, boundary_size; + struct scatterlist *s; + int i, npages = 0; + + mask = dma_get_seg_boundary(dev); + boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : + 1UL << (BITS_PER_LONG - PAGE_SHIFT); + + for_each_sg(sglist, s, nelems, i) { + int p, n; + + s->dma_address = npages << PAGE_SHIFT; + p = npages % boundary_size; + n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); + if (p + n > boundary_size) + npages += boundary_size - p; + npages += n; + } + + return npages; } /* @@ -2725,48 +2443,79 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, * lists). */ static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + int nelems, enum dma_data_direction direction, + unsigned long attrs) { + int mapped_pages = 0, npages = 0, prot = 0, i; struct protection_domain *domain; - int i; + struct dma_ops_domain *dma_dom; struct scatterlist *s; - phys_addr_t paddr; - int mapped_elems = 0; + unsigned long address; u64 dma_mask; - INC_STATS_COUNTER(cnt_map_sg); - domain = get_domain(dev); if (IS_ERR(domain)) return 0; + dma_dom = to_dma_ops_domain(domain); dma_mask = *dev->dma_mask; + npages = sg_num_pages(dev, sglist, nelems); + + address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); + if (address == DMA_ERROR_CODE) + goto out_err; + + prot = dir2prot(direction); + + /* Map all sg entries */ for_each_sg(sglist, s, nelems, i) { - paddr = sg_phys(s); + int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); - s->dma_address = __map_single(dev, domain->priv, - paddr, s->length, dir, false, - dma_mask); + for (j = 0; j < pages; ++j) { + unsigned long bus_addr, phys_addr; + int ret; - if (s->dma_address) { - s->dma_length = s->length; - mapped_elems++; - } else - goto unmap; + bus_addr = address + s->dma_address + (j << PAGE_SHIFT); + phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); + ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC); + if (ret) + goto out_unmap; + + mapped_pages += 1; + } } - return mapped_elems; + /* Everything is mapped - write the right values into s->dma_address */ + for_each_sg(sglist, s, nelems, i) { + s->dma_address += address + s->offset; + s->dma_length = s->length; + } -unmap: - for_each_sg(sglist, s, mapped_elems, i) { - if (s->dma_address) - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; + return nelems; + +out_unmap: + pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", + dev_name(dev), npages); + + for_each_sg(sglist, s, nelems, i) { + int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); + + for (j = 0; j < pages; ++j) { + unsigned long bus_addr; + + bus_addr = address + s->dma_address + (j << PAGE_SHIFT); + iommu_unmap_page(domain, bus_addr, PAGE_SIZE); + + if (--mapped_pages) + goto out_free_iova; + } } 
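map_sg() now performs a single IOVA allocation sized by sg_num_pages(), which walks the scatterlist, notes each segment's page offset in s->dma_address, and pads across the device's segment-boundary mask so that no segment straddles a boundary. A userspace model of that counting, with a plain array standing in for struct scatterlist and the boundary expressed in pages:

```c
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

struct seg {				/* stand-in for struct scatterlist */
	unsigned long phys;
	unsigned long length;
	unsigned long page_off;		/* filled like s->dma_address */
};

static unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
{
	return ((addr & (PAGE_SIZE - 1)) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
}

/* Mirrors sg_num_pages(): count pages for the whole list, padding so that
 * no segment crosses a multiple of boundary_pages. */
static unsigned long sg_num_pages(struct seg *sg, int nelems,
				  unsigned long boundary_pages)
{
	unsigned long npages = 0;

	for (int i = 0; i < nelems; i++) {
		unsigned long p = npages % boundary_pages;
		unsigned long n = iommu_num_pages(sg[i].phys, sg[i].length);

		sg[i].page_off = npages;  /* kernel stores npages << PAGE_SHIFT */
		if (p + n > boundary_pages)
			npages += boundary_pages - p;
		npages += n;
	}
	return npages;
}

int main(void)
{
	struct seg sg[] = {
		{ 0x100000, 3 * PAGE_SIZE },
		{ 0x200800, 2 * PAGE_SIZE },	/* unaligned: spans 3 pages */
	};

	/* e.g. a 64 KiB segment boundary is 16 pages of 4 KiB */
	printf("total pages: %lu\n", sg_num_pages(sg, 2, 16));
	return 0;
}
```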
+out_free_iova: + free_iova_fast(&dma_dom->iovad, address, npages); + +out_err: return 0; } @@ -2776,23 +2525,22 @@ unmap: */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; - struct scatterlist *s; - int i; - - INC_STATS_COUNTER(cnt_unmap_sg); + struct dma_ops_domain *dma_dom; + unsigned long startaddr; + int npages = 2; domain = get_domain(dev); if (IS_ERR(domain)) return; - for_each_sg(sglist, s, nelems, i) { - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; - } + startaddr = sg_dma_address(sglist) & PAGE_MASK; + dma_dom = to_dma_ops_domain(domain); + npages = sg_num_pages(dev, sglist, nelems); + + __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir); } /* @@ -2800,14 +2548,13 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, */ static void *alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, - struct dma_attrs *attrs) + unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; + struct dma_ops_domain *dma_dom; struct page *page; - INC_STATS_COUNTER(cnt_alloc_coherent); - domain = get_domain(dev); if (PTR_ERR(domain) == -EINVAL) { page = alloc_pages(flag, get_order(size)); @@ -2816,6 +2563,7 @@ static void *alloc_coherent(struct device *dev, size_t size, } else if (IS_ERR(domain)) return NULL; + dma_dom = to_dma_ops_domain(domain); size = PAGE_ALIGN(size); dma_mask = dev->coherent_dma_mask; flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -2835,8 +2583,8 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!dma_mask) dma_mask = *dev->dma_mask; - *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), - size, DMA_BIDIRECTIONAL, true, dma_mask); + *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), + size, DMA_BIDIRECTIONAL, dma_mask); if (*dma_addr == DMA_ERROR_CODE) goto out_free; @@ -2856,13 +2604,12 @@ out_free: */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, - struct dma_attrs *attrs) + unsigned long attrs) { struct protection_domain *domain; + struct dma_ops_domain *dma_dom; struct page *page; - INC_STATS_COUNTER(cnt_free_coherent); - page = virt_to_page(virt_addr); size = PAGE_ALIGN(size); @@ -2870,7 +2617,9 @@ static void free_coherent(struct device *dev, size_t size, if (IS_ERR(domain)) goto free_mem; - __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + dma_dom = to_dma_ops_domain(domain); + + __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) @@ -2886,52 +2635,121 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } -static int set_dma_mask(struct device *dev, u64 mask) +static struct dma_map_ops amd_iommu_dma_ops = { + .alloc = alloc_coherent, + .free = free_coherent, + .map_page = map_page, + .unmap_page = unmap_page, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, +}; + +static int init_reserved_iova_ranges(void) { - struct protection_domain *domain; - int max_apertures = 1; + struct pci_dev *pdev = NULL; + struct iova *val; - domain = get_domain(dev); - if (IS_ERR(domain)) - return PTR_ERR(domain); + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, + IOVA_START_PFN, DMA_32BIT_PFN); - if (mask == 
DMA_BIT_MASK(64)) - max_apertures = 8; - else if (mask > DMA_BIT_MASK(32)) - max_apertures = 4; + lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, + &reserved_rbtree_key); + + /* MSI memory range */ + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END)); + if (!val) { + pr_err("Reserving MSI range failed\n"); + return -ENOMEM; + } + + /* HT memory range */ + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END)); + if (!val) { + pr_err("Reserving HT range failed\n"); + return -ENOMEM; + } /* - * To prevent lock contention it doesn't make sense to allocate more - * apertures than online cpus + * Memory used for PCI resources + * FIXME: Check whether we can reserve the PCI-hole completly */ - if (max_apertures > num_online_cpus()) - max_apertures = num_online_cpus(); + for_each_pci_dev(pdev) { + int i; - if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) - dev_err(dev, "Can't allocate %d iommu apertures\n", - max_apertures); + for (i = 0; i < PCI_NUM_RESOURCES; ++i) { + struct resource *r = &pdev->resource[i]; + + if (!(r->flags & IORESOURCE_MEM)) + continue; + + val = reserve_iova(&reserved_iova_ranges, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); + if (!val) { + pr_err("Reserve pci-resource range failed\n"); + return -ENOMEM; + } + } + } return 0; } -static struct dma_map_ops amd_iommu_dma_ops = { - .alloc = alloc_coherent, - .free = free_coherent, - .map_page = map_page, - .unmap_page = unmap_page, - .map_sg = map_sg, - .unmap_sg = unmap_sg, - .dma_supported = amd_iommu_dma_supported, - .set_dma_mask = set_dma_mask, -}; - int __init amd_iommu_init_api(void) { - return bus_set_iommu(&pci_bus_type, &amd_iommu_ops); + int ret, cpu, err = 0; + + ret = iova_cache_get(); + if (ret) + return ret; + + ret = init_reserved_iova_ranges(); + if (ret) + return ret; + + for_each_possible_cpu(cpu) { + struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); + + queue->entries = kzalloc(FLUSH_QUEUE_SIZE * + sizeof(*queue->entries), + GFP_KERNEL); + if (!queue->entries) + goto out_put_iova; + + spin_lock_init(&queue->lock); + } + + err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); + if (err) + return err; +#ifdef CONFIG_ARM_AMBA + err = bus_set_iommu(&amba_bustype, &amd_iommu_ops); + if (err) + return err; +#endif + err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops); + if (err) + return err; + return 0; + +out_put_iova: + for_each_possible_cpu(cpu) { + struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu); + + kfree(queue->entries); + } + + return -ENOMEM; } int __init amd_iommu_init_dma_ops(void) { + setup_timer(&queue_timer, queue_flush_timeout, 0); + atomic_set(&queue_timer_on, 0); + swiotlb = iommu_pass_through ? 
1 : 0; iommu_detected = 1; @@ -2944,14 +2762,13 @@ int __init amd_iommu_init_dma_ops(void) if (!swiotlb) dma_ops = &nommu_dma_ops; - amd_iommu_stats_init(); - if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); else pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n"); return 0; + } /***************************************************************************** @@ -3074,9 +2891,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; - - if (!dom) - return; + struct dma_ops_domain *dma_dom; domain = to_pdomain(dom); @@ -3085,13 +2900,31 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) BUG_ON(domain->dev_cnt != 0); - if (domain->mode != PAGE_MODE_NONE) - free_pagetable(domain); + if (!dom) + return; - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + switch (dom->type) { + case IOMMU_DOMAIN_DMA: + /* + * First make sure the domain is no longer referenced from the + * flush queue + */ + queue_flush_all(); + + /* Now release the domain */ + dma_dom = to_dma_ops_domain(domain); + dma_ops_domain_free(dma_dom); + break; + default: + if (domain->mode != PAGE_MODE_NONE) + free_pagetable(domain); + + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); - protection_domain_free(domain); + protection_domain_free(domain); + break; + } } static void amd_iommu_detach_device(struct iommu_domain *dom, @@ -3099,12 +2932,14 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, { struct iommu_dev_data *dev_data = dev->archdata.iommu; struct amd_iommu *iommu; - u16 devid; + int devid; if (!check_device(dev)) return; devid = get_device_id(dev); + if (devid < 0) + return; if (dev_data->domain != NULL) detach_device(dev); @@ -3159,7 +2994,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, prot |= IOMMU_PROT_IW; mutex_lock(&domain->api_lock); - ret = iommu_map_page(domain, iova, paddr, prot, page_size); + ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); mutex_unlock(&domain->api_lock); return ret; @@ -3222,9 +3057,11 @@ static void amd_iommu_get_dm_regions(struct device *dev, struct list_head *head) { struct unity_map_entry *entry; - u16 devid; + int devid; devid = get_device_id(dev); + if (devid < 0) + return; list_for_each_entry(entry, &amd_iommu_unity_map, list) { struct iommu_dm_region *region; @@ -3259,6 +3096,19 @@ static void amd_iommu_put_dm_regions(struct device *dev, kfree(entry); } +static void amd_iommu_apply_dm_region(struct device *dev, + struct iommu_domain *domain, + struct iommu_dm_region *region) +{ + struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain)); + unsigned long start, end; + + start = IOVA_PFN(region->start); + end = IOVA_PFN(region->start + region->length); + + WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); +} + static const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3271,9 +3121,10 @@ static const struct iommu_ops amd_iommu_ops = { .iova_to_phys = amd_iommu_iova_to_phys, .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, - .device_group = pci_device_group, + .device_group = amd_iommu_device_group, .get_dm_regions = amd_iommu_get_dm_regions, .put_dm_regions = amd_iommu_put_dm_regions, + .apply_dm_region = amd_iommu_apply_dm_region, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; @@ -3432,8 +3283,6 @@ out: static int 
__amd_iommu_flush_page(struct protection_domain *domain, int pasid, u64 address) { - INC_STATS_COUNTER(invalidate_iotlb); - return __flush_pasid(domain, pasid, address, false); } @@ -3454,8 +3303,6 @@ EXPORT_SYMBOL(amd_iommu_flush_page); static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid) { - INC_STATS_COUNTER(invalidate_iotlb_all); - return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, true); } @@ -3575,8 +3422,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid, struct amd_iommu *iommu; struct iommu_cmd cmd; - INC_STATS_COUNTER(complete_ppr); - dev_data = get_dev_data(&pdev->dev); iommu = amd_iommu_rlookup_table[dev_data->devid]; @@ -3926,6 +3771,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) case X86_IRQ_ALLOC_TYPE_MSI: case X86_IRQ_ALLOC_TYPE_MSIX: devid = get_device_id(&info->msi_dev->dev); + if (devid < 0) + return NULL; + iommu = amd_iommu_rlookup_table[devid]; if (iommu) return iommu->msi_domain; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index bf4959f4225b..59741ead7e15 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -44,7 +44,7 @@ */ #define IVRS_HEADER_LENGTH 48 -#define ACPI_IVHD_TYPE 0x10 +#define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 #define ACPI_IVMD_TYPE_ALL 0x20 #define ACPI_IVMD_TYPE 0x21 #define ACPI_IVMD_TYPE_RANGE 0x22 @@ -58,6 +58,11 @@ #define IVHD_DEV_EXT_SELECT 0x46 #define IVHD_DEV_EXT_SELECT_RANGE 0x47 #define IVHD_DEV_SPECIAL 0x48 +#define IVHD_DEV_ACPI_HID 0xf0 + +#define UID_NOT_PRESENT 0 +#define UID_IS_INTEGER 1 +#define UID_IS_CHARACTER 2 #define IVHD_SPECIAL_IOAPIC 1 #define IVHD_SPECIAL_HPET 2 @@ -99,7 +104,11 @@ struct ivhd_header { u64 mmio_phys; u16 pci_seg; u16 info; - u32 efr; + u32 efr_attr; + + /* Following only valid on IVHD type 11h and 40h */ + u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ + u64 res; } __attribute__((packed)); /* @@ -111,6 +120,11 @@ struct ivhd_entry { u16 devid; u8 flags; u32 ext; + u32 hidh; + u64 cid; + u8 uidf; + u8 uidl; + u8 uid; } __attribute__((packed)); /* @@ -133,6 +147,7 @@ bool amd_iommu_irq_remap __read_mostly; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; +static int amd_iommu_target_ivhd_type; u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ @@ -218,8 +233,12 @@ enum iommu_init_state { #define EARLY_MAP_SIZE 4 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE]; static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE]; +static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE]; + static int __initdata early_ioapic_map_size; static int __initdata early_hpet_map_size; +static int __initdata early_acpihid_map_size; + static bool __initdata cmdline_maps; static enum iommu_init_state init_state = IOMMU_START_STATE; @@ -394,6 +413,22 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); } +static inline u32 get_ivhd_header_size(struct ivhd_header *h) +{ + u32 size = 0; + + switch (h->type) { + case 0x10: + size = 24; + break; + case 0x11: + case 0x40: + size = 40; + break; + } + return size; +} + /**************************************************************************** * * The functions below belong to the first pass of AMD IOMMU ACPI table @@ -408,7 +443,15 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) */ static inline int ivhd_entry_length(u8 *ivhd) { - return 0x04 << 
(*ivhd >> 6); + u32 type = ((struct ivhd_entry *)ivhd)->type; + + if (type < 0x80) { + return 0x04 << (*ivhd >> 6); + } else if (type == IVHD_DEV_ACPI_HID) { + /* For ACPI_HID, offset 21 is uid len */ + return *((u8 *)ivhd + 21) + 22; + } + return 0; } /* @@ -420,7 +463,14 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) u8 *p = (void *)h, *end = (void *)h; struct ivhd_entry *dev; - p += sizeof(*h); + u32 ivhd_size = get_ivhd_header_size(h); + + if (!ivhd_size) { + pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + p += ivhd_size; end += h->length; while (p < end) { @@ -448,6 +498,22 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) return 0; } +static int __init check_ivrs_checksum(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table; + + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) { + /* ACPI table corrupt */ + pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n"); + return -ENODEV; + } + + return 0; +} + /* * Iterate over all IVHD entries in the ACPI table and find the highest device * id which we need to handle. This is the first of three functions which parse @@ -455,31 +521,19 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) */ static int __init find_last_devid_acpi(struct acpi_table_header *table) { - int i; - u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; + u8 *p = (u8 *)table, *end = (u8 *)table; struct ivhd_header *h; - /* - * Validate checksum here so we don't need to do it when - * we actually parse the table - */ - for (i = 0; i < table->length; ++i) - checksum += p[i]; - if (checksum != 0) - /* ACPI table corrupt */ - return -ENODEV; - p += IVRS_HEADER_LENGTH; end += table->length; while (p < end) { h = (struct ivhd_header *)p; - switch (h->type) { - case ACPI_IVHD_TYPE: - find_last_devid_from_ivhd(h); - break; - default: - break; + if (h->type == amd_iommu_target_ivhd_type) { + int ret = find_last_devid_from_ivhd(h); + + if (ret) + return ret; } p += h->length; } @@ -724,6 +778,42 @@ static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) return 0; } +static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid, + bool cmd_line) +{ + struct acpihid_map_entry *entry; + struct list_head *list = &acpihid_map; + + list_for_each_entry(entry, list, list) { + if (strcmp(entry->hid, hid) || + (*uid && *entry->uid && strcmp(entry->uid, uid)) || + !entry->cmd_line) + continue; + + pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n", + hid, uid); + *devid = entry->devid; + return 0; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + memcpy(entry->uid, uid, strlen(uid)); + memcpy(entry->hid, hid, strlen(hid)); + entry->devid = *devid; + entry->cmd_line = cmd_line; + entry->root_devid = (entry->devid & (~0x7)); + + pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n", + entry->cmd_line ? 
"cmd" : "ivrs", + entry->hid, entry->uid, entry->root_devid); + + list_add_tail(&entry->list, list); + return 0; +} + static int __init add_early_maps(void) { int i, ret; @@ -746,6 +836,15 @@ static int __init add_early_maps(void) return ret; } + for (i = 0; i < early_acpihid_map_size; ++i) { + ret = add_acpi_hid_device(early_acpihid_map[i].hid, + early_acpihid_map[i].uid, + &early_acpihid_map[i].devid, + early_acpihid_map[i].cmd_line); + if (ret) + return ret; + } + return 0; } @@ -785,6 +884,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, u32 dev_i, ext_flags = 0; bool alias = false; struct ivhd_entry *e; + u32 ivhd_size; int ret; @@ -800,7 +900,14 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, /* * Done. Now parse the device entries */ - p += sizeof(struct ivhd_header); + ivhd_size = get_ivhd_header_size(h); + if (!ivhd_size) { + pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + p += ivhd_size; + end += h->length; @@ -958,6 +1065,70 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } + case IVHD_DEV_ACPI_HID: { + u16 devid; + u8 hid[ACPIHID_HID_LEN] = {0}; + u8 uid[ACPIHID_UID_LEN] = {0}; + int ret; + + if (h->type != 0x40) { + pr_err(FW_BUG "Invalid IVHD device type %#x\n", + e->type); + break; + } + + memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1); + hid[ACPIHID_HID_LEN - 1] = '\0'; + + if (!(*hid)) { + pr_err(FW_BUG "Invalid HID.\n"); + break; + } + + switch (e->uidf) { + case UID_NOT_PRESENT: + + if (e->uidl != 0) + pr_warn(FW_BUG "Invalid UID length.\n"); + + break; + case UID_IS_INTEGER: + + sprintf(uid, "%d", e->uid); + + break; + case UID_IS_CHARACTER: + + memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1); + uid[ACPIHID_UID_LEN - 1] = '\0'; + + break; + default: + break; + } + + devid = e->devid; + DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", + hid, uid, + PCI_BUS_NUM(devid), + PCI_SLOT(devid), + PCI_FUNC(devid)); + + flags = e->flags; + + ret = add_acpi_hid_device(hid, uid, &devid, false); + if (ret) + return ret; + + /* + * add_special_device might update the devid in case a + * command-line override is present. So call + * set_dev_entry_from_acpi after add_special_device. 
+ */ + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + + break; + } default: break; } @@ -1078,13 +1249,25 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; - /* Check if IVHD EFR contains proper max banks/counters */ - if ((h->efr != 0) && - ((h->efr & (0xF << 13)) != 0) && - ((h->efr & (0x3F << 17)) != 0)) { - iommu->mmio_phys_end = MMIO_REG_END_OFFSET; - } else { - iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + switch (h->type) { + case 0x10: + /* Check if IVHD EFR contains proper max banks/counters */ + if ((h->efr_attr != 0) && + ((h->efr_attr & (0xF << 13)) != 0) && + ((h->efr_attr & (0x3F << 17)) != 0)) + iommu->mmio_phys_end = MMIO_REG_END_OFFSET; + else + iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + break; + case 0x11: + case 0x40: + if (h->efr_reg & (1 << 9)) + iommu->mmio_phys_end = MMIO_REG_END_OFFSET; + else + iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + break; + default: + return -EINVAL; } iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, @@ -1117,6 +1300,32 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) return 0; } +/** + * get_highest_supported_ivhd_type - Look up the appropriate IVHD type + * @ivrs Pointer to the IVRS header + * + * This function search through all IVDB of the maximum supported IVHD + */ +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) +{ + u8 *base = (u8 *)ivrs; + struct ivhd_header *ivhd = (struct ivhd_header *) + (base + IVRS_HEADER_LENGTH); + u8 last_type = ivhd->type; + u16 devid = ivhd->devid; + + while (((u8 *)ivhd - base < ivrs->length) && + (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) { + u8 *p = (u8 *) ivhd; + + if (ivhd->devid == devid) + last_type = ivhd->type; + ivhd = (struct ivhd_header *)(p + ivhd->length); + } + + return last_type; +} + /* * Iterates over all IOMMU entries in the ACPI table, allocates the * IOMMU structure and initializes it with init_iommu_one() @@ -1133,8 +1342,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) while (p < end) { h = (struct ivhd_header *)p; - switch (*p) { - case ACPI_IVHD_TYPE: + if (*p == amd_iommu_target_ivhd_type) { DUMP_printk("device: %02x:%02x.%01x cap: %04x " "seg: %d flags: %01x info %04x\n", @@ -1151,9 +1359,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) ret = init_iommu_one(iommu, h); if (ret) return ret; - break; - default: - break; } p += h->length; @@ -1363,13 +1568,23 @@ static int __init amd_iommu_init_pci(void) break; } + /* + * Order is important here to make sure any unity map requirements are + * fulfilled. The unity mappings are created and written to the device + * table during the amd_iommu_init_api() call. + * + * After that we call init_device_table_dma() to make sure any + * uninitialized DTE will block DMA, and in the end we flush the caches + * of all IOMMUs to make sure the changes to the device table are + * active. + */ + ret = amd_iommu_init_api(); + init_device_table_dma(); for_each_iommu(iommu) iommu_flush_all_caches(iommu); - ret = amd_iommu_init_api(); - if (!ret) print_iommu_info(); @@ -1818,18 +2033,20 @@ static void __init free_dma_resources(void) * remapping setup code. * * This function basically parses the ACPI table for AMD IOMMU (IVRS) - * three times: + * four times: + * + * 1 pass) Discover the most comprehensive IVHD type to use. * - * 1 pass) Find the highest PCI device id the driver has to handle. 
+ * 2 pass) Find the highest PCI device id the driver has to handle. * Upon this information the size of the data structures is * determined that needs to be allocated. * - * 2 pass) Initialize the data structures just allocated with the + * 3 pass) Initialize the data structures just allocated with the * information in the ACPI table about available AMD IOMMUs * in the system. It also maps the PCI devices in the * system to specific IOMMUs * - * 3 pass) After the basic data structures are allocated and + * 4 pass) After the basic data structures are allocated and * initialized we update them with information about memory * remapping requirements parsed out of the ACPI table in * this last pass. @@ -1857,6 +2074,17 @@ static int __init early_amd_iommu_init(void) } /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + ret = check_ivrs_checksum(ivrs_base); + if (ret) + return ret; + + amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); + DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); + + /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data * structures for the IOMMUs in the system will be allocated @@ -2259,10 +2487,43 @@ static int __init parse_ivrs_hpet(char *str) return 1; } +static int __init parse_ivrs_acpihid(char *str) +{ + u32 bus, dev, fn; + char *hid, *uid, *p; + char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; + int ret, i; + + ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid); + if (ret != 4) { + pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str); + return 1; + } + + p = acpiid; + hid = strsep(&p, ":"); + uid = p; + + if (!hid || !(*hid) || !uid) { + pr_err("AMD-Vi: Invalid command line: hid or uid\n"); + return 1; + } + + i = early_acpihid_map_size++; + memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); + memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); + early_acpihid_map[i].devid = + ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7); + early_acpihid_map[i].cmd_line = true; + + return 1; +} + __setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); __setup("ivrs_ioapic", parse_ivrs_ioapic); __setup("ivrs_hpet", parse_ivrs_hpet); +__setup("ivrs_acpihid", parse_ivrs_acpihid); IOMMU_INIT_FINISH(amd_iommu_detect, gart_iommu_hole_init, diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 9d32b20a5e9a..caf5e3822715 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -421,7 +421,6 @@ struct protection_domain { bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ - void *priv; /* private data */ }; /* @@ -527,6 +526,19 @@ struct amd_iommu { #endif }; +#define ACPIHID_UID_LEN 256 +#define ACPIHID_HID_LEN 9 + +struct acpihid_map_entry { + struct list_head list; + u8 uid[ACPIHID_UID_LEN]; + u8 hid[ACPIHID_HID_LEN]; + u16 devid; + u16 root_devid; + bool cmd_line; + struct iommu_group *group; +}; + struct devid_map { struct list_head list; u8 id; @@ -537,6 +549,7 @@ struct devid_map { /* Map HPET and IOAPIC ids to the devid used by the IOMMU */ extern struct list_head ioapic_map; extern struct list_head hpet_map; +extern struct list_head acpihid_map; /* * List with all IOMMUs in the system. 
This list is not locked because it is @@ -668,30 +681,4 @@ static inline int get_hpet_devid(int id) return -EINVAL; } -#ifdef CONFIG_AMD_IOMMU_STATS - -struct __iommu_counter { - char *name; - struct dentry *dent; - u64 value; -}; - -#define DECLARE_STATS_COUNTER(nm) \ - static struct __iommu_counter nm = { \ - .name = #nm, \ - } - -#define INC_STATS_COUNTER(name) name.value += 1 -#define ADD_STATS_COUNTER(name, x) name.value += (x) -#define SUB_STATS_COUNTER(name, x) name.value -= (x) - -#else /* CONFIG_AMD_IOMMU_STATS */ - -#define DECLARE_STATS_COUNTER(name) -#define INC_STATS_COUNTER(name) -#define ADD_STATS_COUNTER(name, x) -#define SUB_STATS_COUNTER(name, x) - -#endif /* CONFIG_AMD_IOMMU_STATS */ - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 56999d2fac07..594849a3a9be 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -538,8 +538,7 @@ static void do_fault(struct work_struct *work) if (access_error(vma, fault)) goto out; - ret = handle_mm_fault(mm, vma, address, flags); - + ret = handle_mm_fault(vma, address, flags); out: up_read(&mm->mmap_sem); @@ -961,7 +960,7 @@ static int __init amd_iommu_v2_init(void) spin_lock_init(&state_lock); ret = -ENOMEM; - iommu_wq = create_workqueue("amd_iommu_v2"); + iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); if (iommu_wq == NULL) goto out; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4ff73ff64e49..641e88761319 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -590,6 +590,7 @@ struct arm_smmu_device { unsigned long ias; /* IPA */ unsigned long oas; /* PA */ + unsigned long pgsize_bitmap; #define ARM_SMMU_MAX_ASIDS (1 << 16) unsigned int asid_bits; @@ -878,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) * We may have concurrent producers, so we need to be careful * not to touch any of the shadow cmdq state. 
*/ - queue_read(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_read(cmd, Q_ENT(q, cons), q->ent_dwords); dev_err(smmu->dev, "skipping command in error state:\n"); for (i = 0; i < ARRAY_SIZE(cmd); ++i) dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); @@ -889,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) return; } - queue_write(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -1033,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, case STRTAB_STE_0_CFG_S2_TRANS: ste_live = true; break; + case STRTAB_STE_0_CFG_ABORT: + if (disable_bypass) + break; default: BUG(); /* STE corruption */ } @@ -1476,7 +1480,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits); - if (IS_ERR_VALUE(asid)) + if (asid < 0) return asid; cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, @@ -1507,7 +1511,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); - if (IS_ERR_VALUE(vmid)) + if (vmid < 0) return vmid; cfg->vmid = (u16)vmid; @@ -1516,8 +1520,6 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static struct iommu_ops arm_smmu_ops; - static int arm_smmu_domain_finalise(struct iommu_domain *domain) { int ret; @@ -1555,7 +1557,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) } pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap, + .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, .tlb = &arm_smmu_gather_ops, @@ -1566,11 +1568,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) if (!pgtbl_ops) return -ENOMEM; - arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (IS_ERR_VALUE(ret)) + if (ret < 0) free_io_pgtable_ops(pgtbl_ops); return ret; @@ -1643,7 +1645,7 @@ static void arm_smmu_detach_dev(struct device *dev) struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev); smmu_group->ste.bypass = true; - if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group))) + if (arm_smmu_install_ste_for_group(smmu_group) < 0) dev_warn(dev, "failed to install bypass STE\n"); smmu_group->domain = NULL; @@ -1695,7 +1697,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu_group->ste.bypass = domain->type == IOMMU_DOMAIN_DMA; ret = arm_smmu_install_ste_for_group(smmu_group); - if (IS_ERR_VALUE(ret)) + if (ret < 0) smmu_group->domain = NULL; out_unlock: @@ -1942,6 +1944,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, + .map_sg = default_iommu_map_sg, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, @@ -2236,7 +2239,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) arm_smmu_evtq_handler, arm_smmu_evtq_thread, 0, "arm-smmu-v3-evtq", smmu); - if (IS_ERR_VALUE(ret)) + if (ret < 0) dev_warn(smmu->dev, "failed to enable evtq irq\n"); } @@ -2245,7 +2248,7 @@ static int 
arm_smmu_setup_irqs(struct arm_smmu_device *smmu) ret = devm_request_irq(smmu->dev, irq, arm_smmu_cmdq_sync_handler, 0, "arm-smmu-v3-cmdq-sync", smmu); - if (IS_ERR_VALUE(ret)) + if (ret < 0) dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n"); } @@ -2253,7 +2256,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) if (irq) { ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, 0, "arm-smmu-v3-gerror", smmu); - if (IS_ERR_VALUE(ret)) + if (ret < 0) dev_warn(smmu->dev, "failed to enable gerror irq\n"); } @@ -2265,7 +2268,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) arm_smmu_priq_thread, 0, "arm-smmu-v3-priq", smmu); - if (IS_ERR_VALUE(ret)) + if (ret < 0) dev_warn(smmu->dev, "failed to enable priq irq\n"); else @@ -2410,7 +2413,6 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu) { u32 reg; bool coherent; - unsigned long pgsize_bitmap = 0; /* IDR0 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0); @@ -2541,13 +2543,16 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu) /* Page sizes */ if (reg & IDR5_GRAN64K) - pgsize_bitmap |= SZ_64K | SZ_512M; + smmu->pgsize_bitmap |= SZ_64K | SZ_512M; if (reg & IDR5_GRAN16K) - pgsize_bitmap |= SZ_16K | SZ_32M; + smmu->pgsize_bitmap |= SZ_16K | SZ_32M; if (reg & IDR5_GRAN4K) - pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; + smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; - arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap; + if (arm_smmu_ops.pgsize_bitmap == -1UL) + arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; + else + arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; /* Output address size */ switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) { @@ -2685,6 +2690,8 @@ static int __init arm_smmu_init(void) if (ret) return ret; + pci_request_acs(); + return bus_set_iommu(&pci_bus_type, &arm_smmu_ops); } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7c39ac4b9c53..2db74ebc3240 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -34,6 +34,7 @@ #include <linux/err.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/io-64-nonatomic-hi-lo.h> #include <linux/iommu.h> #include <linux/iopoll.h> #include <linux/module.h> @@ -49,7 +50,7 @@ #include "io-pgtable.h" /* Maximum number of stream IDs assigned to a single device */ -#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS +#define MAX_MASTER_STREAMIDS 128 /* Maximum number of context banks per SMMU */ #define ARM_SMMU_MAX_CBS 128 @@ -71,16 +72,15 @@ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ ? 0x400 : 0)) +/* + * Some 64-bit registers only make sense to write atomically, but in such + * cases all the data relevant to AArch32 formats lies within the lower word, + * therefore this actually makes more sense than it might first appear. 
+ */ #ifdef CONFIG_64BIT -#define smmu_writeq writeq_relaxed +#define smmu_write_atomic_lq writeq_relaxed #else -#define smmu_writeq(reg64, addr) \ - do { \ - u64 __val = (reg64); \ - void __iomem *__addr = (addr); \ - writel_relaxed(__val >> 32, __addr + 4); \ - writel_relaxed(__val, __addr); \ - } while (0) +#define smmu_write_atomic_lq writel_relaxed #endif /* Configuration registers */ @@ -94,9 +94,13 @@ #define sCR0_VMIDPNE (1 << 11) #define sCR0_PTM (1 << 12) #define sCR0_FB (1 << 13) +#define sCR0_VMID16EN (1 << 31) #define sCR0_BSU_SHIFT 14 #define sCR0_BSU_MASK 0x3 +/* Auxiliary Configuration register */ +#define ARM_SMMU_GR0_sACR 0x10 + /* Identification registers */ #define ARM_SMMU_GR0_ID0 0x20 #define ARM_SMMU_GR0_ID1 0x24 @@ -116,6 +120,8 @@ #define ID0_NTS (1 << 28) #define ID0_SMS (1 << 27) #define ID0_ATOSNS (1 << 26) +#define ID0_PTFS_NO_AARCH32 (1 << 25) +#define ID0_PTFS_NO_AARCH32S (1 << 24) #define ID0_CTTW (1 << 14) #define ID0_NUMIRPT_SHIFT 16 #define ID0_NUMIRPT_MASK 0xff @@ -141,6 +147,10 @@ #define ID2_PTFS_4K (1 << 12) #define ID2_PTFS_16K (1 << 13) #define ID2_PTFS_64K (1 << 14) +#define ID2_VMID16 (1 << 15) + +#define ID7_MAJOR_SHIFT 4 +#define ID7_MAJOR_MASK 0xf /* Global TLB invalidation */ #define ARM_SMMU_GR0_TLBIVMID 0x64 @@ -193,12 +203,15 @@ #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) #define CBA2R_RW64_32BIT (0 << 0) #define CBA2R_RW64_64BIT (1 << 0) +#define CBA2R_VMID_SHIFT 16 +#define CBA2R_VMID_MASK 0xffff /* Translation context bank */ #define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) #define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift)) #define ARM_SMMU_CB_SCTLR 0x0 +#define ARM_SMMU_CB_ACTLR 0x4 #define ARM_SMMU_CB_RESUME 0x8 #define ARM_SMMU_CB_TTBCR2 0x10 #define ARM_SMMU_CB_TTBR0 0x20 @@ -206,11 +219,9 @@ #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c -#define ARM_SMMU_CB_PAR_LO 0x50 -#define ARM_SMMU_CB_PAR_HI 0x54 +#define ARM_SMMU_CB_PAR 0x50 #define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_CB_FAR_LO 0x60 -#define ARM_SMMU_CB_FAR_HI 0x64 +#define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 #define ARM_SMMU_CB_S1_TLBIVA 0x600 #define ARM_SMMU_CB_S1_TLBIASID 0x610 @@ -230,6 +241,10 @@ #define SCTLR_M (1 << 0) #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE) +#define ARM_MMU500_ACTLR_CPRE (1 << 1) + +#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26) + #define CB_PAR_F (1 << 0) #define ATSR_ACTIVE (1 << 0) @@ -270,10 +285,17 @@ MODULE_PARM_DESC(disable_bypass, "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU."); enum arm_smmu_arch_version { - ARM_SMMU_V1 = 1, + ARM_SMMU_V1, + ARM_SMMU_V1_64K, ARM_SMMU_V2, }; +enum arm_smmu_implementation { + GENERIC_SMMU, + ARM_MMU500, + CAVIUM_SMMUV2, +}; + struct arm_smmu_smr { u8 idx; u16 mask; @@ -305,11 +327,18 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3) #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4) #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5) +#define ARM_SMMU_FEAT_VMID16 (1 << 6) +#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7) +#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8) +#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9) +#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10) +#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11) u32 features; #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) u32 options; enum arm_smmu_arch_version version; + enum arm_smmu_implementation model; 
u32 num_context_banks; u32 num_s2_context_banks; @@ -322,6 +351,7 @@ struct arm_smmu_device { unsigned long va_size; unsigned long ipa_size; unsigned long pa_size; + unsigned long pgsize_bitmap; u32 num_global_irqs; u32 num_context_irqs; @@ -329,17 +359,27 @@ struct arm_smmu_device { struct list_head list; struct rb_root masters; + + u32 cavium_id_base; /* Specific to Cavium */ +}; + +enum arm_smmu_context_fmt { + ARM_SMMU_CTX_FMT_NONE, + ARM_SMMU_CTX_FMT_AARCH64, + ARM_SMMU_CTX_FMT_AARCH32_L, + ARM_SMMU_CTX_FMT_AARCH32_S, }; struct arm_smmu_cfg { u8 cbndx; u8 irptndx; u32 cbar; + enum arm_smmu_context_fmt fmt; }; #define INVALID_IRPTNDX 0xff -#define ARM_SMMU_CB_ASID(cfg) ((cfg)->cbndx) -#define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) +#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx) +#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1) enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, @@ -357,7 +397,11 @@ struct arm_smmu_domain { struct iommu_domain domain; }; -static struct iommu_ops arm_smmu_ops; +struct arm_smmu_phandle_args { + struct device_node *np; + int args_count; + uint32_t args[MAX_MASTER_STREAMIDS]; +}; static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); @@ -367,6 +411,8 @@ struct arm_smmu_option_prop { const char *prop; }; +static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0); + static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, { 0, NULL}, @@ -466,7 +512,7 @@ static int insert_smmu_master(struct arm_smmu_device *smmu, static int register_smmu_master(struct arm_smmu_device *smmu, struct device *dev, - struct of_phandle_args *masterspec) + struct arm_smmu_phandle_args *masterspec) { int i; struct arm_smmu_master *master; @@ -578,11 +624,11 @@ static void arm_smmu_tlb_inv_context(void *cookie) if (stage1) { base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(ARM_SMMU_CB_ASID(cfg), + writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg), base + ARM_SMMU_CB_S1_TLBIASID); } else { base = ARM_SMMU_GR0(smmu); - writel_relaxed(ARM_SMMU_CB_VMID(cfg), + writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), base + ARM_SMMU_GR0_TLBIVMID); } @@ -602,37 +648,33 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; - if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { + if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova &= ~12UL; - iova |= ARM_SMMU_CB_ASID(cfg); + iova |= ARM_SMMU_CB_ASID(smmu, cfg); do { writel_relaxed(iova, reg); iova += granule; } while (size -= granule); -#ifdef CONFIG_64BIT } else { iova >>= 12; - iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; + iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48; do { writeq_relaxed(iova, reg); iova += granule >> 12; } while (size -= granule); -#endif } -#ifdef CONFIG_64BIT } else if (smmu->version == ARM_SMMU_V2) { reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? 
ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; do { - writeq_relaxed(iova, reg); + smmu_write_atomic_lq(iova, reg); iova += granule >> 12; } while (size -= granule); -#endif } else { reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; - writel_relaxed(ARM_SMMU_CB_VMID(cfg), reg); + writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg); } } @@ -644,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = { static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - int flags, ret; - u32 fsr, far, fsynr, resume; + u32 fsr, fsynr; unsigned long iova; struct iommu_domain *domain = dev; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -659,40 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) if (!(fsr & FSR_FAULT)) return IRQ_NONE; - if (fsr & FSR_IGN) - dev_err_ratelimited(smmu->dev, - "Unexpected context fault (fsr 0x%x)\n", - fsr); - fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); - flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); - far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_LO); - iova = far; -#ifdef CONFIG_64BIT - far = readl_relaxed(cb_base + ARM_SMMU_CB_FAR_HI); - iova |= ((unsigned long)far << 32); -#endif - - if (!report_iommu_fault(domain, smmu->dev, iova, flags)) { - ret = IRQ_HANDLED; - resume = RESUME_RETRY; - } else { - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", - iova, fsynr, cfg->cbndx); - ret = IRQ_NONE; - resume = RESUME_TERMINATE; - } + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, cfg->cbndx); - /* Clear the faulting FSR */ writel(fsr, cb_base + ARM_SMMU_CB_FSR); - - /* Retry or terminate any stalled transactions */ - if (fsr & FSR_SS) - writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME); - - return ret; + return IRQ_HANDLED; } static irqreturn_t arm_smmu_global_fault(int irq, void *dev) @@ -734,22 +750,20 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); if (smmu->version > ARM_SMMU_V1) { - /* - * CBA2R. - * *Must* be initialised before CBAR thanks to VMID16 - * architectural oversight affected some implementations. 
- */ -#ifdef CONFIG_64BIT - reg = CBA2R_RW64_64BIT; -#else - reg = CBA2R_RW64_32BIT; -#endif + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + reg = CBA2R_RW64_64BIT; + else + reg = CBA2R_RW64_32BIT; + /* 16-bit VMIDs live in CBA2R */ + if (smmu->features & ARM_SMMU_FEAT_VMID16) + reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT; + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); } /* CBAR */ reg = cfg->cbar; - if (smmu->version == ARM_SMMU_V1) + if (smmu->version < ARM_SMMU_V2) reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT; /* @@ -759,8 +773,9 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (stage1) { reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) | (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); - } else { - reg |= ARM_SMMU_CB_VMID(cfg) << CBAR_VMID_SHIFT; + } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { + /* 8-bit VMIDs live in CBAR */ + reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT; } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); @@ -768,15 +783,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (stage1) { reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; - smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); + reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT; + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT; - smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1); + reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT; + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); } else { reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0); + writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); } /* TTBCR */ @@ -802,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } /* SCTLR */ - reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; + reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; if (stage1) reg |= SCTLR_S1_ASIDPNE; #ifdef __BIG_ENDIAN @@ -855,16 +870,40 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + /* + * Choosing a suitable context format is even more fiddly. Until we + * grow some way for the caller to express a preference, and/or move + * the decision into the io-pgtable code where it arguably belongs, + * just aim for the closest thing to the rest of the system, and hope + * that the hardware isn't esoteric enough that we can't assume AArch64 + * support to be a superset of AArch32 support... 
+ */ + if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L) + cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L; + if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) && + (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K | + ARM_SMMU_FEAT_FMT_AARCH64_16K | + ARM_SMMU_FEAT_FMT_AARCH64_4K))) + cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64; + + if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) { + ret = -EINVAL; + goto out_unlock; + } + switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; start = smmu->num_s2_context_banks; ias = smmu->va_size; oas = smmu->ipa_size; - if (IS_ENABLED(CONFIG_64BIT)) + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { fmt = ARM_64_LPAE_S1; - else + } else { fmt = ARM_32_LPAE_S1; + ias = min(ias, 32UL); + oas = min(oas, 40UL); + } break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -876,10 +915,13 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, start = 0; ias = smmu->ipa_size; oas = smmu->pa_size; - if (IS_ENABLED(CONFIG_64BIT)) + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) { fmt = ARM_64_LPAE_S2; - else + } else { fmt = ARM_32_LPAE_S2; + ias = min(ias, 40UL); + oas = min(oas, 40UL); + } break; default: ret = -EINVAL; @@ -888,11 +930,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); - if (IS_ERR_VALUE(ret)) + if (ret < 0) goto out_unlock; cfg->cbndx = ret; - if (smmu->version == ARM_SMMU_V1) { + if (smmu->version < ARM_SMMU_V2) { cfg->irptndx = atomic_inc_return(&smmu->irptndx); cfg->irptndx %= smmu->num_context_irqs; } else { @@ -900,7 +942,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, } pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap, + .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, .tlb = &arm_smmu_gather_ops, @@ -914,8 +956,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, goto out_clear_smmu; } - /* Update our support page sizes to reflect the page table format */ - arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + /* Update the domain's page sizes to reflect the page table format */ + domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; /* Initialise the context bank with our page table cfg */ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); @@ -925,9 +967,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, * handler seeing a half-initialised domain state. 
*/ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; - ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, - "arm-smmu-context-fault", domain); - if (IS_ERR_VALUE(ret)) { + ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault, + IRQF_SHARED, "arm-smmu-context-fault", domain); + if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); cfg->irptndx = INVALID_IRPTNDX; @@ -966,7 +1008,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) if (cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; - free_irq(irq, domain); + devm_free_irq(smmu->dev, irq, domain); } free_io_pgtable_ops(smmu_domain->pgtbl_ops); @@ -1037,7 +1079,7 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, for (i = 0; i < cfg->num_streamids; ++i) { int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0, smmu->num_mapping_groups); - if (IS_ERR_VALUE(idx)) { + if (idx < 0) { dev_err(smmu->dev, "failed to allocate free SMR\n"); goto err_free_smrs; } @@ -1171,7 +1213,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Ensure that the domain is finalised */ ret = arm_smmu_init_domain_context(domain, smmu); - if (IS_ERR_VALUE(ret)) + if (ret < 0) return ret; /* @@ -1252,8 +1294,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; if (smmu->version == ARM_SMMU_V2) - smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR); - else + smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR); + else /* Register is only 32-bit in v1 */ writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR); if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, @@ -1264,9 +1306,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, return ops->iova_to_phys(ops, iova); } - phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO); - phys |= ((u64)readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32; - + phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR); if (phys & CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); @@ -1492,7 +1532,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) void __iomem *gr0_base = ARM_SMMU_GR0(smmu); void __iomem *cb_base; int i = 0; - u32 reg; + u32 reg, major; /* clear global FSR */ reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); @@ -1505,11 +1545,33 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i)); } + /* + * Before clearing ARM_MMU500_ACTLR_CPRE, need to + * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK + * bit is only present in MMU-500r2 onwards. + */ + reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7); + major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK; + if ((smmu->model == ARM_MMU500) && (major >= 2)) { + reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR); + reg &= ~ARM_MMU500_ACR_CACHE_LOCK; + writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR); + } + /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); + /* + * Disable MMU-500's not-particularly-beneficial next-page + * prefetcher for the sake of errata #841119 and #826419. 
+ */ + if (smmu->model == ARM_MMU500) { + reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR); + reg &= ~ARM_MMU500_ACTLR_CPRE; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR); + } } /* Invalidate the TLB, just in case */ @@ -1537,6 +1599,9 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Don't upgrade barriers */ reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); + if (smmu->features & ARM_SMMU_FEAT_VMID16) + reg |= sCR0_VMID16EN; + /* Push the button */ __arm_smmu_tlb_sync(smmu); writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); @@ -1569,7 +1634,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) bool cttw_dt, cttw_reg; dev_notice(smmu->dev, "probing hardware configuration...\n"); - dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); + dev_notice(smmu->dev, "SMMUv%d with:\n", + smmu->version == ARM_SMMU_V2 ? 2 : 1); /* ID0 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0); @@ -1601,7 +1667,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && ((smmu->version == 1) || !(id & ID0_ATOSNS))) { + if ((id & ID0_S1TS) && + ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } @@ -1657,6 +1724,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) ID0_NUMSIDB_MASK; } + if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) { + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L; + if (!(id & ID0_PTFS_NO_AARCH32S)) + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S; + } + /* ID1 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1); smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; @@ -1677,6 +1750,17 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) } dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n", smmu->num_context_banks, smmu->num_s2_context_banks); + /* + * Cavium CN88xx erratum #27704. + * Ensure ASID and VMID allocation is unique across all SMMUs in + * the system. 
+ */ + if (smmu->model == CAVIUM_SMMUV2) { + smmu->cavium_id_base = + atomic_add_return(smmu->num_context_banks, + &cavium_smmu_context_count); + smmu->cavium_id_base -= smmu->num_context_banks; + } /* ID2 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); @@ -1687,6 +1771,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); smmu->pa_size = size; + if (id & ID2_VMID16) + smmu->features |= ARM_SMMU_FEAT_VMID16; + /* * What the page table walker can address actually depends on which * descriptor format is in use, but since a) we don't know that yet, @@ -1696,26 +1783,39 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "failed to set DMA mask for table walker\n"); - if (smmu->version == ARM_SMMU_V1) { + if (smmu->version < ARM_SMMU_V2) { smmu->va_size = smmu->ipa_size; - size = SZ_4K | SZ_2M | SZ_1G; + if (smmu->version == ARM_SMMU_V1_64K) + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } else { size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; smmu->va_size = arm_smmu_id_size_to_bits(size); -#ifndef CONFIG_64BIT - smmu->va_size = min(32UL, smmu->va_size); -#endif - size = 0; if (id & ID2_PTFS_4K) - size |= SZ_4K | SZ_2M | SZ_1G; + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K; if (id & ID2_PTFS_16K) - size |= SZ_16K | SZ_32M; + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K; if (id & ID2_PTFS_64K) - size |= SZ_64K | SZ_512M; + smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } - arm_smmu_ops.pgsize_bitmap &= size; - dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size); + /* Now we've corralled the various formats, what'll it do? */ + if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) + smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M; + if (smmu->features & + (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K)) + smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; + if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K) + smmu->pgsize_bitmap |= SZ_16K | SZ_32M; + if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K) + smmu->pgsize_bitmap |= SZ_64K | SZ_512M; + + if (arm_smmu_ops.pgsize_bitmap == -1UL) + arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; + else + arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; + dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", + smmu->pgsize_bitmap); + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", @@ -1728,12 +1828,27 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return 0; } +struct arm_smmu_match_data { + enum arm_smmu_arch_version version; + enum arm_smmu_implementation model; +}; + +#define ARM_SMMU_MATCH_DATA(name, ver, imp) \ +static struct arm_smmu_match_data name = { .version = ver, .model = imp } + +ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU); +ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU); +ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU); +ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); +ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2); + static const struct of_device_id arm_smmu_of_match[] = { - { .compatible = "arm,smmu-v1", .data = (void *)ARM_SMMU_V1 }, - { .compatible = "arm,smmu-v2", .data = (void *)ARM_SMMU_V2 }, - { .compatible = "arm,mmu-400", .data = (void *)ARM_SMMU_V1 }, - { .compatible = "arm,mmu-401", .data = (void *)ARM_SMMU_V1 }, - { .compatible = "arm,mmu-500", .data = (void *)ARM_SMMU_V2 }, + { .compatible = 
"arm,smmu-v1", .data = &smmu_generic_v1 }, + { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 }, + { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 }, + { .compatible = "arm,mmu-401", .data = &arm_mmu401 }, + { .compatible = "arm,mmu-500", .data = &arm_mmu500 }, + { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 }, { }, }; MODULE_DEVICE_TABLE(of, arm_smmu_of_match); @@ -1741,11 +1856,13 @@ MODULE_DEVICE_TABLE(of, arm_smmu_of_match); static int arm_smmu_device_dt_probe(struct platform_device *pdev) { const struct of_device_id *of_id; + const struct arm_smmu_match_data *data; struct resource *res; struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; struct rb_node *node; - struct of_phandle_args masterspec; + struct of_phandle_iterator it; + struct arm_smmu_phandle_args *masterspec; int num_irqs, i, err; smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); @@ -1756,7 +1873,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) smmu->dev = dev; of_id = of_match_node(arm_smmu_of_match, dev->of_node); - smmu->version = (enum arm_smmu_arch_version)of_id->data; + data = of_id->data; + smmu->version = data->version; + smmu->model = data->model; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); smmu->base = devm_ioremap_resource(dev, res); @@ -1806,23 +1925,38 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) i = 0; smmu->masters = RB_ROOT; - while (!of_parse_phandle_with_args(dev->of_node, "mmu-masters", - "#stream-id-cells", i, - &masterspec)) { - err = register_smmu_master(smmu, dev, &masterspec); + + err = -ENOMEM; + /* No need to zero the memory for masterspec */ + masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL); + if (!masterspec) + goto out_put_masters; + + of_for_each_phandle(&it, err, dev->of_node, + "mmu-masters", "#stream-id-cells", 0) { + int count = of_phandle_iterator_args(&it, masterspec->args, + MAX_MASTER_STREAMIDS); + masterspec->np = of_node_get(it.node); + masterspec->args_count = count; + + err = register_smmu_master(smmu, dev, masterspec); if (err) { dev_err(dev, "failed to add master %s\n", - masterspec.np->name); + masterspec->np->name); + kfree(masterspec); goto out_put_masters; } i++; } + dev_notice(dev, "registered %d master devices\n", i); + kfree(masterspec); + parse_driver_options(smmu); - if (smmu->version > ARM_SMMU_V1 && + if (smmu->version == ARM_SMMU_V2 && smmu->num_context_banks != smmu->num_context_irqs) { dev_err(dev, "found only %d context interrupt(s) but %d required\n", @@ -1832,15 +1966,15 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) } for (i = 0; i < smmu->num_global_irqs; ++i) { - err = request_irq(smmu->irqs[i], - arm_smmu_global_fault, - IRQF_SHARED, - "arm-smmu global fault", - smmu); + err = devm_request_irq(smmu->dev, smmu->irqs[i], + arm_smmu_global_fault, + IRQF_SHARED, + "arm-smmu global fault", + smmu); if (err) { dev_err(dev, "failed to request global IRQ %d (%u)\n", i, smmu->irqs[i]); - goto out_free_irqs; + goto out_put_masters; } } @@ -1852,10 +1986,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) arm_smmu_device_reset(smmu); return 0; -out_free_irqs: - while (i--) - free_irq(smmu->irqs[i], smmu); - out_put_masters: for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { struct arm_smmu_master *master @@ -1896,7 +2026,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) dev_err(dev, "removing device with active domains!\n"); for (i = 0; i < smmu->num_global_irqs; ++i) - free_irq(smmu->irqs[i], 
smmu); + devm_free_irq(smmu->dev, smmu->irqs[i], smmu); /* Turn the thing off */ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); @@ -1942,8 +2072,10 @@ static int __init arm_smmu_init(void) #endif #ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) + if (!iommu_present(&pci_bus_type)) { + pci_request_acs(); bus_set_iommu(&pci_bus_type, &arm_smmu_ops); + } #endif return 0; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 58f2fe687a24..00c8a08d56e7 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!iovad) return; - put_iova_domain(iovad); + if (iovad->granule) + put_iova_domain(iovad); kfree(iovad); domain->iova_cookie = NULL; } @@ -94,7 +95,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size return -ENODEV; /* Use the smallest supported page size for IOVA granularity */ - order = __ffs(domain->ops->pgsize_bitmap); + order = __ffs(domain->pgsize_bitmap); base_pfn = max_t(unsigned long, 1, base >> order); end_pfn = (base + size - 1) >> order; @@ -151,12 +152,15 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) } } -static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, +static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, dma_addr_t dma_limit) { + struct iova_domain *iovad = domain->iova_cookie; unsigned long shift = iova_shift(iovad); unsigned long length = iova_align(iovad, size) >> shift; + if (domain->geometry.force_aperture) + dma_limit = min(dma_limit, domain->geometry.aperture_end); /* * Enforce size-alignment to be safe - there could perhaps be an * attribute to control this per-device, or at least per-domain... @@ -190,11 +194,15 @@ static void __iommu_dma_free_pages(struct page **pages, int count) kvfree(pages); } -static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) +static struct page **__iommu_dma_alloc_pages(unsigned int count, + unsigned long order_mask, gfp_t gfp) { struct page **pages; unsigned int i = 0, array_size = count * sizeof(*pages); - unsigned int order = MAX_ORDER; + + order_mask &= (2U << MAX_ORDER) - 1; + if (!order_mask) + return NULL; if (array_size <= PAGE_SIZE) pages = kzalloc(array_size, GFP_KERNEL); @@ -208,36 +216,38 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp) while (count) { struct page *page = NULL; - int j; + unsigned int order_size; /* * Higher-order allocations are a convenience rather * than a necessity, hence using __GFP_NORETRY until - * falling back to single-page allocations. + * falling back to minimum-order allocations. */ - for (order = min_t(unsigned int, order, __fls(count)); - order > 0; order--) { - page = alloc_pages(gfp | __GFP_NORETRY, order); + for (order_mask &= (2U << __fls(count)) - 1; + order_mask; order_mask &= ~order_size) { + unsigned int order = __fls(order_mask); + + order_size = 1U << order; + page = alloc_pages((order_mask - order_size) ? 
+ gfp | __GFP_NORETRY : gfp, order); if (!page) continue; - if (PageCompound(page)) { - if (!split_huge_page(page)) - break; - __free_pages(page, order); - } else { + if (!order) + break; + if (!PageCompound(page)) { split_page(page, order); break; + } else if (!split_huge_page(page)) { + break; } + __free_pages(page, order); } - if (!page) - page = alloc_page(gfp); if (!page) { __iommu_dma_free_pages(pages, i); return NULL; } - j = 1 << order; - count -= j; - while (j--) + count -= order_size; + while (order_size--) pages[i++] = page++; } return pages; @@ -267,6 +277,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * attached to an iommu_dma_domain * @size: Size of buffer in bytes * @gfp: Allocation flags + * @attrs: DMA attributes for this allocation * @prot: IOMMU mapping flags * @handle: Out argument for allocated DMA handle * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the @@ -278,8 +289,8 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size, * Return: Array of struct page pointers describing the buffer, * or NULL on failure. */ -struct page **iommu_dma_alloc(struct device *dev, size_t size, - gfp_t gfp, int prot, dma_addr_t *handle, +struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, + unsigned long attrs, int prot, dma_addr_t *handle, void (*flush_page)(struct device *, const void *, phys_addr_t)) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -288,15 +299,26 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, struct page **pages; struct sg_table sgt; dma_addr_t dma_addr; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; *handle = DMA_ERROR_CODE; - pages = __iommu_dma_alloc_pages(count, gfp); + min_size = alloc_sizes & -alloc_sizes; + if (min_size < PAGE_SIZE) { + min_size = PAGE_SIZE; + alloc_sizes |= PAGE_SIZE; + } else { + size = ALIGN(size, min_size); + } + if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) + alloc_sizes = min_size; + + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp); if (!pages) return NULL; - iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + iova = __alloc_iova(domain, size, dev->coherent_dma_mask); if (!iova) goto out_free_pages; @@ -368,7 +390,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; size_t iova_off = iova_offset(iovad, phys); size_t len = iova_align(iovad, size + iova_off); - struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev)); if (!iova) return DMA_ERROR_CODE; @@ -382,33 +404,65 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, } void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); } /* * Prepare a successfully-mapped scatterlist to give back to the caller. - * Handling IOVA concatenation can come later, if needed + * + * At this point the segments are already laid out by iommu_dma_map_sg() to + * avoid individually crossing any boundaries, so we merely need to check a + * segment's start address to avoid concatenating across one. 
*/ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, dma_addr_t dma_addr) { - struct scatterlist *s; - int i; + struct scatterlist *s, *cur = sg; + unsigned long seg_mask = dma_get_seg_boundary(dev); + unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); + int i, count = 0; for_each_sg(sg, s, nents, i) { - /* Un-swizzling the fields here, hence the naming mismatch */ - unsigned int s_offset = sg_dma_address(s); + /* Restore this segment's original unaligned fields first */ + unsigned int s_iova_off = sg_dma_address(s); unsigned int s_length = sg_dma_len(s); - unsigned int s_dma_len = s->length; + unsigned int s_iova_len = s->length; - s->offset += s_offset; + s->offset += s_iova_off; s->length = s_length; - sg_dma_address(s) = dma_addr + s_offset; - dma_addr += s_dma_len; + sg_dma_address(s) = DMA_ERROR_CODE; + sg_dma_len(s) = 0; + + /* + * Now fill in the real DMA data. If... + * - there is a valid output segment to append to + * - and this segment starts on an IOVA page boundary + * - but doesn't fall at a segment boundary + * - and wouldn't make the resulting output segment too long + */ + if (cur_len && !s_iova_off && (dma_addr & seg_mask) && + (cur_len + s_length <= max_len)) { + /* ...then concatenate it with the previous one */ + cur_len += s_length; + } else { + /* Otherwise start the next output segment */ + if (i > 0) + cur = sg_next(cur); + cur_len = s_length; + count++; + + sg_dma_address(cur) = dma_addr + s_iova_off; + } + + sg_dma_len(cur) = cur_len; + dma_addr += s_iova_len; + + if (s_length + s_iova_off < s_iova_len) + cur_len = 0; } - return i; + return count; } /* @@ -446,34 +500,40 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, struct scatterlist *s, *prev = NULL; dma_addr_t dma_addr; size_t iova_len = 0; + unsigned long mask = dma_get_seg_boundary(dev); int i; /* * Work out how much IOVA space we need, and align the segments to * IOVA granules for the IOMMU driver to handle. With some clever * trickery we can modify the list in-place, but reversibly, by - * hiding the original data in the as-yet-unused DMA fields. + * stashing the unaligned parts in the as-yet-unused DMA fields. */ for_each_sg(sg, s, nents, i) { - size_t s_offset = iova_offset(iovad, s->offset); + size_t s_iova_off = iova_offset(iovad, s->offset); size_t s_length = s->length; + size_t pad_len = (mask - iova_len + 1) & mask; - sg_dma_address(s) = s_offset; + sg_dma_address(s) = s_iova_off; sg_dma_len(s) = s_length; - s->offset -= s_offset; - s_length = iova_align(iovad, s_length + s_offset); + s->offset -= s_iova_off; + s_length = iova_align(iovad, s_length + s_iova_off); s->length = s_length; /* - * The simple way to avoid the rare case of a segment - * crossing the boundary mask is to pad the previous one - * to end at a naturally-aligned IOVA for this one's size, - * at the cost of potentially over-allocating a little. + * Due to the alignment of our single IOVA allocation, we can + * depend on these assumptions about the segment boundary mask: + * - If mask size >= IOVA size, then the IOVA range cannot + * possibly fall across a boundary, so we don't care. + * - If mask size < IOVA size, then the IOVA range must start + * exactly on a boundary, therefore we can lay things out + * based purely on segment lengths without needing to know + * the actual addresses beforehand. + * - The mask must be a power of 2, so pad_len == 0 if + * iova_len == 0, thus we cannot dereference prev the first + * time through here (i.e. 
before it has a meaningful value). */ - if (prev) { - size_t pad_len = roundup_pow_of_two(s_length); - - pad_len = (pad_len - iova_len) & (pad_len - 1); + if (pad_len && pad_len < s_length - 1) { prev->length += pad_len; iova_len += pad_len; } @@ -482,7 +542,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } - iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + iova = __alloc_iova(domain, iova_len, dma_get_mask(dev)); if (!iova) goto out_restore_sg; @@ -504,7 +564,7 @@ out_restore_sg: } void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { /* * The scatterlist segments are mapped into a single diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8ffd7568fc91..58470f5ced04 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -241,8 +241,20 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, if (!dmar_match_pci_path(info, scope->bus, path, level)) continue; - if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ - (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + /* + * We expect devices with endpoint scope to have normal PCI + * headers, and devices with bridge scope to have bridge PCI + * headers. However PCI NTB devices may be listed in the + * DMAR table with bridge scope, even though they have a + * normal PCI header. NTB devices are identified by class + * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch + * for this special case. + */ + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) || + (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE && + (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL && + info->dev->class >> 8 != PCI_CLASS_BRIDGE_OTHER))) { pr_warn("Device scope type does not match for %s\n", pci_name(info->dev)); return -EINVAL; @@ -1155,8 +1167,6 @@ static int qi_check_fault(struct intel_iommu *iommu, int index) (unsigned long long)qi->desc[index].high); memcpy(&qi->desc[index], &qi->desc[wait_index], sizeof(struct qi_desc)); - __iommu_flush_cache(iommu, &qi->desc[index], - sizeof(struct qi_desc)); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); return -EINVAL; } @@ -1231,9 +1241,6 @@ restart: hw[wait_index] = wait_desc; - __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc)); - __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc)); - qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; @@ -1579,18 +1586,14 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); if (fault_type == INTR_REMAP) - pr_err("INTR-REMAP: Request device [[%02x:%02x.%d] " - "fault index %llx\n" - "INTR-REMAP:[fault reason %02d] %s\n", - (source_id >> 8), PCI_SLOT(source_id & 0xFF), + pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr >> 48, fault_reason, reason); else - pr_err("DMAR:[%s] Request device [%02x:%02x.%d] " - "fault addr %llx \n" - "DMAR:[fault reason %02d] %s\n", - (type ? "DMA Read" : "DMA Write"), - (source_id >> 8), PCI_SLOT(source_id & 0xFF), + pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n", + type ? 
"DMA Read" : "DMA Write", + source_id >> 8, PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -1602,10 +1605,17 @@ irqreturn_t dmar_fault(int irq, void *dev_id) int reg, fault_index; u32 fault_status; unsigned long flag; + bool ratelimited; + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + /* Disable printing, simply clear the fault when ratelimited */ + ratelimited = !__ratelimit(&rs); raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status) + if (fault_status && !ratelimited) pr_err("DRHD: handling fault status reg %x\n", fault_status); /* TBD: ignore advanced fault log currently */ @@ -1627,24 +1637,28 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!(data & DMA_FRCD_F)) break; - fault_reason = dma_frcd_fault_reason(data); - type = dma_frcd_type(data); + if (!ratelimited) { + fault_reason = dma_frcd_fault_reason(data); + type = dma_frcd_type(data); - data = readl(iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN + 8); - source_id = dma_frcd_source_id(data); + data = readl(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN + 8); + source_id = dma_frcd_source_id(data); + + guest_addr = dmar_readq(iommu->reg + reg + + fault_index * PRIMARY_FAULT_REG_LEN); + guest_addr = dma_frcd_page_addr(guest_addr); + } - guest_addr = dmar_readq(iommu->reg + reg + - fault_index * PRIMARY_FAULT_REG_LEN); - guest_addr = dma_frcd_page_addr(guest_addr); /* clear the fault */ writel(DMA_FRCD_F, iommu->reg + reg + fault_index * PRIMARY_FAULT_REG_LEN + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); - dmar_fault_do_one(iommu, type, fault_reason, - source_id, guest_addr); + if (!ratelimited) + dmar_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); fault_index++; if (fault_index >= cap_num_fault_regs(iommu->cap)) @@ -1864,10 +1878,11 @@ static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg) /* * All PCI devices managed by this unit should have been destroyed. 
*/ - if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) + if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) { for_each_active_dev_scope(dmaru->devices, dmaru->devices_cnt, i, dev) return -EBUSY; + } ret = dmar_ir_hotplug(dmaru, false); if (ret == 0) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 5ecc86cb74c8..33dcc29ec200 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -54,6 +54,10 @@ typedef u32 sysmmu_pte_t; #define lv2ent_small(pent) ((*(pent) & 2) == 2) #define lv2ent_large(pent) ((*(pent) & 3) == 1) +#ifdef CONFIG_BIG_ENDIAN +#warning "revisit driver if we can enable big-endian ptes" +#endif + /* * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces * v5.0 introduced support for 36bit physical address space by shifting @@ -322,14 +326,27 @@ static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd) __sysmmu_tlb_invalidate(data); } +static void __sysmmu_enable_clocks(struct sysmmu_drvdata *data) +{ + BUG_ON(clk_prepare_enable(data->clk_master)); + BUG_ON(clk_prepare_enable(data->clk)); + BUG_ON(clk_prepare_enable(data->pclk)); + BUG_ON(clk_prepare_enable(data->aclk)); +} + +static void __sysmmu_disable_clocks(struct sysmmu_drvdata *data) +{ + clk_disable_unprepare(data->aclk); + clk_disable_unprepare(data->pclk); + clk_disable_unprepare(data->clk); + clk_disable_unprepare(data->clk_master); +} + static void __sysmmu_get_version(struct sysmmu_drvdata *data) { u32 ver; - clk_enable(data->clk_master); - clk_enable(data->clk); - clk_enable(data->pclk); - clk_enable(data->aclk); + __sysmmu_enable_clocks(data); ver = readl(data->sfrbase + REG_MMU_VERSION); @@ -342,10 +359,7 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) dev_dbg(data->sysmmu, "hardware version: %d.%d\n", MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version)); - clk_disable(data->aclk); - clk_disable(data->pclk); - clk_disable(data->clk); - clk_disable(data->clk_master); + __sysmmu_disable_clocks(data); } static void show_fault_information(struct sysmmu_drvdata *data, @@ -427,10 +441,7 @@ static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data) writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL); writel(0, data->sfrbase + REG_MMU_CFG); - clk_disable(data->aclk); - clk_disable(data->pclk); - clk_disable(data->clk); - clk_disable(data->clk_master); + __sysmmu_disable_clocks(data); } static bool __sysmmu_disable(struct sysmmu_drvdata *data) @@ -475,10 +486,7 @@ static void __sysmmu_init_config(struct sysmmu_drvdata *data) static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) { - clk_enable(data->clk_master); - clk_enable(data->clk); - clk_enable(data->pclk); - clk_enable(data->aclk); + __sysmmu_enable_clocks(data); writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL); @@ -488,6 +496,12 @@ static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data) writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL); + /* + * SYSMMU driver keeps master's clock enabled only for the short + * time, while accessing the registers. For performing address + * translation during DMA transaction it relies on the client + * driver to enable it. 
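The exynos_sysmmu_probe() rework further below drops the per-clock boilerplate in favour of the usual "optional clock" idiom: a clock the SoC simply does not describe (-ENOENT) is stored as NULL, which the clk API treats as a no-op, while any other error is propagated. A condensed sketch of that idiom (the helper name is hypothetical):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static struct clk *example_get_optional_clk(struct device *dev, const char *name)
{
	struct clk *clk = devm_clk_get(dev, name);

	/* clock not wired up on this SoC variant: treat as absent, not fatal */
	if (PTR_ERR(clk) == -ENOENT)
		return NULL;

	/* anything else (including -EPROBE_DEFER) is for the caller to propagate */
	return clk;
}

Since clk_prepare_enable(NULL) and clk_disable_unprepare(NULL) are no-ops, the enable/disable paths then need no NULL checks.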
+ */ clk_disable(data->clk_master); } @@ -524,16 +538,15 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, { unsigned long flags; - clk_enable(data->clk_master); spin_lock_irqsave(&data->lock, flags); - if (is_sysmmu_active(data)) { - if (data->version >= MAKE_MMU_VER(3, 3)) - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (is_sysmmu_active(data) && data->version >= MAKE_MMU_VER(3, 3)) { + clk_enable(data->clk_master); + __sysmmu_tlb_invalidate_entry(data, iova, 1); + clk_disable(data->clk_master); } spin_unlock_irqrestore(&data->lock, flags); - clk_disable(data->clk_master); } static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, @@ -572,6 +585,8 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, spin_unlock_irqrestore(&data->lock, flags); } +static struct iommu_ops exynos_iommu_ops; + static int __init exynos_sysmmu_probe(struct platform_device *pdev) { int irq, ret; @@ -602,37 +617,22 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } data->clk = devm_clk_get(dev, "sysmmu"); - if (!IS_ERR(data->clk)) { - ret = clk_prepare(data->clk); - if (ret) { - dev_err(dev, "Failed to prepare clk\n"); - return ret; - } - } else { + if (PTR_ERR(data->clk) == -ENOENT) data->clk = NULL; - } + else if (IS_ERR(data->clk)) + return PTR_ERR(data->clk); data->aclk = devm_clk_get(dev, "aclk"); - if (!IS_ERR(data->aclk)) { - ret = clk_prepare(data->aclk); - if (ret) { - dev_err(dev, "Failed to prepare aclk\n"); - return ret; - } - } else { + if (PTR_ERR(data->aclk) == -ENOENT) data->aclk = NULL; - } + else if (IS_ERR(data->aclk)) + return PTR_ERR(data->aclk); data->pclk = devm_clk_get(dev, "pclk"); - if (!IS_ERR(data->pclk)) { - ret = clk_prepare(data->pclk); - if (ret) { - dev_err(dev, "Failed to prepare pclk\n"); - return ret; - } - } else { + if (PTR_ERR(data->pclk) == -ENOENT) data->pclk = NULL; - } + else if (IS_ERR(data->pclk)) + return PTR_ERR(data->pclk); if (!data->clk && (!data->aclk || !data->pclk)) { dev_err(dev, "Failed to get device clock(s)!\n"); @@ -640,15 +640,10 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) } data->clk_master = devm_clk_get(dev, "master"); - if (!IS_ERR(data->clk_master)) { - ret = clk_prepare(data->clk_master); - if (ret) { - dev_err(dev, "Failed to prepare master's clk\n"); - return ret; - } - } else { + if (PTR_ERR(data->clk_master) == -ENOENT) data->clk_master = NULL; - } + else if (IS_ERR(data->clk_master)) + return PTR_ERR(data->clk_master); data->sysmmu = dev; spin_lock_init(&data->lock); @@ -665,6 +660,8 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); + of_iommu_set_ops(dev->of_node, &exynos_iommu_ops); + return 0; } @@ -709,6 +706,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = { .name = "exynos-sysmmu", .of_match_table = sysmmu_of_match, .pm = &sysmmu_pm_ops, + .suppress_bind_attrs = true, } }; @@ -716,7 +714,7 @@ static inline void update_pte(sysmmu_pte_t *ent, sysmmu_pte_t val) { dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent), sizeof(*ent), DMA_TO_DEVICE); - *ent = val; + *ent = cpu_to_le32(val); dma_sync_single_for_device(dma_dev, virt_to_phys(ent), sizeof(*ent), DMA_TO_DEVICE); } @@ -1357,7 +1355,6 @@ static int __init exynos_iommu_of_setup(struct device_node *np) if (!dma_dev) dma_dev = &pdev->dev; - of_iommu_set_ops(np, &exynos_iommu_ops); return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e1852e845d21..ebb5bf3ddbd9 100644 --- 
a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -33,6 +33,7 @@ #include <linux/dma-mapping.h> #include <linux/mempool.h> #include <linux/memory.h> +#include <linux/cpu.h> #include <linux/timer.h> #include <linux/io.h> #include <linux/iova.h> @@ -390,6 +391,7 @@ struct dmar_domain { * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ + bool has_iotlb_device; struct list_head devices; /* all devices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -456,27 +458,32 @@ static LIST_HEAD(dmar_rmrr_units); static void flush_unmaps_timeout(unsigned long data); -static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); +struct deferred_flush_entry { + unsigned long iova_pfn; + unsigned long nrpages; + struct dmar_domain *domain; + struct page *freelist; +}; #define HIGH_WATER_MARK 250 -struct deferred_flush_tables { +struct deferred_flush_table { int next; - struct iova *iova[HIGH_WATER_MARK]; - struct dmar_domain *domain[HIGH_WATER_MARK]; - struct page *freelist[HIGH_WATER_MARK]; + struct deferred_flush_entry entries[HIGH_WATER_MARK]; }; -static struct deferred_flush_tables *deferred_flush; +struct deferred_flush_data { + spinlock_t lock; + int timer_on; + struct timer_list timer; + long size; + struct deferred_flush_table *tables; +}; + +DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; -static DEFINE_SPINLOCK(async_umap_flush_lock); -static LIST_HEAD(unmaps_to_do); - -static int timer_on; -static long list_size; - static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct dmar_domain *domain, @@ -1143,7 +1150,7 @@ next: } while (!first_pte_in_page(++pte) && pfn <= last_pfn); } -/* free page table pages. last level pte should already be cleared */ +/* clear last level (leaf) ptes and free page table pages. 
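The deferred_flush bookkeeping introduced above moves from one global table to per-CPU state; access follows the standard DEFINE_PER_CPU / per_cpu_ptr pattern, roughly as below (illustrative names only, lock assumed initialised during driver init):

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>

struct example_flush_state {
	spinlock_t lock;
	long size;
};

static DEFINE_PER_CPU(struct example_flush_state, example_flush_state);

static void example_account_one(void)
{
	/* get_cpu() disables preemption, so the per-cpu pointer stays ours */
	unsigned int cpu = get_cpu();
	struct example_flush_state *st = per_cpu_ptr(&example_flush_state, cpu);

	spin_lock(&st->lock);
	st->size++;
	spin_unlock(&st->lock);

	put_cpu();
}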
*/ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) @@ -1458,10 +1465,35 @@ iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, return NULL; } +static void domain_update_iotlb(struct dmar_domain *domain) +{ + struct device_domain_info *info; + bool has_iotlb_device = false; + + assert_spin_locked(&device_domain_lock); + + list_for_each_entry(info, &domain->devices, link) { + struct pci_dev *pdev; + + if (!info->dev || !dev_is_pci(info->dev)) + continue; + + pdev = to_pci_dev(info->dev); + if (pdev->ats_enabled) { + has_iotlb_device = true; + break; + } + } + + domain->has_iotlb_device = has_iotlb_device; +} + static void iommu_enable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; + assert_spin_locked(&device_domain_lock); + if (!info || !dev_is_pci(info->dev)) return; @@ -1481,6 +1513,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) #endif if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; + domain_update_iotlb(info->domain); info->ats_qdep = pci_ats_queue_depth(pdev); } } @@ -1489,6 +1522,8 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) { struct pci_dev *pdev; + assert_spin_locked(&device_domain_lock); + if (!dev_is_pci(info->dev)) return; @@ -1497,6 +1532,7 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) if (info->ats_enabled) { pci_disable_ats(pdev); info->ats_enabled = 0; + domain_update_iotlb(info->domain); } #ifdef CONFIG_INTEL_IOMMU_SVM if (info->pri_enabled) { @@ -1517,6 +1553,9 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, unsigned long flags; struct device_domain_info *info; + if (!domain->has_iotlb_device) + return; + spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { if (!info->ats_enabled) @@ -1633,7 +1672,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) return -ENOMEM; } - size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **); + size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **); iommu->domains = kzalloc(size, GFP_KERNEL); if (iommu->domains) { @@ -1698,7 +1737,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { if ((iommu->domains) && (iommu->domain_ids)) { - int elems = (cap_ndoms(iommu->cap) >> 8) + 1; + int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8; int i; for (i = 0; i < elems; i++) @@ -1734,6 +1773,7 @@ static struct dmar_domain *alloc_domain(int flags) memset(domain, 0, sizeof(*domain)); domain->nid = -1; domain->flags = flags; + domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); return domain; @@ -1918,8 +1958,12 @@ static void domain_exit(struct dmar_domain *domain) return; /* Flush any lazy unmaps that may reference this domain */ - if (!intel_iommu_strict) - flush_unmaps_timeout(0); + if (!intel_iommu_strict) { + int cpu; + + for_each_possible_cpu(cpu) + flush_unmaps_timeout(cpu); + } /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); @@ -2032,7 +2076,7 @@ out_unlock: spin_unlock(&iommu->lock); spin_unlock_irqrestore(&device_domain_lock, flags); - return 0; + return ret; } struct domain_context_mapping_data { @@ -3077,7 +3121,7 @@ static int __init init_dmars(void) bool copied_tables = false; struct device *dev; struct intel_iommu *iommu; - int i, ret; + int i, ret, cpu; /* * for each drhd @@ -3110,11 +3154,20 @@ static 
int __init init_dmars(void) goto error; } - deferred_flush = kzalloc(g_num_of_iommus * - sizeof(struct deferred_flush_tables), GFP_KERNEL); - if (!deferred_flush) { - ret = -ENOMEM; - goto free_g_iommus; + for_each_possible_cpu(cpu) { + struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush, + cpu); + + dfd->tables = kzalloc(g_num_of_iommus * + sizeof(struct deferred_flush_table), + GFP_KERNEL); + if (!dfd->tables) { + ret = -ENOMEM; + goto free_g_iommus; + } + + spin_lock_init(&dfd->lock); + setup_timer(&dfd->timer, flush_unmaps_timeout, cpu); } for_each_active_iommu(iommu, drhd) { @@ -3169,11 +3222,6 @@ static int __init init_dmars(void) } } - iommu_flush_write_buffer(iommu); - iommu_set_root_entry(iommu); - iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; #ifdef CONFIG_INTEL_IOMMU_SVM @@ -3182,6 +3230,18 @@ static int __init init_dmars(void) #endif } + /* + * Now that qi is enabled on all iommus, set the root entry and flush + * caches. This is required on some Intel X58 chipsets, otherwise the + * flush_context function will loop forever and the boot hangs. + */ + for_each_active_iommu(iommu, drhd) { + iommu_flush_write_buffer(iommu); + iommu_set_root_entry(iommu); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + } + if (iommu_pass_through) iommu_identity_mapping |= IDENTMAP_ALL; @@ -3291,19 +3351,20 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } - kfree(deferred_flush); free_g_iommus: + for_each_possible_cpu(cpu) + kfree(per_cpu_ptr(&deferred_flush, cpu)->tables); kfree(g_iommus); error: return ret; } /* This takes a number of _MM_ pages, not VTD pages */ -static struct iova *intel_alloc_iova(struct device *dev, +static unsigned long intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) { - struct iova *iova = NULL; + unsigned long iova_pfn = 0; /* Restrict dma_mask to the width that the iommu can handle */ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); @@ -3316,19 +3377,19 @@ static struct iova *intel_alloc_iova(struct device *dev, * DMA_BIT_MASK(32) and if that fails then try allocating * from higher range */ - iova = alloc_iova(&domain->iovad, nrpages, - IOVA_PFN(DMA_BIT_MASK(32)), 1); - if (iova) - return iova; + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, + IOVA_PFN(DMA_BIT_MASK(32))); + if (iova_pfn) + return iova_pfn; } - iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); - if (unlikely(!iova)) { + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); + if (unlikely(!iova_pfn)) { pr_err("Allocating %ld-page iova for %s failed", nrpages, dev_name(dev)); - return NULL; + return 0; } - return iova; + return iova_pfn; } static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) @@ -3426,7 +3487,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, { struct dmar_domain *domain; phys_addr_t start_paddr; - struct iova *iova; + unsigned long iova_pfn; int prot = 0; int ret; struct intel_iommu *iommu; @@ -3444,8 +3505,8 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); - if (!iova) + iova_pfn = 
intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); + if (!iova_pfn) goto error; /* @@ -3463,7 +3524,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, * might have two guest_addr mapping to the same host paddr, but this * is not a big problem */ - ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn), mm_to_dma_pfn(paddr_pfn), size, prot); if (ret) goto error; @@ -3471,18 +3532,18 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain, - mm_to_dma_pfn(iova->pfn_lo), + mm_to_dma_pfn(iova_pfn), size, 0, 1); else iommu_flush_write_buffer(iommu); - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT; start_paddr += paddr & ~PAGE_MASK; return start_paddr; error: - if (iova) - __free_iova(&domain->iovad, iova); + if (iova_pfn) + free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); pr_err("Device %s request: %zx@%llx dir %d --- failed\n", dev_name(dev), size, (unsigned long long)paddr, dir); return 0; @@ -3491,97 +3552,126 @@ error: static dma_addr_t intel_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { return __intel_map_single(dev, page_to_phys(page) + offset, size, dir, *dev->dma_mask); } -static void flush_unmaps(void) +static void flush_unmaps(struct deferred_flush_data *flush_data) { int i, j; - timer_on = 0; + flush_data->timer_on = 0; /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { struct intel_iommu *iommu = g_iommus[i]; + struct deferred_flush_table *flush_table = + &flush_data->tables[i]; if (!iommu) continue; - if (!deferred_flush[i].next) + if (!flush_table->next) continue; /* In caching mode, global flushes turn emulation expensive */ if (!cap_caching_mode(iommu->cap)) iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - for (j = 0; j < deferred_flush[i].next; j++) { + for (j = 0; j < flush_table->next; j++) { unsigned long mask; - struct iova *iova = deferred_flush[i].iova[j]; - struct dmar_domain *domain = deferred_flush[i].domain[j]; + struct deferred_flush_entry *entry = + &flush_table->entries[j]; + unsigned long iova_pfn = entry->iova_pfn; + unsigned long nrpages = entry->nrpages; + struct dmar_domain *domain = entry->domain; + struct page *freelist = entry->freelist; /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain, - iova->pfn_lo, iova_size(iova), - !deferred_flush[i].freelist[j], 0); + mm_to_dma_pfn(iova_pfn), + nrpages, !freelist, 0); else { - mask = ilog2(mm_to_dma_pfn(iova_size(iova))); - iommu_flush_dev_iotlb(deferred_flush[i].domain[j], - (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); + mask = ilog2(nrpages); + iommu_flush_dev_iotlb(domain, + (uint64_t)iova_pfn << PAGE_SHIFT, mask); } - __free_iova(&deferred_flush[i].domain[j]->iovad, iova); - if (deferred_flush[i].freelist[j]) - dma_free_pagelist(deferred_flush[i].freelist[j]); + free_iova_fast(&domain->iovad, iova_pfn, nrpages); + if (freelist) + dma_free_pagelist(freelist); } - deferred_flush[i].next = 0; + flush_table->next = 0; } - list_size = 0; + flush_data->size = 0; } -static void flush_unmaps_timeout(unsigned long data) +static void 
flush_unmaps_timeout(unsigned long cpuid) { + struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid); unsigned long flags; - spin_lock_irqsave(&async_umap_flush_lock, flags); - flush_unmaps(); - spin_unlock_irqrestore(&async_umap_flush_lock, flags); + spin_lock_irqsave(&flush_data->lock, flags); + flush_unmaps(flush_data); + spin_unlock_irqrestore(&flush_data->lock, flags); } -static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist) +static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, + unsigned long nrpages, struct page *freelist) { unsigned long flags; - int next, iommu_id; + int entry_id, iommu_id; struct intel_iommu *iommu; + struct deferred_flush_entry *entry; + struct deferred_flush_data *flush_data; + unsigned int cpuid; + + cpuid = get_cpu(); + flush_data = per_cpu_ptr(&deferred_flush, cpuid); + + /* Flush all CPUs' entries to avoid deferring too much. If + * this becomes a bottleneck, can just flush us, and rely on + * flush timer for the rest. + */ + if (flush_data->size == HIGH_WATER_MARK) { + int cpu; + + for_each_online_cpu(cpu) + flush_unmaps_timeout(cpu); + } - spin_lock_irqsave(&async_umap_flush_lock, flags); - if (list_size == HIGH_WATER_MARK) - flush_unmaps(); + spin_lock_irqsave(&flush_data->lock, flags); iommu = domain_get_iommu(dom); iommu_id = iommu->seq_id; - next = deferred_flush[iommu_id].next; - deferred_flush[iommu_id].domain[next] = dom; - deferred_flush[iommu_id].iova[next] = iova; - deferred_flush[iommu_id].freelist[next] = freelist; - deferred_flush[iommu_id].next++; + entry_id = flush_data->tables[iommu_id].next; + ++(flush_data->tables[iommu_id].next); - if (!timer_on) { - mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); - timer_on = 1; + entry = &flush_data->tables[iommu_id].entries[entry_id]; + entry->domain = dom; + entry->iova_pfn = iova_pfn; + entry->nrpages = nrpages; + entry->freelist = freelist; + + if (!flush_data->timer_on) { + mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10)); + flush_data->timer_on = 1; } - list_size++; - spin_unlock_irqrestore(&async_umap_flush_lock, flags); + flush_data->size++; + spin_unlock_irqrestore(&flush_data->lock, flags); + + put_cpu(); } -static void intel_unmap(struct device *dev, dma_addr_t dev_addr) +static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) { struct dmar_domain *domain; unsigned long start_pfn, last_pfn; - struct iova *iova; + unsigned long nrpages; + unsigned long iova_pfn; struct intel_iommu *iommu; struct page *freelist; @@ -3593,13 +3683,11 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) iommu = domain_get_iommu(domain); - iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); - if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", - (unsigned long long)dev_addr)) - return; + iova_pfn = IOVA_PFN(dev_addr); - start_pfn = mm_to_dma_pfn(iova->pfn_lo); - last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; + nrpages = aligned_nrpages(dev_addr, size); + start_pfn = mm_to_dma_pfn(iova_pfn); + last_pfn = start_pfn + nrpages - 1; pr_debug("Device %s unmapping: pfn %lx-%lx\n", dev_name(dev), start_pfn, last_pfn); @@ -3608,12 +3696,12 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, - last_pfn - start_pfn + 1, !freelist, 0); + nrpages, !freelist, 0); /* free iova */ - __free_iova(&domain->iovad, iova); + free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages)); 
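add_unmap() above defers the IOTLB flush: entries accumulate per CPU until either the table reaches HIGH_WATER_MARK (flush immediately) or a 10 ms timer fires. Stripped of the IOMMU specifics, the batching skeleton looks roughly like this (names and the example_flush() body are illustrative; the timer is assumed to have been set up elsewhere with setup_timer() to call the flush):

#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

#define EXAMPLE_HIGH_WATER_MARK	250

struct example_batch {
	spinlock_t lock;
	int timer_on;
	long size;
	struct timer_list timer;
};

static void example_flush(struct example_batch *b)
{
	/* ... perform the expensive flush here ... */
	b->size = 0;
	b->timer_on = 0;
}

static void example_queue(struct example_batch *b)
{
	unsigned long flags;

	spin_lock_irqsave(&b->lock, flags);

	if (b->size == EXAMPLE_HIGH_WATER_MARK)	/* batch full: flush now */
		example_flush(b);

	b->size++;

	if (!b->timer_on) {			/* else flush within 10 ms */
		mod_timer(&b->timer, jiffies + msecs_to_jiffies(10));
		b->timer_on = 1;
	}

	spin_unlock_irqrestore(&b->lock, flags);
}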
dma_free_pagelist(freelist); } else { - add_unmap(domain, iova, freelist); + add_unmap(domain, iova_pfn, nrpages, freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -3623,14 +3711,14 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - intel_unmap(dev, dev_addr); + intel_unmap(dev, dev_addr, size); } static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) + unsigned long attrs) { struct page *page = NULL; int order; @@ -3676,7 +3764,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, } static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, struct dma_attrs *attrs) + dma_addr_t dma_handle, unsigned long attrs) { int order; struct page *page = virt_to_page(vaddr); @@ -3684,16 +3772,25 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap(dev, dma_handle); + intel_unmap(dev, dma_handle, size); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, order); } static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) + unsigned long attrs) { - intel_unmap(dev, sglist[0].dma_address); + dma_addr_t startaddr = sg_dma_address(sglist) & PAGE_MASK; + unsigned long nrpages = 0; + struct scatterlist *sg; + int i; + + for_each_sg(sglist, sg, nelems, i) { + nrpages += aligned_nrpages(sg_dma_address(sg), sg_dma_len(sg)); + } + + intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT); } static int intel_nontranslate_map_sg(struct device *hddev, @@ -3711,13 +3808,13 @@ static int intel_nontranslate_map_sg(struct device *hddev, } static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, unsigned long attrs) { int i; struct dmar_domain *domain; size_t size = 0; int prot = 0; - struct iova *iova = NULL; + unsigned long iova_pfn; int ret; struct scatterlist *sg; unsigned long start_vpfn; @@ -3736,9 +3833,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), + iova_pfn = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), *dev->dma_mask); - if (!iova) { + if (!iova_pfn) { sglist->dma_length = 0; return 0; } @@ -3753,13 +3850,13 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_vpfn = mm_to_dma_pfn(iova->pfn_lo); + start_vpfn = mm_to_dma_pfn(iova_pfn); ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { dma_pte_free_pagetable(domain, start_vpfn, start_vpfn + size - 1); - __free_iova(&domain->iovad, iova); + free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); return 0; } @@ -4175,10 +4272,11 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) if (!atsru) return 0; - if (!atsru->include_all && atsru->devices && atsru->devices_cnt) + if (!atsru->include_all && 
atsru->devices && atsru->devices_cnt) { for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, i, dev) return -EBUSY; + } return 0; } @@ -4498,6 +4596,46 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; +static void free_all_cpu_cached_iovas(unsigned int cpu) +{ + int i; + + for (i = 0; i < g_num_of_iommus; i++) { + struct intel_iommu *iommu = g_iommus[i]; + struct dmar_domain *domain; + int did; + + if (!iommu) + continue; + + for (did = 0; did < cap_ndoms(iommu->cap); did++) { + domain = get_iommu_domain(iommu, (u16)did); + + if (!domain) + continue; + free_cpu_cached_iovas(cpu, &domain->iovad); + } + } +} + +static int intel_iommu_cpu_notifier(struct notifier_block *nfb, + unsigned long action, void *v) +{ + unsigned int cpu = (unsigned long)v; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + free_all_cpu_cached_iovas(cpu); + flush_unmaps_timeout(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_cpu_nb = { + .notifier_call = intel_iommu_cpu_notifier, +}; static ssize_t intel_iommu_show_version(struct device *dev, struct device_attribute *attr, @@ -4631,7 +4769,6 @@ int __init intel_iommu_init(void) up_write(&dmar_global_lock); pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); - init_timer(&unmap_timer); #ifdef CONFIG_SWIOTLB swiotlb = 0; #endif @@ -4648,6 +4785,7 @@ int __init intel_iommu_init(void) bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); + register_hotcpu_notifier(&intel_iommu_cpu_nb); intel_iommu_enabled = 1; diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d9939fa9b588..8ebb3530afa7 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -583,7 +583,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (access_error(vma, req)) goto invalid; - ret = handle_mm_fault(svm->mm, vma, address, + ret = handle_mm_fault(vma, address, req->wr_req ? 
FAULT_FLAG_WRITE : 0); if (ret & VM_FAULT_ERROR) goto invalid; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 9488e3c97bcb..def8ca1c982d 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -121,6 +121,8 @@ #define ARM_V7S_TEX_MASK 0x7 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) +#define ARM_V7S_ATTR_MTK_4GB BIT(9) /* MTK extend it for 4GB mode */ + /* *well, except for TEX on level 2 large pages, of course :( */ #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 #define ARM_V7S_CONT_PAGE_TEX_MASK (ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT) @@ -258,9 +260,10 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, struct io_pgtable_cfg *cfg) { bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS); - arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S | - ARM_V7S_ATTR_TEX(1); + arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S; + if (!(prot & IOMMU_MMIO)) + pte |= ARM_V7S_ATTR_TEX(1); if (ap) { pte |= ARM_V7S_PTE_AF | ARM_V7S_PTE_AP_UNPRIV; if (!(prot & IOMMU_WRITE)) @@ -270,7 +273,9 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, if ((prot & IOMMU_NOEXEC) && ap) pte |= ARM_V7S_ATTR_XN(lvl); - if (prot & IOMMU_CACHE) + if (prot & IOMMU_MMIO) + pte |= ARM_V7S_ATTR_B; + else if (prot & IOMMU_CACHE) pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; return pte; @@ -279,11 +284,16 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl) { int prot = IOMMU_READ; + arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl); - if (pte & (ARM_V7S_PTE_AP_RDONLY << ARM_V7S_ATTR_SHIFT(lvl))) + if (!(attr & ARM_V7S_PTE_AP_RDONLY)) prot |= IOMMU_WRITE; - if (pte & ARM_V7S_ATTR_C) + if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0) + prot |= IOMMU_MMIO; + else if (pte & ARM_V7S_ATTR_C) prot |= IOMMU_CACHE; + if (pte & ARM_V7S_ATTR_XN(lvl)) + prot |= IOMMU_NOEXEC; return prot; } @@ -364,6 +374,9 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) pte |= ARM_V7S_ATTR_NS_SECTION; + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) + pte |= ARM_V7S_ATTR_MTK_4GB; + if (num_entries > 1) pte = arm_v7s_pte_to_cont(pte, lvl); @@ -625,9 +638,15 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | - IO_PGTABLE_QUIRK_TLBI_ON_MAP)) + IO_PGTABLE_QUIRK_TLBI_ON_MAP | + IO_PGTABLE_QUIRK_ARM_MTK_4GB)) return NULL; + /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. 
*/ + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB && + !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS)) + return NULL; + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f433b516098a..f5c90e1366ce 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -355,7 +355,10 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; - if (prot & IOMMU_CACHE) + if (prot & IOMMU_MMIO) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + else if (prot & IOMMU_CACHE) pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE << ARM_LPAE_PTE_ATTRINDX_SHIFT); } else { @@ -364,7 +367,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, pte |= ARM_LPAE_PTE_HAP_READ; if (prot & IOMMU_WRITE) pte |= ARM_LPAE_PTE_HAP_WRITE; - if (prot & IOMMU_CACHE) + if (prot & IOMMU_MMIO) + pte |= ARM_LPAE_PTE_MEMATTR_DEV; + else if (prot & IOMMU_CACHE) pte |= ARM_LPAE_PTE_MEMATTR_OIWB; else pte |= ARM_LPAE_PTE_MEMATTR_NC; @@ -571,7 +576,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; found_translation: - iova &= (ARM_LPAE_GRANULE(data) - 1); + iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1); return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; } diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 876f6a76d288..127558d83667 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -25,8 +25,7 @@ #include "io-pgtable.h" static const struct io_pgtable_init_fns * -io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = -{ +io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE [ARM_32_LPAE_S1] = &io_pgtable_arm_32_lpae_s1_init_fns, [ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns, diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index d4f502742e3b..969d82cc92ca 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -60,10 +60,16 @@ struct io_pgtable_cfg { * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid * (unmapped) entries but the hardware might do so anyway, perform * TLB maintenance when mapping as well as when unmapping. + * + * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all + * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit + * when the SoC is in "4GB mode" and they can only access the high + * remap of DRAM (0x1_00000000 to 0x1_ffffffff). 
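Mirroring the arm_v7s_alloc_pgtable() check above, a driver requesting the Mediatek 4GB quirk must pass NO_PERMS alongside it. A hypothetical caller might look like the sketch below (field values are illustrative; a real driver also keeps cfg around so it can program TTBR/TCR from the values the allocator writes back):

#include <linux/device.h>
#include <linux/sizes.h>
#include "io-pgtable.h"

static struct io_pgtable_ops *
example_alloc_v7s(struct device *dev, const struct iommu_gather_ops *tlb_ops,
		  void *cookie)
{
	struct io_pgtable_cfg cfg = {
		.quirks		= IO_PGTABLE_QUIRK_ARM_MTK_4GB |
				  IO_PGTABLE_QUIRK_NO_PERMS,	/* must go together */
		.pgsize_bitmap	= SZ_4K | SZ_64K | SZ_1M | SZ_16M,
		.ias		= 32,
		.oas		= 32,
		.tlb		= tlb_ops,
		.iommu_dev	= dev,
	};

	/* returns NULL if the quirk combination is rejected */
	return alloc_io_pgtable_ops(ARM_V7S, &cfg, cookie);
}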
*/ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) + #define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b9df1411c894..b06d93594436 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -34,8 +34,7 @@ #include <trace/events/iommu.h> static struct kset *iommu_group_kset; -static struct ida iommu_group_ida; -static struct mutex iommu_group_mutex; +static DEFINE_IDA(iommu_group_ida); struct iommu_callback_data { const struct iommu_ops *ops; @@ -144,9 +143,7 @@ static void iommu_group_release(struct kobject *kobj) if (group->iommu_data_release) group->iommu_data_release(group->iommu_data); - mutex_lock(&iommu_group_mutex); - ida_remove(&iommu_group_ida, group->id); - mutex_unlock(&iommu_group_mutex); + ida_simple_remove(&iommu_group_ida, group->id); if (group->default_domain) iommu_domain_free(group->default_domain); @@ -186,26 +183,17 @@ struct iommu_group *iommu_group_alloc(void) INIT_LIST_HEAD(&group->devices); BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - mutex_lock(&iommu_group_mutex); - -again: - if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) { + ret = ida_simple_get(&iommu_group_ida, 0, 0, GFP_KERNEL); + if (ret < 0) { kfree(group); - mutex_unlock(&iommu_group_mutex); - return ERR_PTR(-ENOMEM); + return ERR_PTR(ret); } - - if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id)) - goto again; - - mutex_unlock(&iommu_group_mutex); + group->id = ret; ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype, NULL, "%d", group->id); if (ret) { - mutex_lock(&iommu_group_mutex); - ida_remove(&iommu_group_ida, group->id); - mutex_unlock(&iommu_group_mutex); + ida_simple_remove(&iommu_group_ida, group->id); kfree(group); return ERR_PTR(ret); } @@ -337,9 +325,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, if (!domain || domain->type != IOMMU_DOMAIN_DMA) return 0; - BUG_ON(!domain->ops->pgsize_bitmap); + BUG_ON(!domain->pgsize_bitmap); - pg_size = 1UL << __ffs(domain->ops->pgsize_bitmap); + pg_size = 1UL << __ffs(domain->pgsize_bitmap); INIT_LIST_HEAD(&mappings); iommu_get_dm_regions(dev, &mappings); @@ -348,6 +336,9 @@ static int iommu_group_create_direct_mappings(struct iommu_group *group, list_for_each_entry(entry, &mappings, list) { dma_addr_t start, end, addr; + if (domain->ops->apply_dm_region) + domain->ops->apply_dm_region(dev, domain, entry); + start = ALIGN(entry->start, pg_size); end = ALIGN(entry->start + entry->length, pg_size); @@ -660,8 +651,8 @@ static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, } /* - * Look for aliases to or from the given device for exisiting groups. The - * dma_alias_devfn only supports aliases on the same bus, therefore the search + * Look for aliases to or from the given device for existing groups. DMA + * aliases are only supported on the same bus, therefore the search * space is quite small (especially since we're really only looking at pcie * device, and therefore only expect multiple slots on the root complex or * downstream switch ports). 
It's conceivable though that a pair of @@ -686,11 +677,7 @@ static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, continue; /* We alias them or they alias us */ - if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && - pdev->dma_alias_devfn == tmp->devfn) || - ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && - tmp->dma_alias_devfn == pdev->devfn)) { - + if (pci_devs_are_dma_aliases(pdev, tmp)) { group = get_pci_alias_group(tmp, devfns); if (group) { pci_dev_put(tmp); @@ -1073,6 +1060,8 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, domain->ops = bus->iommu_ops; domain->type = type; + /* Assume all sizes by default; the driver may override this later */ + domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap; return domain; } @@ -1297,7 +1286,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, pgsize = (1UL << (pgsize_idx + 1)) - 1; /* throw away page sizes not supported by the hardware */ - pgsize &= domain->ops->pgsize_bitmap; + pgsize &= domain->pgsize_bitmap; /* make sure we're still sane */ BUG_ON(!pgsize); @@ -1319,14 +1308,14 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, int ret = 0; if (unlikely(domain->ops->map == NULL || - domain->ops->pgsize_bitmap == 0UL)) + domain->pgsize_bitmap == 0UL)) return -ENODEV; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; /* find out the minimum page size supported */ - min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); /* * both the virtual address and the physical one, as well as @@ -1373,14 +1362,14 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unsigned long orig_iova = iova; if (unlikely(domain->ops->unmap == NULL || - domain->ops->pgsize_bitmap == 0UL)) + domain->pgsize_bitmap == 0UL)) return -ENODEV; if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) return -EINVAL; /* find out the minimum page size supported */ - min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); /* * The virtual address, as well as the size of the mapping, must be @@ -1426,10 +1415,10 @@ size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova, unsigned int i, min_pagesz; int ret; - if (unlikely(domain->ops->pgsize_bitmap == 0UL)) + if (unlikely(domain->pgsize_bitmap == 0UL)) return 0; - min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap); + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); for_each_sg(sg, s, nents, i) { phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; @@ -1485,9 +1474,6 @@ static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", NULL, kernel_kobj); - ida_init(&iommu_group_ida); - mutex_init(&iommu_group_mutex); - BUG_ON(!iommu_group_kset); return 0; @@ -1510,7 +1496,7 @@ int iommu_domain_get_attr(struct iommu_domain *domain, break; case DOMAIN_ATTR_PAGING: paging = data; - *paging = (domain->ops->pgsize_bitmap != 0UL); + *paging = (domain->pgsize_bitmap != 0UL); break; case DOMAIN_ATTR_WINDOWS: count = data; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index fa0adef32bd6..e23001bfcfee 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -20,6 +20,17 @@ #include <linux/iova.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/smp.h> +#include <linux/bitops.h> + +static bool iova_rcache_insert(struct iova_domain *iovad, + unsigned long pfn, + unsigned long size); +static unsigned long iova_rcache_get(struct 
iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn); +static void init_iova_rcaches(struct iova_domain *iovad); +static void free_iova_rcaches(struct iova_domain *iovad); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, @@ -38,6 +49,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; + init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -291,33 +303,18 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, } EXPORT_SYMBOL_GPL(alloc_iova); -/** - * find_iova - find's an iova for a given pfn - * @iovad: - iova domain in question. - * @pfn: - page frame number - * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. - */ -struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +static struct iova * +private_find_iova(struct iova_domain *iovad, unsigned long pfn) { - unsigned long flags; - struct rb_node *node; + struct rb_node *node = iovad->rbroot.rb_node; + + assert_spin_locked(&iovad->iova_rbtree_lock); - /* Take the lock so that no other thread is manipulating the rbtree */ - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = iovad->rbroot.rb_node; while (node) { struct iova *iova = container_of(node, struct iova, node); /* If pfn falls within iova's range, return iova */ if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - /* We are not holding the lock while this iova - * is referenced by the caller as the same thread - * which called this function also calls __free_iova() - * and it is by design that only one thread can possibly - * reference a particular iova and hence no conflict. - */ return iova; } @@ -327,9 +324,35 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) node = node->rb_right; } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return NULL; } + +static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +{ + assert_spin_locked(&iovad->iova_rbtree_lock); + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + free_iova_mem(iova); +} + +/** + * find_iova - finds an iova for a given pfn + * @iovad: - iova domain in question. + * @pfn: - page frame number + * This function finds and returns an iova belonging to the + * given doamin which matches the given pfn. 
+ */ +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) +{ + unsigned long flags; + struct iova *iova; + + /* Take the lock so that no other thread is manipulating the rbtree */ + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + iova = private_find_iova(iovad, pfn); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return iova; +} EXPORT_SYMBOL_GPL(find_iova); /** @@ -344,10 +367,8 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - __cached_rbnode_delete_update(iovad, iova); - rb_erase(&iova->node, &iovad->rbroot); + private_free_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -370,6 +391,65 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) EXPORT_SYMBOL_GPL(free_iova); /** + * alloc_iova_fast - allocates an iova from rcache + * @iovad: - iova domain in question + * @size: - size of page frames to allocate + * @limit_pfn: - max limit address + * This function tries to satisfy an iova allocation from the rcache, + * and falls back to regular allocation on failure. +*/ +unsigned long +alloc_iova_fast(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn) +{ + bool flushed_rcache = false; + unsigned long iova_pfn; + struct iova *new_iova; + + iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + if (iova_pfn) + return iova_pfn; + +retry: + new_iova = alloc_iova(iovad, size, limit_pfn, true); + if (!new_iova) { + unsigned int cpu; + + if (flushed_rcache) + return 0; + + /* Try replenishing IOVAs by flushing rcache. */ + flushed_rcache = true; + preempt_disable(); + for_each_online_cpu(cpu) + free_cpu_cached_iovas(cpu, iovad); + preempt_enable(); + goto retry; + } + + return new_iova->pfn_lo; +} +EXPORT_SYMBOL_GPL(alloc_iova_fast); + +/** + * free_iova_fast - free iova pfn range into rcache + * @iovad: - iova domain in question. + * @pfn: - pfn that is allocated previously + * @size: - # of pages in range + * This functions frees an iova range by trying to put it into the rcache, + * falling back to regular iova deallocation via free_iova() if this fails. + */ +void +free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) +{ + if (iova_rcache_insert(iovad, pfn, size)) + return; + + free_iova(iovad, pfn); +} +EXPORT_SYMBOL_GPL(free_iova_fast); + +/** * put_iova_domain - destroys the iova doamin * @iovad: - iova domain in question. * All the iova's in that domain are destroyed. @@ -379,6 +459,7 @@ void put_iova_domain(struct iova_domain *iovad) struct rb_node *node; unsigned long flags; + free_iova_rcaches(iovad); spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); node = rb_first(&iovad->rbroot); while (node) { @@ -550,5 +631,297 @@ error: return NULL; } +/* + * Magazine caches for IOVA ranges. For an introduction to magazines, + * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab + * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams. + * For simplicity, we use a static magazine size and don't implement the + * dynamic size tuning described in the paper. 
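Callers touch the rcache only through the two *_fast helpers above; a minimal usage sketch, assuming an iova_domain initialised with a PAGE_SIZE granule (function names are illustrative):

#include <linux/iova.h>
#include <linux/types.h>

static dma_addr_t example_alloc_range(struct iova_domain *iovad,
				      unsigned long nrpages, u64 dma_mask)
{
	/* per-cpu magazines and the depot are tried before the rbtree */
	unsigned long pfn = alloc_iova_fast(iovad, nrpages,
					    dma_mask >> PAGE_SHIFT);

	return pfn ? (dma_addr_t)pfn << PAGE_SHIFT : 0;	/* 0 == failure here */
}

static void example_free_range(struct iova_domain *iovad,
			       dma_addr_t addr, unsigned long nrpages)
{
	/* goes back into a magazine when possible, else into the rbtree */
	free_iova_fast(iovad, addr >> PAGE_SHIFT, nrpages);
}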
+ */ + +#define IOVA_MAG_SIZE 128 + +struct iova_magazine { + unsigned long size; + unsigned long pfns[IOVA_MAG_SIZE]; +}; + +struct iova_cpu_rcache { + spinlock_t lock; + struct iova_magazine *loaded; + struct iova_magazine *prev; +}; + +static struct iova_magazine *iova_magazine_alloc(gfp_t flags) +{ + return kzalloc(sizeof(struct iova_magazine), flags); +} + +static void iova_magazine_free(struct iova_magazine *mag) +{ + kfree(mag); +} + +static void +iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) +{ + unsigned long flags; + int i; + + if (!mag) + return; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + + for (i = 0 ; i < mag->size; ++i) { + struct iova *iova = private_find_iova(iovad, mag->pfns[i]); + + BUG_ON(!iova); + private_free_iova(iovad, iova); + } + + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + mag->size = 0; +} + +static bool iova_magazine_full(struct iova_magazine *mag) +{ + return (mag && mag->size == IOVA_MAG_SIZE); +} + +static bool iova_magazine_empty(struct iova_magazine *mag) +{ + return (!mag || mag->size == 0); +} + +static unsigned long iova_magazine_pop(struct iova_magazine *mag, + unsigned long limit_pfn) +{ + BUG_ON(iova_magazine_empty(mag)); + + if (mag->pfns[mag->size - 1] >= limit_pfn) + return 0; + + return mag->pfns[--mag->size]; +} + +static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) +{ + BUG_ON(iova_magazine_full(mag)); + + mag->pfns[mag->size++] = pfn; +} + +static void init_iova_rcaches(struct iova_domain *iovad) +{ + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + unsigned int cpu; + int i; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + spin_lock_init(&rcache->lock); + rcache->depot_size = 0; + rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); + if (WARN_ON(!rcache->cpu_rcaches)) + continue; + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_init(&cpu_rcache->lock); + cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); + cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); + } + } +} + +/* + * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and + * return true on success. Can fail if rcache is full and we can't free + * space, and free_iova() (our only caller) will then return the IOVA + * range to the rbtree instead. 
+ */ +static bool __iova_rcache_insert(struct iova_domain *iovad, + struct iova_rcache *rcache, + unsigned long iova_pfn) +{ + struct iova_magazine *mag_to_free = NULL; + struct iova_cpu_rcache *cpu_rcache; + bool can_insert = false; + unsigned long flags; + + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + spin_lock_irqsave(&cpu_rcache->lock, flags); + + if (!iova_magazine_full(cpu_rcache->loaded)) { + can_insert = true; + } else if (!iova_magazine_full(cpu_rcache->prev)) { + swap(cpu_rcache->prev, cpu_rcache->loaded); + can_insert = true; + } else { + struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC); + + if (new_mag) { + spin_lock(&rcache->lock); + if (rcache->depot_size < MAX_GLOBAL_MAGS) { + rcache->depot[rcache->depot_size++] = + cpu_rcache->loaded; + } else { + mag_to_free = cpu_rcache->loaded; + } + spin_unlock(&rcache->lock); + + cpu_rcache->loaded = new_mag; + can_insert = true; + } + } + + if (can_insert) + iova_magazine_push(cpu_rcache->loaded, iova_pfn); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); + + if (mag_to_free) { + iova_magazine_free_pfns(mag_to_free, iovad); + iova_magazine_free(mag_to_free); + } + + return can_insert; +} + +static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, + unsigned long size) +{ + unsigned int log_size = order_base_2(size); + + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + return false; + + return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn); +} + +/* + * Caller wants to allocate a new IOVA range from 'rcache'. If we can + * satisfy the request, return a matching non-NULL range and remove + * it from the 'rcache'. + */ +static unsigned long __iova_rcache_get(struct iova_rcache *rcache, + unsigned long limit_pfn) +{ + struct iova_cpu_rcache *cpu_rcache; + unsigned long iova_pfn = 0; + bool has_pfn = false; + unsigned long flags; + + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + spin_lock_irqsave(&cpu_rcache->lock, flags); + + if (!iova_magazine_empty(cpu_rcache->loaded)) { + has_pfn = true; + } else if (!iova_magazine_empty(cpu_rcache->prev)) { + swap(cpu_rcache->prev, cpu_rcache->loaded); + has_pfn = true; + } else { + spin_lock(&rcache->lock); + if (rcache->depot_size > 0) { + iova_magazine_free(cpu_rcache->loaded); + cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + has_pfn = true; + } + spin_unlock(&rcache->lock); + } + + if (has_pfn) + iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); + + return iova_pfn; +} + +/* + * Try to satisfy IOVA allocation range from rcache. Fail if requested + * size is too big or the DMA limit we are given isn't satisfied by the + * top element in the magazine. + */ +static unsigned long iova_rcache_get(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn) +{ + unsigned int log_size = order_base_2(size); + + if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) + return 0; + + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); +} + +/* + * Free a cpu's rcache. 
+ */ +static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, + struct iova_rcache *rcache) +{ + struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + unsigned long flags; + + spin_lock_irqsave(&cpu_rcache->lock, flags); + + iova_magazine_free_pfns(cpu_rcache->loaded, iovad); + iova_magazine_free(cpu_rcache->loaded); + + iova_magazine_free_pfns(cpu_rcache->prev, iovad); + iova_magazine_free(cpu_rcache->prev); + + spin_unlock_irqrestore(&cpu_rcache->lock, flags); +} + +/* + * free rcache data structures. + */ +static void free_iova_rcaches(struct iova_domain *iovad) +{ + struct iova_rcache *rcache; + unsigned long flags; + unsigned int cpu; + int i, j; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + for_each_possible_cpu(cpu) + free_cpu_iova_rcache(cpu, iovad, rcache); + spin_lock_irqsave(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + for (j = 0; j < rcache->depot_size; ++j) { + iova_magazine_free_pfns(rcache->depot[j], iovad); + iova_magazine_free(rcache->depot[j]); + } + spin_unlock_irqrestore(&rcache->lock, flags); + } +} + +/* + * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) + */ +void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) +{ + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + unsigned long flags; + int i; + + for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_irqsave(&cpu_rcache->lock, flags); + iova_magazine_free_pfns(cpu_rcache->loaded, iovad); + iova_magazine_free_pfns(cpu_rcache->prev, iovad); + spin_unlock_irqrestore(&cpu_rcache->lock, flags); + } +} + MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 8adaaeae3268..49721b4e1975 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -36,7 +36,7 @@ static void irq_remapping_disable_io_apic(void) * As this gets called during crash dump, keep this simple for * now. 
*/ - if (cpu_has_apic || apic_from_smp_config()) + if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config()) disconnect_bsp_APIC(0); } diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index e321fa517a45..b09692bb5b0a 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -27,32 +27,35 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/clk.h> +#include <linux/err.h> +#include <linux/of_iommu.h> #include <asm/cacheflush.h> #include <asm/sizes.h> #include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" +#include "io-pgtable.h" #define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ " mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \ : "=r" (reg)) -#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0) -#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1) - /* bitmap of the page sizes currently supported */ #define MSM_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -static int msm_iommu_tex_class[4]; - DEFINE_SPINLOCK(msm_iommu_lock); +static LIST_HEAD(qcom_iommu_devices); +static struct iommu_ops msm_iommu_ops; struct msm_priv { - unsigned long *pgtable; struct list_head list_attached; struct iommu_domain domain; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops *iop; + struct device *dev; + spinlock_t pgtlock; /* pagetable lock */ }; static struct msm_priv *to_msm_priv(struct iommu_domain *dom) @@ -60,67 +63,183 @@ static struct msm_priv *to_msm_priv(struct iommu_domain *dom) return container_of(dom, struct msm_priv, domain); } -static int __enable_clocks(struct msm_iommu_drvdata *drvdata) +static int __enable_clocks(struct msm_iommu_dev *iommu) { int ret; - ret = clk_enable(drvdata->pclk); + ret = clk_enable(iommu->pclk); if (ret) goto fail; - if (drvdata->clk) { - ret = clk_enable(drvdata->clk); + if (iommu->clk) { + ret = clk_enable(iommu->clk); if (ret) - clk_disable(drvdata->pclk); + clk_disable(iommu->pclk); } fail: return ret; } -static void __disable_clocks(struct msm_iommu_drvdata *drvdata) +static void __disable_clocks(struct msm_iommu_dev *iommu) { - clk_disable(drvdata->clk); - clk_disable(drvdata->pclk); + if (iommu->clk) + clk_disable(iommu->clk); + clk_disable(iommu->pclk); } -static int __flush_iotlb(struct iommu_domain *domain) +static void msm_iommu_reset(void __iomem *base, int ncb) { - struct msm_priv *priv = to_msm_priv(domain); - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; - int ret = 0; -#ifndef CONFIG_IOMMU_PGTABLES_L2 - unsigned long *fl_table = priv->pgtable; - int i; - - if (!list_empty(&priv->list_attached)) { - dmac_flush_range(fl_table, fl_table + SZ_16K); - - for (i = 0; i < NUM_FL_PTE; i++) - if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) { - void *sl_table = __va(fl_table[i] & - FL_BASE_MASK); - dmac_flush_range(sl_table, sl_table + SZ_4K); - } + int ctx; + + SET_RPUE(base, 0); + SET_RPUEIE(base, 0); + SET_ESRRESTORE(base, 0); + SET_TBE(base, 0); + SET_CR(base, 0); + SET_SPDMBE(base, 0); + SET_TESTBUSCR(base, 0); + SET_TLBRSW(base, 0); + SET_GLOBAL_TLBIALL(base, 0); + SET_RPU_ACR(base, 0); + SET_TLBLKCRWE(base, 1); + + for (ctx = 0; ctx < ncb; ctx++) { + SET_BPRCOSH(base, ctx, 0); + SET_BPRCISH(base, ctx, 0); + SET_BPRCNSH(base, ctx, 0); + SET_BPSHCFG(base, ctx, 0); + SET_BPMTCFG(base, ctx, 0); + SET_ACTLR(base, ctx, 0); + SET_SCTLR(base, ctx, 0); + SET_FSRRESTORE(base, ctx, 0); + SET_TTBR0(base, ctx, 0); + SET_TTBR1(base, ctx, 0); + SET_TTBCR(base, ctx, 0); + SET_BFBCR(base, ctx, 0); + SET_PAR(base, ctx, 0); + SET_FAR(base, ctx, 0); 
+ SET_CTX_TLBIALL(base, ctx, 0); + SET_TLBFLPTER(base, ctx, 0); + SET_TLBSLPTER(base, ctx, 0); + SET_TLBLKCR(base, ctx, 0); + SET_CONTEXTIDR(base, ctx, 0); } -#endif +} - list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) { +static void __flush_iotlb(void *cookie) +{ + struct msm_priv *priv = cookie; + struct msm_iommu_dev *iommu = NULL; + struct msm_iommu_ctx_dev *master; + int ret = 0; + + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = __enable_clocks(iommu); + if (ret) + goto fail; - BUG_ON(!ctx_drvdata->pdev || !ctx_drvdata->pdev->dev.parent); + list_for_each_entry(master, &iommu->ctx_list, list) + SET_CTX_TLBIALL(iommu->base, master->num, 0); - iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); - BUG_ON(!iommu_drvdata); + __disable_clocks(iommu); + } +fail: + return; +} - ret = __enable_clocks(iommu_drvdata); +static void __flush_iotlb_range(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct msm_priv *priv = cookie; + struct msm_iommu_dev *iommu = NULL; + struct msm_iommu_ctx_dev *master; + int ret = 0; + int temp_size; + + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = __enable_clocks(iommu); if (ret) goto fail; - SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0); - __disable_clocks(iommu_drvdata); + list_for_each_entry(master, &iommu->ctx_list, list) { + temp_size = size; + do { + iova &= TLBIVA_VA; + iova |= GET_CONTEXTIDR_ASID(iommu->base, + master->num); + SET_TLBIVA(iommu->base, master->num, iova); + iova += granule; + } while (temp_size -= granule); + } + + __disable_clocks(iommu); } + fail: - return ret; + return; +} + +static void __flush_iotlb_sync(void *cookie) +{ + /* + * Nothing is needed here, the barrier to guarantee + * completion of the tlb sync operation is implicitly + * taken care when the iommu client does a writel before + * kick starting the other master. 
+ */ +} + +static const struct iommu_gather_ops msm_iommu_gather_ops = { + .tlb_flush_all = __flush_iotlb, + .tlb_add_flush = __flush_iotlb_range, + .tlb_sync = __flush_iotlb_sync, +}; + +static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end) +{ + int idx; + + do { + idx = find_next_zero_bit(map, end, start); + if (idx == end) + return -ENOSPC; + } while (test_and_set_bit(idx, map)); + + return idx; +} + +static void msm_iommu_free_ctx(unsigned long *map, int idx) +{ + clear_bit(idx, map); +} + +static void config_mids(struct msm_iommu_dev *iommu, + struct msm_iommu_ctx_dev *master) +{ + int mid, ctx, i; + + for (i = 0; i < master->num_mids; i++) { + mid = master->mids[i]; + ctx = master->num; + + SET_M2VCBR_N(iommu->base, mid, 0); + SET_CBACR_N(iommu->base, ctx, 0); + + /* Set VMID = 0 */ + SET_VMID(iommu->base, mid, 0); + + /* Set the context number for that MID to this context */ + SET_CBNDX(iommu->base, mid, ctx); + + /* Set MID associated with this context bank to 0*/ + SET_CBVMID(iommu->base, ctx, 0); + + /* Set the ASID for TLB tagging for this context */ + SET_CONTEXTIDR_ASID(iommu->base, ctx, ctx); + + /* Set security bit override to be Non-secure */ + SET_NSCFG(iommu->base, mid, 3); + } } static void __reset_context(void __iomem *base, int ctx) @@ -143,15 +262,17 @@ static void __reset_context(void __iomem *base, int ctx) SET_TLBFLPTER(base, ctx, 0); SET_TLBSLPTER(base, ctx, 0); SET_TLBLKCR(base, ctx, 0); - SET_PRRR(base, ctx, 0); - SET_NMRR(base, ctx, 0); } -static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) +static void __program_context(void __iomem *base, int ctx, + struct msm_priv *priv) { - unsigned int prrr, nmrr; __reset_context(base, ctx); + /* Turn on TEX Remap */ + SET_TRE(base, ctx, 1); + SET_AFE(base, ctx, 1); + /* Set up HTW mode */ /* TLB miss configuration: perform HTW on miss */ SET_TLBMCFG(base, ctx, 0x3); @@ -159,8 +280,13 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) /* V2P configuration: HTW for access */ SET_V2PCFG(base, ctx, 0x3); - SET_TTBCR(base, ctx, 0); - SET_TTBR0_PA(base, ctx, (pgtable >> 14)); + SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr); + SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]); + SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]); + + /* Set prrr and nmrr */ + SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr); + SET_NMRR(base, ctx, priv->cfg.arm_v7s_cfg.nmrr); /* Invalidate the TLB for this context */ SET_CTX_TLBIALL(base, ctx, 0); @@ -179,38 +305,9 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable) SET_RCOSH(base, ctx, 1); SET_RCNSH(base, ctx, 1); - /* Turn on TEX Remap */ - SET_TRE(base, ctx, 1); - - /* Set TEX remap attributes */ - RCP15_PRRR(prrr); - RCP15_NMRR(nmrr); - SET_PRRR(base, ctx, prrr); - SET_NMRR(base, ctx, nmrr); - /* Turn on BFB prefetch */ SET_BFBDFE(base, ctx, 1); -#ifdef CONFIG_IOMMU_PGTABLES_L2 - /* Configure page tables as inner-cacheable and shareable to reduce - * the TLB miss penalty. 
- */ - SET_TTBR0_SH(base, ctx, 1); - SET_TTBR1_SH(base, ctx, 1); - - SET_TTBR0_NOS(base, ctx, 1); - SET_TTBR1_NOS(base, ctx, 1); - - SET_TTBR0_IRGNH(base, ctx, 0); /* WB, WA */ - SET_TTBR0_IRGNL(base, ctx, 1); - - SET_TTBR1_IRGNH(base, ctx, 0); /* WB, WA */ - SET_TTBR1_IRGNL(base, ctx, 1); - - SET_TTBR0_ORGN(base, ctx, 1); /* WB, WA */ - SET_TTBR1_ORGN(base, ctx, 1); /* WB, WA */ -#endif - /* Enable the MMU */ SET_M(base, ctx, 1); } @@ -227,13 +324,6 @@ static struct iommu_domain *msm_iommu_domain_alloc(unsigned type) goto fail_nomem; INIT_LIST_HEAD(&priv->list_attached); - priv->pgtable = (unsigned long *)__get_free_pages(GFP_KERNEL, - get_order(SZ_16K)); - - if (!priv->pgtable) - goto fail_nomem; - - memset(priv->pgtable, 0, SZ_16K); priv->domain.geometry.aperture_start = 0; priv->domain.geometry.aperture_end = (1ULL << 32) - 1; @@ -250,304 +340,137 @@ static void msm_iommu_domain_free(struct iommu_domain *domain) { struct msm_priv *priv; unsigned long flags; - unsigned long *fl_table; - int i; spin_lock_irqsave(&msm_iommu_lock, flags); priv = to_msm_priv(domain); + kfree(priv); + spin_unlock_irqrestore(&msm_iommu_lock, flags); +} - fl_table = priv->pgtable; - - for (i = 0; i < NUM_FL_PTE; i++) - if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) - free_page((unsigned long) __va(((fl_table[i]) & - FL_BASE_MASK))); +static int msm_iommu_domain_config(struct msm_priv *priv) +{ + spin_lock_init(&priv->pgtlock); + + priv->cfg = (struct io_pgtable_cfg) { + .quirks = IO_PGTABLE_QUIRK_TLBI_ON_MAP, + .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, + .ias = 32, + .oas = 32, + .tlb = &msm_iommu_gather_ops, + .iommu_dev = priv->dev, + }; + + priv->iop = alloc_io_pgtable_ops(ARM_V7S, &priv->cfg, priv); + if (!priv->iop) { + dev_err(priv->dev, "Failed to allocate pgtable\n"); + return -EINVAL; + } - free_pages((unsigned long)priv->pgtable, get_order(SZ_16K)); - priv->pgtable = NULL; + msm_iommu_ops.pgsize_bitmap = priv->cfg.pgsize_bitmap; - kfree(priv); - spin_unlock_irqrestore(&msm_iommu_lock, flags); + return 0; } static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct msm_priv *priv; - struct msm_iommu_ctx_dev *ctx_dev; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; - struct msm_iommu_ctx_drvdata *tmp_drvdata; int ret = 0; unsigned long flags; + struct msm_iommu_dev *iommu; + struct msm_priv *priv = to_msm_priv(domain); + struct msm_iommu_ctx_dev *master; - spin_lock_irqsave(&msm_iommu_lock, flags); - - priv = to_msm_priv(domain); - - if (!dev) { - ret = -EINVAL; - goto fail; - } - - iommu_drvdata = dev_get_drvdata(dev->parent); - ctx_drvdata = dev_get_drvdata(dev); - ctx_dev = dev->platform_data; - - if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) { - ret = -EINVAL; - goto fail; - } - - if (!list_empty(&ctx_drvdata->attached_elm)) { - ret = -EBUSY; - goto fail; - } + priv->dev = dev; + msm_iommu_domain_config(priv); - list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm) - if (tmp_drvdata == ctx_drvdata) { - ret = -EBUSY; - goto fail; + spin_lock_irqsave(&msm_iommu_lock, flags); + list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) { + master = list_first_entry(&iommu->ctx_list, + struct msm_iommu_ctx_dev, + list); + if (master->of_node == dev->of_node) { + ret = __enable_clocks(iommu); + if (ret) + goto fail; + + list_for_each_entry(master, &iommu->ctx_list, list) { + if (master->num) { + dev_err(dev, "domain already attached"); + ret = -EEXIST; + goto fail; + } + master->num = + 
msm_iommu_alloc_ctx(iommu->context_map, + 0, iommu->ncb); + if (IS_ERR_VALUE(master->num)) { + ret = -ENODEV; + goto fail; + } + config_mids(iommu, master); + __program_context(iommu->base, master->num, + priv); + } + __disable_clocks(iommu); + list_add(&iommu->dom_node, &priv->list_attached); } - - ret = __enable_clocks(iommu_drvdata); - if (ret) - goto fail; - - __program_context(iommu_drvdata->base, ctx_dev->num, - __pa(priv->pgtable)); - - __disable_clocks(iommu_drvdata); - list_add(&(ctx_drvdata->attached_elm), &priv->list_attached); - ret = __flush_iotlb(domain); + } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); + return ret; } static void msm_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) { - struct msm_priv *priv; - struct msm_iommu_ctx_dev *ctx_dev; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; + struct msm_iommu_dev *iommu; + struct msm_iommu_ctx_dev *master; int ret; - spin_lock_irqsave(&msm_iommu_lock, flags); - priv = to_msm_priv(domain); - - if (!dev) - goto fail; - - iommu_drvdata = dev_get_drvdata(dev->parent); - ctx_drvdata = dev_get_drvdata(dev); - ctx_dev = dev->platform_data; - - if (!iommu_drvdata || !ctx_drvdata || !ctx_dev) - goto fail; - - ret = __flush_iotlb(domain); - if (ret) - goto fail; - - ret = __enable_clocks(iommu_drvdata); - if (ret) - goto fail; + free_io_pgtable_ops(priv->iop); - __reset_context(iommu_drvdata->base, ctx_dev->num); - __disable_clocks(iommu_drvdata); - list_del_init(&ctx_drvdata->attached_elm); + spin_lock_irqsave(&msm_iommu_lock, flags); + list_for_each_entry(iommu, &priv->list_attached, dom_node) { + ret = __enable_clocks(iommu); + if (ret) + goto fail; + list_for_each_entry(master, &iommu->ctx_list, list) { + msm_iommu_free_ctx(iommu->context_map, master->num); + __reset_context(iommu->base, master->num); + } + __disable_clocks(iommu); + } fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); } -static int msm_iommu_map(struct iommu_domain *domain, unsigned long va, +static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t len, int prot) { - struct msm_priv *priv; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; - unsigned long *fl_table; - unsigned long *fl_pte; - unsigned long fl_offset; - unsigned long *sl_table; - unsigned long *sl_pte; - unsigned long sl_offset; - unsigned int pgprot; - int ret = 0, tex, sh; - - spin_lock_irqsave(&msm_iommu_lock, flags); - - sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0; - tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK]; - - if (tex < 0 || tex > NUM_TEX_CLASS - 1) { - ret = -EINVAL; - goto fail; - } - - priv = to_msm_priv(domain); - - fl_table = priv->pgtable; - - if (len != SZ_16M && len != SZ_1M && - len != SZ_64K && len != SZ_4K) { - pr_debug("Bad size: %d\n", len); - ret = -EINVAL; - goto fail; - } - - if (!fl_table) { - pr_debug("Null page table\n"); - ret = -EINVAL; - goto fail; - } - - if (len == SZ_16M || len == SZ_1M) { - pgprot = sh ? FL_SHARED : 0; - pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0; - pgprot |= tex & 0x02 ? FL_CACHEABLE : 0; - pgprot |= tex & 0x04 ? FL_TEX0 : 0; - } else { - pgprot = sh ? SL_SHARED : 0; - pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0; - pgprot |= tex & 0x02 ? SL_CACHEABLE : 0; - pgprot |= tex & 0x04 ? 
SL_TEX0 : 0; - } - - fl_offset = FL_OFFSET(va); /* Upper 12 bits */ - fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ - - if (len == SZ_16M) { - int i = 0; - for (i = 0; i < 16; i++) - *(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION | - FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT | - FL_SHARED | FL_NG | pgprot; - } - - if (len == SZ_1M) - *fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE | FL_NG | - FL_TYPE_SECT | FL_SHARED | pgprot; - - /* Need a 2nd level table */ - if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) { - unsigned long *sl; - sl = (unsigned long *) __get_free_pages(GFP_ATOMIC, - get_order(SZ_4K)); - - if (!sl) { - pr_debug("Could not allocate second level table\n"); - ret = -ENOMEM; - goto fail; - } - - memset(sl, 0, SZ_4K); - *fl_pte = ((((int)__pa(sl)) & FL_BASE_MASK) | FL_TYPE_TABLE); - } - - sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); - sl_offset = SL_OFFSET(va); - sl_pte = sl_table + sl_offset; - - - if (len == SZ_4K) - *sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 | SL_NG | - SL_SHARED | SL_TYPE_SMALL | pgprot; - - if (len == SZ_64K) { - int i; + int ret; - for (i = 0; i < 16; i++) - *(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 | - SL_NG | SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot; - } + spin_lock_irqsave(&priv->pgtlock, flags); + ret = priv->iop->map(priv->iop, iova, pa, len, prot); + spin_unlock_irqrestore(&priv->pgtlock, flags); - ret = __flush_iotlb(domain); -fail: - spin_unlock_irqrestore(&msm_iommu_lock, flags); return ret; } -static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va, - size_t len) +static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t len) { - struct msm_priv *priv; + struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; - unsigned long *fl_table; - unsigned long *fl_pte; - unsigned long fl_offset; - unsigned long *sl_table; - unsigned long *sl_pte; - unsigned long sl_offset; - int i, ret = 0; - - spin_lock_irqsave(&msm_iommu_lock, flags); - - priv = to_msm_priv(domain); - fl_table = priv->pgtable; + spin_lock_irqsave(&priv->pgtlock, flags); + len = priv->iop->unmap(priv->iop, iova, len); + spin_unlock_irqrestore(&priv->pgtlock, flags); - if (len != SZ_16M && len != SZ_1M && - len != SZ_64K && len != SZ_4K) { - pr_debug("Bad length: %d\n", len); - goto fail; - } - - if (!fl_table) { - pr_debug("Null page table\n"); - goto fail; - } - - fl_offset = FL_OFFSET(va); /* Upper 12 bits */ - fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */ - - if (*fl_pte == 0) { - pr_debug("First level PTE is 0\n"); - goto fail; - } - - /* Unmap supersection */ - if (len == SZ_16M) - for (i = 0; i < 16; i++) - *(fl_pte+i) = 0; - - if (len == SZ_1M) - *fl_pte = 0; - - sl_table = (unsigned long *) __va(((*fl_pte) & FL_BASE_MASK)); - sl_offset = SL_OFFSET(va); - sl_pte = sl_table + sl_offset; - - if (len == SZ_64K) { - for (i = 0; i < 16; i++) - *(sl_pte+i) = 0; - } - - if (len == SZ_4K) - *sl_pte = 0; - - if (len == SZ_4K || len == SZ_64K) { - int used = 0; - - for (i = 0; i < NUM_SL_PTE; i++) - if (sl_table[i]) - used = 1; - if (!used) { - free_page((unsigned long)sl_table); - *fl_pte = 0; - } - } - - ret = __flush_iotlb(domain); - -fail: - spin_unlock_irqrestore(&msm_iommu_lock, flags); - - /* the IOMMU API requires us to return how many bytes were unmapped */ - len = ret ? 
0 : len; return len; } @@ -555,47 +478,46 @@ static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t va) { struct msm_priv *priv; - struct msm_iommu_drvdata *iommu_drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; + struct msm_iommu_dev *iommu; + struct msm_iommu_ctx_dev *master; unsigned int par; unsigned long flags; - void __iomem *base; phys_addr_t ret = 0; - int ctx; spin_lock_irqsave(&msm_iommu_lock, flags); priv = to_msm_priv(domain); - if (list_empty(&priv->list_attached)) - goto fail; + iommu = list_first_entry(&priv->list_attached, + struct msm_iommu_dev, dom_node); - ctx_drvdata = list_entry(priv->list_attached.next, - struct msm_iommu_ctx_drvdata, attached_elm); - iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent); + if (list_empty(&iommu->ctx_list)) + goto fail; - base = iommu_drvdata->base; - ctx = ctx_drvdata->num; + master = list_first_entry(&iommu->ctx_list, + struct msm_iommu_ctx_dev, list); + if (!master) + goto fail; - ret = __enable_clocks(iommu_drvdata); + ret = __enable_clocks(iommu); if (ret) goto fail; /* Invalidate context TLB */ - SET_CTX_TLBIALL(base, ctx, 0); - SET_V2PPR(base, ctx, va & V2Pxx_VA); + SET_CTX_TLBIALL(iommu->base, master->num, 0); + SET_V2PPR(iommu->base, master->num, va & V2Pxx_VA); - par = GET_PAR(base, ctx); + par = GET_PAR(iommu->base, master->num); /* We are dealing with a supersection */ - if (GET_NOFAULT_SS(base, ctx)) + if (GET_NOFAULT_SS(iommu->base, master->num)) ret = (par & 0xFF000000) | (va & 0x00FFFFFF); else /* Upper 20 bits from PAR, lower 12 from VA */ ret = (par & 0xFFFFF000) | (va & 0x00000FFF); - if (GET_FAULT(base, ctx)) + if (GET_FAULT(iommu->base, master->num)) ret = 0; - __disable_clocks(iommu_drvdata); + __disable_clocks(iommu); fail: spin_unlock_irqrestore(&msm_iommu_lock, flags); return ret; @@ -629,49 +551,92 @@ static void print_ctx_regs(void __iomem *base, int ctx) GET_TTBR0(base, ctx), GET_TTBR1(base, ctx)); pr_err("SCTLR = %08x ACTLR = %08x\n", GET_SCTLR(base, ctx), GET_ACTLR(base, ctx)); - pr_err("PRRR = %08x NMRR = %08x\n", - GET_PRRR(base, ctx), GET_NMRR(base, ctx)); +} + +static void insert_iommu_master(struct device *dev, + struct msm_iommu_dev **iommu, + struct of_phandle_args *spec) +{ + struct msm_iommu_ctx_dev *master = dev->archdata.iommu; + int sid; + + if (list_empty(&(*iommu)->ctx_list)) { + master = kzalloc(sizeof(*master), GFP_ATOMIC); + master->of_node = dev->of_node; + list_add(&master->list, &(*iommu)->ctx_list); + dev->archdata.iommu = master; + } + + for (sid = 0; sid < master->num_mids; sid++) + if (master->mids[sid] == spec->args[0]) { + dev_warn(dev, "Stream ID 0x%hx repeated; ignoring\n", + sid); + return; + } + + master->mids[master->num_mids++] = spec->args[0]; +} + +static int qcom_iommu_of_xlate(struct device *dev, + struct of_phandle_args *spec) +{ + struct msm_iommu_dev *iommu; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&msm_iommu_lock, flags); + list_for_each_entry(iommu, &qcom_iommu_devices, dev_node) + if (iommu->dev->of_node == spec->np) + break; + + if (!iommu || iommu->dev->of_node != spec->np) { + ret = -ENODEV; + goto fail; + } + + insert_iommu_master(dev, &iommu, spec); +fail: + spin_unlock_irqrestore(&msm_iommu_lock, flags); + + return ret; } irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id) { - struct msm_iommu_drvdata *drvdata = dev_id; - void __iomem *base; + struct msm_iommu_dev *iommu = dev_id; unsigned int fsr; int i, ret; spin_lock(&msm_iommu_lock); - if (!drvdata) { + if (!iommu) { pr_err("Invalid 
device ID in context interrupt handler\n"); goto fail; } - base = drvdata->base; - pr_err("Unexpected IOMMU page fault!\n"); - pr_err("base = %08x\n", (unsigned int) base); + pr_err("base = %08x\n", (unsigned int)iommu->base); - ret = __enable_clocks(drvdata); + ret = __enable_clocks(iommu); if (ret) goto fail; - for (i = 0; i < drvdata->ncb; i++) { - fsr = GET_FSR(base, i); + for (i = 0; i < iommu->ncb; i++) { + fsr = GET_FSR(iommu->base, i); if (fsr) { pr_err("Fault occurred in context %d.\n", i); pr_err("Interesting registers:\n"); - print_ctx_regs(base, i); - SET_FSR(base, i, 0x4000000F); + print_ctx_regs(iommu->base, i); + SET_FSR(iommu->base, i, 0x4000000F); } } - __disable_clocks(drvdata); + __disable_clocks(iommu); fail: spin_unlock(&msm_iommu_lock); return 0; } -static const struct iommu_ops msm_iommu_ops = { +static struct iommu_ops msm_iommu_ops = { .capable = msm_iommu_capable, .domain_alloc = msm_iommu_domain_alloc, .domain_free = msm_iommu_domain_free, @@ -682,54 +647,163 @@ static const struct iommu_ops msm_iommu_ops = { .map_sg = default_iommu_map_sg, .iova_to_phys = msm_iommu_iova_to_phys, .pgsize_bitmap = MSM_IOMMU_PGSIZES, + .of_xlate = qcom_iommu_of_xlate, }; -static int __init get_tex_class(int icp, int ocp, int mt, int nos) +static int msm_iommu_probe(struct platform_device *pdev) { - int i = 0; - unsigned int prrr = 0; - unsigned int nmrr = 0; - int c_icp, c_ocp, c_mt, c_nos; - - RCP15_PRRR(prrr); - RCP15_NMRR(nmrr); - - for (i = 0; i < NUM_TEX_CLASS; i++) { - c_nos = PRRR_NOS(prrr, i); - c_mt = PRRR_MT(prrr, i); - c_icp = NMRR_ICP(nmrr, i); - c_ocp = NMRR_OCP(nmrr, i); - - if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos) - return i; + struct resource *r; + struct msm_iommu_dev *iommu; + int ret, par, val; + + iommu = devm_kzalloc(&pdev->dev, sizeof(*iommu), GFP_KERNEL); + if (!iommu) + return -ENODEV; + + iommu->dev = &pdev->dev; + INIT_LIST_HEAD(&iommu->ctx_list); + + iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk"); + if (IS_ERR(iommu->pclk)) { + dev_err(iommu->dev, "could not get smmu_pclk\n"); + return PTR_ERR(iommu->pclk); + } + + ret = clk_prepare(iommu->pclk); + if (ret) { + dev_err(iommu->dev, "could not prepare smmu_pclk\n"); + return ret; + } + + iommu->clk = devm_clk_get(iommu->dev, "iommu_clk"); + if (IS_ERR(iommu->clk)) { + dev_err(iommu->dev, "could not get iommu_clk\n"); + clk_unprepare(iommu->pclk); + return PTR_ERR(iommu->clk); + } + + ret = clk_prepare(iommu->clk); + if (ret) { + dev_err(iommu->dev, "could not prepare iommu_clk\n"); + clk_unprepare(iommu->pclk); + return ret; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iommu->base = devm_ioremap_resource(iommu->dev, r); + if (IS_ERR(iommu->base)) { + dev_err(iommu->dev, "could not get iommu base\n"); + ret = PTR_ERR(iommu->base); + goto fail; } - return -ENODEV; + iommu->irq = platform_get_irq(pdev, 0); + if (iommu->irq < 0) { + dev_err(iommu->dev, "could not get iommu irq\n"); + ret = -ENODEV; + goto fail; + } + + ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val); + if (ret) { + dev_err(iommu->dev, "could not get ncb\n"); + goto fail; + } + iommu->ncb = val; + + msm_iommu_reset(iommu->base, iommu->ncb); + SET_M(iommu->base, 0, 1); + SET_PAR(iommu->base, 0, 0); + SET_V2PCFG(iommu->base, 0, 1); + SET_V2PPR(iommu->base, 0, 0); + par = GET_PAR(iommu->base, 0); + SET_V2PCFG(iommu->base, 0, 0); + SET_M(iommu->base, 0, 0); + + if (!par) { + pr_err("Invalid PAR value detected\n"); + ret = -ENODEV; + goto fail; + } + + ret = 
devm_request_threaded_irq(iommu->dev, iommu->irq, NULL, + msm_iommu_fault_handler, + IRQF_ONESHOT | IRQF_SHARED, + "msm_iommu_secure_irpt_handler", + iommu); + if (ret) { + pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret); + goto fail; + } + + list_add(&iommu->dev_node, &qcom_iommu_devices); + of_iommu_set_ops(pdev->dev.of_node, &msm_iommu_ops); + + pr_info("device mapped at %p, irq %d with %d ctx banks\n", + iommu->base, iommu->irq, iommu->ncb); + + return ret; +fail: + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); + return ret; +} + +static const struct of_device_id msm_iommu_dt_match[] = { + { .compatible = "qcom,apq8064-iommu" }, + {} +}; + +static int msm_iommu_remove(struct platform_device *pdev) +{ + struct msm_iommu_dev *iommu = platform_get_drvdata(pdev); + + clk_unprepare(iommu->clk); + clk_unprepare(iommu->pclk); + return 0; } -static void __init setup_iommu_tex_classes(void) +static struct platform_driver msm_iommu_driver = { + .driver = { + .name = "msm_iommu", + .of_match_table = msm_iommu_dt_match, + }, + .probe = msm_iommu_probe, + .remove = msm_iommu_remove, +}; + +static int __init msm_iommu_driver_init(void) { - msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] = - get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1); + int ret; - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] = - get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1); + ret = platform_driver_register(&msm_iommu_driver); + if (ret != 0) + pr_err("Failed to register IOMMU driver\n"); - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] = - get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1); + return ret; +} - msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] = - get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1); +static void __exit msm_iommu_driver_exit(void) +{ + platform_driver_unregister(&msm_iommu_driver); } +subsys_initcall(msm_iommu_driver_init); +module_exit(msm_iommu_driver_exit); + static int __init msm_iommu_init(void) { - setup_iommu_tex_classes(); bus_set_iommu(&platform_bus_type, &msm_iommu_ops); return 0; } -subsys_initcall(msm_iommu_init); +static int __init msm_iommu_of_setup(struct device_node *np) +{ + msm_iommu_init(); + return 0; +} + +IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", msm_iommu_of_setup); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>"); diff --git a/drivers/iommu/msm_iommu.h b/drivers/iommu/msm_iommu.h index 5c7c955e6d25..4ca25d50d679 100644 --- a/drivers/iommu/msm_iommu.h +++ b/drivers/iommu/msm_iommu.h @@ -42,74 +42,53 @@ */ #define MAX_NUM_MIDS 32 +/* Maximum number of context banks that can be present in IOMMU */ +#define IOMMU_MAX_CBS 128 + /** * struct msm_iommu_dev - a single IOMMU hardware instance - * name Human-readable name given to this IOMMU HW instance * ncb Number of context banks present on this IOMMU HW instance + * dev: IOMMU device + * irq: Interrupt number + * clk: The bus clock for this IOMMU hardware instance + * pclk: The clock for the IOMMU bus interconnect + * dev_node: list head in qcom_iommu_device_list + * dom_node: list head for domain + * ctx_list: list of 'struct msm_iommu_ctx_dev' + * context_map: Bitmap to track allocated context banks */ struct msm_iommu_dev { - const char *name; + void __iomem *base; int ncb; + struct device *dev; + int irq; + struct clk *clk; + struct clk *pclk; + struct list_head dev_node; + struct list_head dom_node; + struct list_head ctx_list; + DECLARE_BITMAP(context_map, IOMMU_MAX_CBS); }; /** * struct msm_iommu_ctx_dev - an IOMMU context bank instance - * name 
Human-readable name given to this context bank + * of_node node ptr of client device * num Index of this context bank within the hardware * mids List of Machine IDs that are to be mapped into this context * bank, terminated by -1. The MID is a set of signals on the * AXI bus that identifies the function associated with a specific * memory request. (See ARM spec). + * num_mids Total number of mids + * node list head in ctx_list */ struct msm_iommu_ctx_dev { - const char *name; + struct device_node *of_node; int num; int mids[MAX_NUM_MIDS]; + int num_mids; + struct list_head list; }; - -/** - * struct msm_iommu_drvdata - A single IOMMU hardware instance - * @base: IOMMU config port base address (VA) - * @ncb The number of contexts on this IOMMU - * @irq: Interrupt number - * @clk: The bus clock for this IOMMU hardware instance - * @pclk: The clock for the IOMMU bus interconnect - * - * A msm_iommu_drvdata holds the global driver data about a single piece - * of an IOMMU hardware instance. - */ -struct msm_iommu_drvdata { - void __iomem *base; - int irq; - int ncb; - struct clk *clk; - struct clk *pclk; -}; - -/** - * struct msm_iommu_ctx_drvdata - an IOMMU context bank instance - * @num: Hardware context number of this context - * @pdev: Platform device associated wit this HW instance - * @attached_elm: List element for domains to track which devices are - * attached to them - * - * A msm_iommu_ctx_drvdata holds the driver data for a single context bank - * within each IOMMU hardware instance - */ -struct msm_iommu_ctx_drvdata { - int num; - struct platform_device *pdev; - struct list_head attached_elm; -}; - -/* - * Look up an IOMMU context device by its context name. NULL if none found. - * Useful for testing and drivers that do not yet fully have IOMMU stuff in - * their platform devices. - */ -struct device *msm_iommu_get_ctx(const char *ctx_name); - /* * Interrupt handler for the IOMMU context fault interrupt. Hooking the * interrupt is not supported in the API yet, but this will print an error diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c deleted file mode 100644 index 4b09e815accf..000000000000 --- a/drivers/iommu/msm_iommu_dev.c +++ /dev/null @@ -1,381 +0,0 @@ -/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. 
- */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/io.h> -#include <linux/clk.h> -#include <linux/iommu.h> -#include <linux/interrupt.h> -#include <linux/err.h> -#include <linux/slab.h> - -#include "msm_iommu_hw-8xxx.h" -#include "msm_iommu.h" - -struct iommu_ctx_iter_data { - /* input */ - const char *name; - - /* output */ - struct device *dev; -}; - -static struct platform_device *msm_iommu_root_dev; - -static int each_iommu_ctx(struct device *dev, void *data) -{ - struct iommu_ctx_iter_data *res = data; - struct msm_iommu_ctx_dev *c = dev->platform_data; - - if (!res || !c || !c->name || !res->name) - return -EINVAL; - - if (!strcmp(res->name, c->name)) { - res->dev = dev; - return 1; - } - return 0; -} - -static int each_iommu(struct device *dev, void *data) -{ - return device_for_each_child(dev, data, each_iommu_ctx); -} - -struct device *msm_iommu_get_ctx(const char *ctx_name) -{ - struct iommu_ctx_iter_data r; - int found; - - if (!msm_iommu_root_dev) { - pr_err("No root IOMMU device.\n"); - goto fail; - } - - r.name = ctx_name; - found = device_for_each_child(&msm_iommu_root_dev->dev, &r, each_iommu); - - if (!found) { - pr_err("Could not find context <%s>\n", ctx_name); - goto fail; - } - - return r.dev; -fail: - return NULL; -} -EXPORT_SYMBOL(msm_iommu_get_ctx); - -static void msm_iommu_reset(void __iomem *base, int ncb) -{ - int ctx; - - SET_RPUE(base, 0); - SET_RPUEIE(base, 0); - SET_ESRRESTORE(base, 0); - SET_TBE(base, 0); - SET_CR(base, 0); - SET_SPDMBE(base, 0); - SET_TESTBUSCR(base, 0); - SET_TLBRSW(base, 0); - SET_GLOBAL_TLBIALL(base, 0); - SET_RPU_ACR(base, 0); - SET_TLBLKCRWE(base, 1); - - for (ctx = 0; ctx < ncb; ctx++) { - SET_BPRCOSH(base, ctx, 0); - SET_BPRCISH(base, ctx, 0); - SET_BPRCNSH(base, ctx, 0); - SET_BPSHCFG(base, ctx, 0); - SET_BPMTCFG(base, ctx, 0); - SET_ACTLR(base, ctx, 0); - SET_SCTLR(base, ctx, 0); - SET_FSRRESTORE(base, ctx, 0); - SET_TTBR0(base, ctx, 0); - SET_TTBR1(base, ctx, 0); - SET_TTBCR(base, ctx, 0); - SET_BFBCR(base, ctx, 0); - SET_PAR(base, ctx, 0); - SET_FAR(base, ctx, 0); - SET_CTX_TLBIALL(base, ctx, 0); - SET_TLBFLPTER(base, ctx, 0); - SET_TLBSLPTER(base, ctx, 0); - SET_TLBLKCR(base, ctx, 0); - SET_PRRR(base, ctx, 0); - SET_NMRR(base, ctx, 0); - SET_CONTEXTIDR(base, ctx, 0); - } -} - -static int msm_iommu_probe(struct platform_device *pdev) -{ - struct resource *r; - struct clk *iommu_clk; - struct clk *iommu_pclk; - struct msm_iommu_drvdata *drvdata; - struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev); - void __iomem *regs_base; - int ret, irq, par; - - if (pdev->id == -1) { - msm_iommu_root_dev = pdev; - return 0; - } - - drvdata = kzalloc(sizeof(*drvdata), GFP_KERNEL); - - if (!drvdata) { - ret = -ENOMEM; - goto fail; - } - - if (!iommu_dev) { - ret = -ENODEV; - goto fail; - } - - iommu_pclk = clk_get(NULL, "smmu_pclk"); - if (IS_ERR(iommu_pclk)) { - ret = -ENODEV; - goto fail; - } - - ret = clk_prepare_enable(iommu_pclk); - if (ret) - goto fail_enable; - - iommu_clk = clk_get(&pdev->dev, "iommu_clk"); - - if (!IS_ERR(iommu_clk)) { - if (clk_get_rate(iommu_clk) == 0) - clk_set_rate(iommu_clk, 1); - - ret = clk_prepare_enable(iommu_clk); - if (ret) { - clk_put(iommu_clk); - goto fail_pclk; - } - } else - iommu_clk = NULL; - - r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "physbase"); - regs_base = devm_ioremap_resource(&pdev->dev, r); - if (IS_ERR(regs_base)) { - ret = PTR_ERR(regs_base); - goto fail_clk; 
- } - - irq = platform_get_irq_byname(pdev, "secure_irq"); - if (irq < 0) { - ret = -ENODEV; - goto fail_clk; - } - - msm_iommu_reset(regs_base, iommu_dev->ncb); - - SET_M(regs_base, 0, 1); - SET_PAR(regs_base, 0, 0); - SET_V2PCFG(regs_base, 0, 1); - SET_V2PPR(regs_base, 0, 0); - par = GET_PAR(regs_base, 0); - SET_V2PCFG(regs_base, 0, 0); - SET_M(regs_base, 0, 0); - - if (!par) { - pr_err("%s: Invalid PAR value detected\n", iommu_dev->name); - ret = -ENODEV; - goto fail_clk; - } - - ret = request_irq(irq, msm_iommu_fault_handler, 0, - "msm_iommu_secure_irpt_handler", drvdata); - if (ret) { - pr_err("Request IRQ %d failed with ret=%d\n", irq, ret); - goto fail_clk; - } - - - drvdata->pclk = iommu_pclk; - drvdata->clk = iommu_clk; - drvdata->base = regs_base; - drvdata->irq = irq; - drvdata->ncb = iommu_dev->ncb; - - pr_info("device %s mapped at %p, irq %d with %d ctx banks\n", - iommu_dev->name, regs_base, irq, iommu_dev->ncb); - - platform_set_drvdata(pdev, drvdata); - - clk_disable(iommu_clk); - - clk_disable(iommu_pclk); - - return 0; -fail_clk: - if (iommu_clk) { - clk_disable(iommu_clk); - clk_put(iommu_clk); - } -fail_pclk: - clk_disable_unprepare(iommu_pclk); -fail_enable: - clk_put(iommu_pclk); -fail: - kfree(drvdata); - return ret; -} - -static int msm_iommu_remove(struct platform_device *pdev) -{ - struct msm_iommu_drvdata *drv = NULL; - - drv = platform_get_drvdata(pdev); - if (drv) { - if (drv->clk) { - clk_unprepare(drv->clk); - clk_put(drv->clk); - } - clk_unprepare(drv->pclk); - clk_put(drv->pclk); - memset(drv, 0, sizeof(*drv)); - kfree(drv); - } - return 0; -} - -static int msm_iommu_ctx_probe(struct platform_device *pdev) -{ - struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev); - struct msm_iommu_drvdata *drvdata; - struct msm_iommu_ctx_drvdata *ctx_drvdata; - int i, ret; - - if (!c || !pdev->dev.parent) - return -EINVAL; - - drvdata = dev_get_drvdata(pdev->dev.parent); - if (!drvdata) - return -ENODEV; - - ctx_drvdata = kzalloc(sizeof(*ctx_drvdata), GFP_KERNEL); - if (!ctx_drvdata) - return -ENOMEM; - - ctx_drvdata->num = c->num; - ctx_drvdata->pdev = pdev; - - INIT_LIST_HEAD(&ctx_drvdata->attached_elm); - platform_set_drvdata(pdev, ctx_drvdata); - - ret = clk_prepare_enable(drvdata->pclk); - if (ret) - goto fail; - - if (drvdata->clk) { - ret = clk_prepare_enable(drvdata->clk); - if (ret) { - clk_disable_unprepare(drvdata->pclk); - goto fail; - } - } - - /* Program the M2V tables for this context */ - for (i = 0; i < MAX_NUM_MIDS; i++) { - int mid = c->mids[i]; - if (mid == -1) - break; - - SET_M2VCBR_N(drvdata->base, mid, 0); - SET_CBACR_N(drvdata->base, c->num, 0); - - /* Set VMID = 0 */ - SET_VMID(drvdata->base, mid, 0); - - /* Set the context number for that MID to this context */ - SET_CBNDX(drvdata->base, mid, c->num); - - /* Set MID associated with this context bank to 0*/ - SET_CBVMID(drvdata->base, c->num, 0); - - /* Set the ASID for TLB tagging for this context */ - SET_CONTEXTIDR_ASID(drvdata->base, c->num, c->num); - - /* Set security bit override to be Non-secure */ - SET_NSCFG(drvdata->base, mid, 3); - } - - clk_disable(drvdata->clk); - clk_disable(drvdata->pclk); - - dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num); - return 0; -fail: - kfree(ctx_drvdata); - return ret; -} - -static int msm_iommu_ctx_remove(struct platform_device *pdev) -{ - struct msm_iommu_ctx_drvdata *drv = NULL; - drv = platform_get_drvdata(pdev); - if (drv) { - memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata)); - kfree(drv); - } - return 0; -} - -static 
struct platform_driver msm_iommu_driver = { - .driver = { - .name = "msm_iommu", - }, - .probe = msm_iommu_probe, - .remove = msm_iommu_remove, -}; - -static struct platform_driver msm_iommu_ctx_driver = { - .driver = { - .name = "msm_iommu_ctx", - }, - .probe = msm_iommu_ctx_probe, - .remove = msm_iommu_ctx_remove, -}; - -static struct platform_driver * const drivers[] = { - &msm_iommu_driver, - &msm_iommu_ctx_driver, -}; - -static int __init msm_iommu_driver_init(void) -{ - return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); -} - -static void __exit msm_iommu_driver_exit(void) -{ - platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); -} - -subsys_initcall(msm_iommu_driver_init); -module_exit(msm_iommu_driver_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>"); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 929a66a81b2b..b12c12d74c33 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -11,6 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ +#include <linux/bootmem.h> #include <linux/bug.h> #include <linux/clk.h> #include <linux/component.h> @@ -33,7 +34,7 @@ #include <dt-bindings/memory/mt8173-larb-port.h> #include <soc/mediatek/smi.h> -#include "io-pgtable.h" +#include "mtk_iommu.h" #define REG_MMU_PT_BASE_ADDR 0x000 @@ -56,7 +57,7 @@ #define F_MMU_TF_PROTECT_SEL(prot) (((prot) & 0x3) << 5) #define REG_MMU_IVRP_PADDR 0x114 -#define F_MMU_IVRP_PA_SET(pa) ((pa) >> 1) +#define F_MMU_IVRP_PA_SET(pa, ext) (((pa) >> 1) | ((!!(ext)) << 31)) #define REG_MMU_INT_CONTROL0 0x120 #define F_L2_MULIT_HIT_EN BIT(0) @@ -92,20 +93,6 @@ #define MTK_PROTECT_PA_ALIGN 128 -struct mtk_iommu_suspend_reg { - u32 standard_axi_mode; - u32 dcm_dis; - u32 ctrl_reg; - u32 int_control0; - u32 int_main_control; -}; - -struct mtk_iommu_client_priv { - struct list_head client; - unsigned int mtk_m4u_id; - struct device *m4udev; -}; - struct mtk_iommu_domain { spinlock_t pgtlock; /* lock for page table */ @@ -115,18 +102,6 @@ struct mtk_iommu_domain { struct iommu_domain domain; }; -struct mtk_iommu_data { - void __iomem *base; - int irq; - struct device *dev; - struct clk *bclk; - phys_addr_t protect_base; /* protect memory base */ - struct mtk_iommu_suspend_reg reg; - struct mtk_iommu_domain *m4u_dom; - struct iommu_group *m4u_group; - struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */ -}; - static struct iommu_ops mtk_iommu_ops; static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) @@ -257,6 +232,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) .iommu_dev = data->dev, }; + if (data->enable_4GB) + dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB; + dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); @@ -264,7 +242,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) } /* Update our support page sizes bitmap */ - mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap; + dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0], data->base + REG_MMU_PT_BASE_ADDR); @@ -450,7 +428,6 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) if (!dev->archdata.iommu) { /* Get the m4u device */ m4updev = of_find_device_by_node(args->np); - of_node_put(args->np); if (WARN_ON(!m4updev)) return -EINVAL; @@ -530,7 +507,7 @@ static int 
mtk_iommu_hw_init(const struct mtk_iommu_data *data) F_INT_PRETETCH_TRANSATION_FIFO_FAULT; writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL); - writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base), + writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), data->base + REG_MMU_IVRP_PADDR); writel_relaxed(0, data->base + REG_MMU_DCM_DIS); @@ -547,25 +524,6 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) return 0; } -static int compare_of(struct device *dev, void *data) -{ - return dev->of_node == data; -} - -static int mtk_iommu_bind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - return component_bind_all(dev, &data->smi_imu); -} - -static void mtk_iommu_unbind(struct device *dev) -{ - struct mtk_iommu_data *data = dev_get_drvdata(dev); - - component_unbind_all(dev, &data->smi_imu); -} - static const struct component_master_ops mtk_iommu_com_ops = { .bind = mtk_iommu_bind, .unbind = mtk_iommu_unbind, @@ -591,6 +549,9 @@ static int mtk_iommu_probe(struct platform_device *pdev) return -ENOMEM; data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); + /* Whether the current dram is over 4GB */ + data->enable_4GB = !!(max_pfn > (0xffffffffUL >> PAGE_SHIFT)); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); data->base = devm_ioremap_resource(dev, res); if (IS_ERR(data->base)) @@ -690,7 +651,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0); writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL); - writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base), + writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB), base + REG_MMU_IVRP_PADDR); return 0; } diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h new file mode 100644 index 000000000000..3dab13b4a211 --- /dev/null +++ b/drivers/iommu/mtk_iommu.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2015-2016 MediaTek Inc. + * Author: Honghui Zhang <honghui.zhang@mediatek.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _MTK_IOMMU_H_ +#define _MTK_IOMMU_H_ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <soc/mediatek/smi.h> + +#include "io-pgtable.h" + +struct mtk_iommu_suspend_reg { + u32 standard_axi_mode; + u32 dcm_dis; + u32 ctrl_reg; + u32 int_control0; + u32 int_main_control; +}; + +struct mtk_iommu_client_priv { + struct list_head client; + unsigned int mtk_m4u_id; + struct device *m4udev; +}; + +struct mtk_iommu_domain; + +struct mtk_iommu_data { + void __iomem *base; + int irq; + struct device *dev; + struct clk *bclk; + phys_addr_t protect_base; /* protect memory base */ + struct mtk_iommu_suspend_reg reg; + struct mtk_iommu_domain *m4u_dom; + struct iommu_group *m4u_group; + struct mtk_smi_iommu smi_imu; /* SMI larb iommu info */ + bool enable_4GB; +}; + +static inline int compare_of(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static inline int mtk_iommu_bind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + return component_bind_all(dev, &data->smi_imu); +} + +static inline void mtk_iommu_unbind(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + + component_unbind_all(dev, &data->smi_imu); +} + +#endif diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c new file mode 100644 index 000000000000..b8aeb0768483 --- /dev/null +++ b/drivers/iommu/mtk_iommu_v1.c @@ -0,0 +1,727 @@ +/* + * Copyright (c) 2015-2016 MediaTek Inc. + * Author: Honghui Zhang <honghui.zhang@mediatek.com> + * + * Based on driver/iommu/mtk_iommu.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include <linux/bootmem.h> +#include <linux/bug.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/device.h> +#include <linux/dma-iommu.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iommu.h> +#include <linux/iopoll.h> +#include <linux/kmemleak.h> +#include <linux/list.h> +#include <linux/of_address.h> +#include <linux/of_iommu.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <asm/barrier.h> +#include <asm/dma-iommu.h> +#include <linux/module.h> +#include <dt-bindings/memory/mt2701-larb-port.h> +#include <soc/mediatek/smi.h> +#include "mtk_iommu.h" + +#define REG_MMU_PT_BASE_ADDR 0x000 + +#define F_ALL_INVLD 0x2 +#define F_MMU_INV_RANGE 0x1 +#define F_INVLD_EN0 BIT(0) +#define F_INVLD_EN1 BIT(1) + +#define F_MMU_FAULT_VA_MSK 0xfffff000 +#define MTK_PROTECT_PA_ALIGN 128 + +#define REG_MMU_CTRL_REG 0x210 +#define F_MMU_CTRL_COHERENT_EN BIT(8) +#define REG_MMU_IVRP_PADDR 0x214 +#define REG_MMU_INT_CONTROL 0x220 +#define F_INT_TRANSLATION_FAULT BIT(0) +#define F_INT_MAIN_MULTI_HIT_FAULT BIT(1) +#define F_INT_INVALID_PA_FAULT BIT(2) +#define F_INT_ENTRY_REPLACEMENT_FAULT BIT(3) +#define F_INT_TABLE_WALK_FAULT BIT(4) +#define F_INT_TLB_MISS_FAULT BIT(5) +#define F_INT_PFH_DMA_FIFO_OVERFLOW BIT(6) +#define F_INT_MISS_DMA_FIFO_OVERFLOW BIT(7) + +#define F_MMU_TF_PROTECT_SEL(prot) (((prot) & 0x3) << 5) +#define F_INT_CLR_BIT BIT(12) + +#define REG_MMU_FAULT_ST 0x224 +#define REG_MMU_FAULT_VA 0x228 +#define REG_MMU_INVLD_PA 0x22C +#define REG_MMU_INT_ID 0x388 +#define REG_MMU_INVALIDATE 0x5c0 +#define REG_MMU_INVLD_START_A 0x5c4 +#define REG_MMU_INVLD_END_A 0x5c8 + +#define REG_MMU_INV_SEL 0x5d8 +#define REG_MMU_STANDARD_AXI_MODE 0x5e8 + +#define REG_MMU_DCM 0x5f0 +#define F_MMU_DCM_ON BIT(1) +#define REG_MMU_CPE_DONE 0x60c +#define F_DESC_VALID 0x2 +#define F_DESC_NONSEC BIT(3) +#define MT2701_M4U_TF_LARB(TF) (6 - (((TF) >> 13) & 0x7)) +#define MT2701_M4U_TF_PORT(TF) (((TF) >> 8) & 0xF) +/* MTK generation one iommu HW only support 4K size mapping */ +#define MT2701_IOMMU_PAGE_SHIFT 12 +#define MT2701_IOMMU_PAGE_SIZE (1UL << MT2701_IOMMU_PAGE_SHIFT) + +/* + * MTK m4u support 4GB iova address space, and only support 4K page + * mapping. So the pagetable size should be exactly as 4M. 
+ */ +#define M2701_IOMMU_PGT_SIZE SZ_4M + +struct mtk_iommu_domain { + spinlock_t pgtlock; /* lock for page table */ + struct iommu_domain domain; + u32 *pgt_va; + dma_addr_t pgt_pa; + struct mtk_iommu_data *data; +}; + +static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct mtk_iommu_domain, domain); +} + +static const int mt2701_m4u_in_larb[] = { + LARB0_PORT_OFFSET, LARB1_PORT_OFFSET, + LARB2_PORT_OFFSET, LARB3_PORT_OFFSET +}; + +static inline int mt2701_m4u_to_larb(int id) +{ + int i; + + for (i = ARRAY_SIZE(mt2701_m4u_in_larb) - 1; i >= 0; i--) + if ((id) >= mt2701_m4u_in_larb[i]) + return i; + + return 0; +} + +static inline int mt2701_m4u_to_port(int id) +{ + int larb = mt2701_m4u_to_larb(id); + + return id - mt2701_m4u_in_larb[larb]; +} + +static void mtk_iommu_tlb_flush_all(struct mtk_iommu_data *data) +{ + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE); + wmb(); /* Make sure the tlb flush all done */ +} + +static void mtk_iommu_tlb_flush_range(struct mtk_iommu_data *data, + unsigned long iova, size_t size) +{ + int ret; + u32 tmp; + + writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, + data->base + REG_MMU_INV_SEL); + writel_relaxed(iova & F_MMU_FAULT_VA_MSK, + data->base + REG_MMU_INVLD_START_A); + writel_relaxed((iova + size - 1) & F_MMU_FAULT_VA_MSK, + data->base + REG_MMU_INVLD_END_A); + writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE); + + ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, + tmp, tmp != 0, 10, 100000); + if (ret) { + dev_warn(data->dev, + "Partial TLB flush timed out, falling back to full flush\n"); + mtk_iommu_tlb_flush_all(data); + } + /* Clear the CPE status */ + writel_relaxed(0, data->base + REG_MMU_CPE_DONE); +} + +static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) +{ + struct mtk_iommu_data *data = dev_id; + struct mtk_iommu_domain *dom = data->m4u_dom; + u32 int_state, regval, fault_iova, fault_pa; + unsigned int fault_larb, fault_port; + + /* Read error information from registers */ + int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST); + fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA); + + fault_iova &= F_MMU_FAULT_VA_MSK; + fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA); + regval = readl_relaxed(data->base + REG_MMU_INT_ID); + fault_larb = MT2701_M4U_TF_LARB(regval); + fault_port = MT2701_M4U_TF_PORT(regval); + + /* + * MTK v1 iommu HW could not determine whether the fault is read or + * write fault, report as read fault. 
+ */ + if (report_iommu_fault(&dom->domain, data->dev, fault_iova, + IOMMU_FAULT_READ)) + dev_err_ratelimited(data->dev, + "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d\n", + int_state, fault_iova, fault_pa, + fault_larb, fault_port); + + /* Interrupt clear */ + regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL); + regval |= F_INT_CLR_BIT; + writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL); + + mtk_iommu_tlb_flush_all(data); + + return IRQ_HANDLED; +} + +static void mtk_iommu_config(struct mtk_iommu_data *data, + struct device *dev, bool enable) +{ + struct mtk_iommu_client_priv *head, *cur, *next; + struct mtk_smi_larb_iommu *larb_mmu; + unsigned int larbid, portid; + + head = dev->archdata.iommu; + list_for_each_entry_safe(cur, next, &head->client, client) { + larbid = mt2701_m4u_to_larb(cur->mtk_m4u_id); + portid = mt2701_m4u_to_port(cur->mtk_m4u_id); + larb_mmu = &data->smi_imu.larb_imu[larbid]; + + dev_dbg(dev, "%s iommu port: %d\n", + enable ? "enable" : "disable", portid); + + if (enable) + larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); + else + larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); + } +} + +static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data) +{ + struct mtk_iommu_domain *dom = data->m4u_dom; + + spin_lock_init(&dom->pgtlock); + + dom->pgt_va = dma_zalloc_coherent(data->dev, + M2701_IOMMU_PGT_SIZE, + &dom->pgt_pa, GFP_KERNEL); + if (!dom->pgt_va) + return -ENOMEM; + + writel(dom->pgt_pa, data->base + REG_MMU_PT_BASE_ADDR); + + dom->data = data; + + return 0; +} + +static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type) +{ + struct mtk_iommu_domain *dom; + + if (type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + return &dom->domain; +} + +static void mtk_iommu_domain_free(struct iommu_domain *domain) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_data *data = dom->data; + + dma_free_coherent(data->dev, M2701_IOMMU_PGT_SIZE, + dom->pgt_va, dom->pgt_pa); + kfree(to_mtk_domain(domain)); +} + +static int mtk_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + struct mtk_iommu_client_priv *priv = dev->archdata.iommu; + struct mtk_iommu_data *data; + int ret; + + if (!priv) + return -ENODEV; + + data = dev_get_drvdata(priv->m4udev); + if (!data->m4u_dom) { + data->m4u_dom = dom; + ret = mtk_iommu_domain_finalise(data); + if (ret) { + data->m4u_dom = NULL; + return ret; + } + } + + mtk_iommu_config(data, dev, true); + return 0; +} + +static void mtk_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct mtk_iommu_client_priv *priv = dev->archdata.iommu; + struct mtk_iommu_data *data; + + if (!priv) + return; + + data = dev_get_drvdata(priv->m4udev); + mtk_iommu_config(data, dev, false); +} + +static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + unsigned long flags; + unsigned int i; + u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); + u32 pabase = (u32)paddr; + int map_size = 0; + + spin_lock_irqsave(&dom->pgtlock, flags); + for (i = 0; i < page_num; i++) { + if (pgt_base_iova[i]) { + memset(pgt_base_iova, 0, i * sizeof(u32)); + break; + } + pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC; + pabase += MT2701_IOMMU_PAGE_SIZE; + 
map_size += MT2701_IOMMU_PAGE_SIZE; + } + + spin_unlock_irqrestore(&dom->pgtlock, flags); + + mtk_iommu_tlb_flush_range(dom->data, iova, size); + + return map_size == size ? 0 : -EEXIST; +} + +static size_t mtk_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long flags; + u32 *pgt_base_iova = dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT); + unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT; + + spin_lock_irqsave(&dom->pgtlock, flags); + memset(pgt_base_iova, 0, page_num * sizeof(u32)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + mtk_iommu_tlb_flush_range(dom->data, iova, size); + + return size; +} + +static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct mtk_iommu_domain *dom = to_mtk_domain(domain); + unsigned long flags; + phys_addr_t pa; + + spin_lock_irqsave(&dom->pgtlock, flags); + pa = *(dom->pgt_va + (iova >> MT2701_IOMMU_PAGE_SHIFT)); + pa = pa & (~(MT2701_IOMMU_PAGE_SIZE - 1)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return pa; +} + +/* + * MTK generation one iommu HW only support one iommu domain, and all the client + * sharing the same iova address space. + */ +static int mtk_iommu_create_mapping(struct device *dev, + struct of_phandle_args *args) +{ + struct mtk_iommu_client_priv *head, *priv, *next; + struct platform_device *m4updev; + struct dma_iommu_mapping *mtk_mapping; + struct device *m4udev; + int ret; + + if (args->args_count != 1) { + dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n", + args->args_count); + return -EINVAL; + } + + if (!dev->archdata.iommu) { + /* Get the m4u device */ + m4updev = of_find_device_by_node(args->np); + if (WARN_ON(!m4updev)) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOMEM; + + dev->archdata.iommu = head; + INIT_LIST_HEAD(&head->client); + head->m4udev = &m4updev->dev; + } else { + head = dev->archdata.iommu; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto err_free_mem; + } + priv->mtk_m4u_id = args->args[0]; + list_add_tail(&priv->client, &head->client); + + m4udev = head->m4udev; + mtk_mapping = m4udev->archdata.iommu; + if (!mtk_mapping) { + /* MTK iommu support 4GB iova address space. 
*/ + mtk_mapping = arm_iommu_create_mapping(&platform_bus_type, + 0, 1ULL << 32); + if (IS_ERR(mtk_mapping)) { + ret = PTR_ERR(mtk_mapping); + goto err_free_mem; + } + m4udev->archdata.iommu = mtk_mapping; + } + + ret = arm_iommu_attach_device(dev, mtk_mapping); + if (ret) + goto err_release_mapping; + + return 0; + +err_release_mapping: + arm_iommu_release_mapping(mtk_mapping); + m4udev->archdata.iommu = NULL; +err_free_mem: + list_for_each_entry_safe(priv, next, &head->client, client) + kfree(priv); + kfree(head); + dev->archdata.iommu = NULL; + return ret; +} + +static int mtk_iommu_add_device(struct device *dev) +{ + struct iommu_group *group; + struct of_phandle_args iommu_spec; + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, dev->of_node, "iommus", + "#iommu-cells", 0) { + int count = of_phandle_iterator_args(&it, iommu_spec.args, + MAX_PHANDLE_ARGS); + iommu_spec.np = of_node_get(it.node); + iommu_spec.args_count = count; + + mtk_iommu_create_mapping(dev, &iommu_spec); + of_node_put(iommu_spec.np); + } + + if (!dev->archdata.iommu) /* Not a iommu client device */ + return -ENODEV; + + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + iommu_group_put(group); + return 0; +} + +static void mtk_iommu_remove_device(struct device *dev) +{ + struct mtk_iommu_client_priv *head, *cur, *next; + + head = dev->archdata.iommu; + if (!head) + return; + + list_for_each_entry_safe(cur, next, &head->client, client) { + list_del(&cur->client); + kfree(cur); + } + kfree(head); + dev->archdata.iommu = NULL; + + iommu_group_remove_device(dev); +} + +static struct iommu_group *mtk_iommu_device_group(struct device *dev) +{ + struct mtk_iommu_data *data; + struct mtk_iommu_client_priv *priv; + + priv = dev->archdata.iommu; + if (!priv) + return ERR_PTR(-ENODEV); + + /* All the client devices are in the same m4u iommu-group */ + data = dev_get_drvdata(priv->m4udev); + if (!data->m4u_group) { + data->m4u_group = iommu_group_alloc(); + if (IS_ERR(data->m4u_group)) + dev_err(dev, "Failed to allocate M4U IOMMU group\n"); + } + return data->m4u_group; +} + +static int mtk_iommu_hw_init(const struct mtk_iommu_data *data) +{ + u32 regval; + int ret; + + ret = clk_prepare_enable(data->bclk); + if (ret) { + dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret); + return ret; + } + + regval = F_MMU_CTRL_COHERENT_EN | F_MMU_TF_PROTECT_SEL(2); + writel_relaxed(regval, data->base + REG_MMU_CTRL_REG); + + regval = F_INT_TRANSLATION_FAULT | + F_INT_MAIN_MULTI_HIT_FAULT | + F_INT_INVALID_PA_FAULT | + F_INT_ENTRY_REPLACEMENT_FAULT | + F_INT_TABLE_WALK_FAULT | + F_INT_TLB_MISS_FAULT | + F_INT_PFH_DMA_FIFO_OVERFLOW | + F_INT_MISS_DMA_FIFO_OVERFLOW; + writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL); + + /* protect memory,hw will write here while translation fault */ + writel_relaxed(data->protect_base, + data->base + REG_MMU_IVRP_PADDR); + + writel_relaxed(F_MMU_DCM_ON, data->base + REG_MMU_DCM); + + if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0, + dev_name(data->dev), (void *)data)) { + writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR); + clk_disable_unprepare(data->bclk); + dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq); + return -ENODEV; + } + + return 0; +} + +static struct iommu_ops mtk_iommu_ops = { + .domain_alloc = mtk_iommu_domain_alloc, + .domain_free = mtk_iommu_domain_free, + .attach_dev = mtk_iommu_attach_device, + .detach_dev = mtk_iommu_detach_device, + .map = mtk_iommu_map, + .unmap = 
mtk_iommu_unmap, + .map_sg = default_iommu_map_sg, + .iova_to_phys = mtk_iommu_iova_to_phys, + .add_device = mtk_iommu_add_device, + .remove_device = mtk_iommu_remove_device, + .device_group = mtk_iommu_device_group, + .pgsize_bitmap = ~0UL << MT2701_IOMMU_PAGE_SHIFT, +}; + +static const struct of_device_id mtk_iommu_of_ids[] = { + { .compatible = "mediatek,mt2701-m4u", }, + {} +}; + +static const struct component_master_ops mtk_iommu_com_ops = { + .bind = mtk_iommu_bind, + .unbind = mtk_iommu_unbind, +}; + +static int mtk_iommu_probe(struct platform_device *pdev) +{ + struct mtk_iommu_data *data; + struct device *dev = &pdev->dev; + struct resource *res; + struct component_match *match = NULL; + struct of_phandle_args larb_spec; + struct of_phandle_iterator it; + void *protect; + int larb_nr, ret, err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->dev = dev; + + /* Protect memory. HW will access here while translation fault.*/ + protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, + GFP_KERNEL | GFP_DMA); + if (!protect) + return -ENOMEM; + data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->base = devm_ioremap_resource(dev, res); + if (IS_ERR(data->base)) + return PTR_ERR(data->base); + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + data->bclk = devm_clk_get(dev, "bclk"); + if (IS_ERR(data->bclk)) + return PTR_ERR(data->bclk); + + larb_nr = 0; + of_for_each_phandle(&it, err, dev->of_node, + "mediatek,larbs", NULL, 0) { + struct platform_device *plarbdev; + int count = of_phandle_iterator_args(&it, larb_spec.args, + MAX_PHANDLE_ARGS); + + if (count) + continue; + + larb_spec.np = of_node_get(it.node); + if (!of_device_is_available(larb_spec.np)) + continue; + + plarbdev = of_find_device_by_node(larb_spec.np); + of_node_put(larb_spec.np); + if (!plarbdev) { + plarbdev = of_platform_device_create( + larb_spec.np, NULL, + platform_bus_type.dev_root); + if (!plarbdev) + return -EPROBE_DEFER; + } + + data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev; + component_match_add(dev, &match, compare_of, larb_spec.np); + larb_nr++; + } + + data->smi_imu.larb_nr = larb_nr; + + platform_set_drvdata(pdev, data); + + ret = mtk_iommu_hw_init(data); + if (ret) + return ret; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); + + return component_master_add_with_match(dev, &mtk_iommu_com_ops, match); +} + +static int mtk_iommu_remove(struct platform_device *pdev) +{ + struct mtk_iommu_data *data = platform_get_drvdata(pdev); + + if (iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, NULL); + + clk_disable_unprepare(data->bclk); + devm_free_irq(&pdev->dev, data->irq, data); + component_master_del(&pdev->dev, &mtk_iommu_com_ops); + return 0; +} + +static int __maybe_unused mtk_iommu_suspend(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_suspend_reg *reg = &data->reg; + void __iomem *base = data->base; + + reg->standard_axi_mode = readl_relaxed(base + + REG_MMU_STANDARD_AXI_MODE); + reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM); + reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG); + reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL); + return 0; +} + +static int __maybe_unused mtk_iommu_resume(struct device *dev) +{ + struct mtk_iommu_data *data = dev_get_drvdata(dev); + struct mtk_iommu_suspend_reg 
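Editor's note on the probe path above (the suspend/resume hooks continue below): the "protect" buffer that the hardware writes to on a translation fault must sit on an MTK_PROTECT_PA_ALIGN boundary, so the driver allocates twice that size and rounds the physical address up, which always leaves a fully aligned window inside the allocation. A small self-contained sketch of that round-up; the helper name is illustrative.

/* Illustrative sketch only; not part of the commit above. */
#include <stdint.h>

/* Round addr up to the next multiple of align (align must be a power of two). */
static uintptr_t align_up(uintptr_t addr, uintptr_t align)
{
	return (addr + align - 1) & ~(align - 1);
}

/*
 * With a buffer of 2 * align bytes starting at base, the window
 * [align_up(base, align), align_up(base, align) + align) is guaranteed
 * to lie entirely inside [base, base + 2 * align).
 */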
*reg = &data->reg; + void __iomem *base = data->base; + + writel_relaxed(data->m4u_dom->pgt_pa, base + REG_MMU_PT_BASE_ADDR); + writel_relaxed(reg->standard_axi_mode, + base + REG_MMU_STANDARD_AXI_MODE); + writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM); + writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG); + writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL); + writel_relaxed(data->protect_base, base + REG_MMU_IVRP_PADDR); + return 0; +} + +static const struct dev_pm_ops mtk_iommu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume) +}; + +static struct platform_driver mtk_iommu_driver = { + .probe = mtk_iommu_probe, + .remove = mtk_iommu_remove, + .driver = { + .name = "mtk-iommu", + .of_match_table = mtk_iommu_of_ids, + .pm = &mtk_iommu_pm_ops, + } +}; + +static int __init m4u_init(void) +{ + return platform_driver_register(&mtk_iommu_driver); +} + +static void __exit m4u_exit(void) +{ + return platform_driver_unregister(&mtk_iommu_driver); +} + +subsys_initcall(m4u_init); +module_exit(m4u_exit); + +MODULE_DESCRIPTION("IOMMU API for MTK architected m4u v1 implementations"); +MODULE_AUTHOR("Honghui Zhang <honghui.zhang@mediatek.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 5fea665af99d..57f23eaaa2f9 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -98,12 +98,12 @@ EXPORT_SYMBOL_GPL(of_get_dma_window); struct of_iommu_node { struct list_head list; struct device_node *np; - struct iommu_ops *ops; + const struct iommu_ops *ops; }; static LIST_HEAD(of_iommu_list); static DEFINE_SPINLOCK(of_iommu_lock); -void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops) +void of_iommu_set_ops(struct device_node *np, const struct iommu_ops *ops) { struct of_iommu_node *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); @@ -119,10 +119,10 @@ void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops) spin_unlock(&of_iommu_lock); } -struct iommu_ops *of_iommu_get_ops(struct device_node *np) +const struct iommu_ops *of_iommu_get_ops(struct device_node *np) { struct of_iommu_node *node; - struct iommu_ops *ops = NULL; + const struct iommu_ops *ops = NULL; spin_lock(&of_iommu_lock); list_for_each_entry(node, &of_iommu_list, list) @@ -134,12 +134,12 @@ struct iommu_ops *of_iommu_get_ops(struct device_node *np) return ops; } -struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np) +const struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np) { struct of_phandle_args iommu_spec; struct device_node *np; - struct iommu_ops *ops = NULL; + const struct iommu_ops *ops = NULL; int idx = 0; /* @@ -174,7 +174,7 @@ err_put_node: return NULL; } -void __init of_iommu_init(void) +static int __init of_iommu_init(void) { struct device_node *np; const struct of_device_id *match, *matches = &__iommu_of_table; @@ -186,4 +186,7 @@ void __init of_iommu_init(void) pr_err("Failed to initialise IOMMU %s\n", of_node_full_name(np)); } + + return 0; } +postcore_initcall_sync(of_iommu_init); diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 9bc20e2119a3..505548aafeff 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -136,7 +136,7 @@ static ssize_t iotlb_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, struct seq_file *s) { seq_printf(s, "%08x %08x %01x\n", cr->cam, cr->ram, - (cr->cam & MMU_CAM_P) ? 1 : 0); + (cr->cam & MMU_CAM_P) ? 
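Editor's note on the mtk suspend/resume hooks above (the omap-iommu-debug hunk continues below): the M4U loses its register state across system sleep, so suspend snapshots the handful of configuration registers and resume rewrites them, re-pointing the page-table base first and the fault-protect address last, both of which are recomputable from driver state. A compact restatement using the register names visible in the driver above; the struct and helper names here are illustrative.

/* Illustrative helpers; not part of the commit above. */
struct m4u_saved_regs {
	u32 axi_mode, dcm, ctrl, int_ctrl;
};

static void m4u_save_regs(void __iomem *base, struct m4u_saved_regs *s)
{
	s->axi_mode = readl_relaxed(base + REG_MMU_STANDARD_AXI_MODE);
	s->dcm      = readl_relaxed(base + REG_MMU_DCM);
	s->ctrl     = readl_relaxed(base + REG_MMU_CTRL_REG);
	s->int_ctrl = readl_relaxed(base + REG_MMU_INT_CONTROL);
}

static void m4u_restore_regs(void __iomem *base, const struct m4u_saved_regs *s,
			     u32 pgt_pa, u32 protect_pa)
{
	writel_relaxed(pgt_pa,      base + REG_MMU_PT_BASE_ADDR);
	writel_relaxed(s->axi_mode, base + REG_MMU_STANDARD_AXI_MODE);
	writel_relaxed(s->dcm,      base + REG_MMU_DCM);
	writel_relaxed(s->ctrl,     base + REG_MMU_CTRL_REG);
	writel_relaxed(s->int_ctrl, base + REG_MMU_INT_CONTROL);
	writel_relaxed(protect_pa,  base + REG_MMU_IVRP_PADDR);
}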
1 : 0); return 0; } diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 3dc5b65f3990..e2583cce2cc1 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -628,10 +628,12 @@ iopgtable_store_entry_core(struct omap_iommu *obj, struct iotlb_entry *e) break; default: fn = NULL; - BUG(); break; } + if (WARN_ON(!fn)) + return -EINVAL; + prot = get_iopte_attr(e); spin_lock(&obj->page_table_lock); @@ -987,7 +989,6 @@ static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - iopgtable_clear_entry_all(obj); omap_iommu_debugfs_remove(obj); pm_runtime_disable(obj->dev); @@ -1161,7 +1162,8 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) * should never fail, but please keep this around to ensure * we keep the hardware happy */ - BUG_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE)); + if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) + goto fail_align; clean_dcache_area(omap_domain->pgtable, IOPGD_TABLE_SIZE); spin_lock_init(&omap_domain->lock); @@ -1172,6 +1174,8 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) return &omap_domain->domain; +fail_align: + kfree(omap_domain->pgtable); fail_nomem: kfree(omap_domain); out: diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 5710a06c3049..9afcbf79f0b0 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -4,11 +4,10 @@ * published by the Free Software Foundation. */ -#include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <linux/compiler.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/dma-iommu.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -77,7 +76,9 @@ struct rk_iommu_domain { struct list_head iommus; + struct platform_device *pdev; u32 *dt; /* page directory table */ + dma_addr_t dt_dma; spinlock_t iommus_lock; /* lock for iommus list */ spinlock_t dt_lock; /* lock for modifying page directory table */ @@ -93,14 +94,12 @@ struct rk_iommu { struct iommu_domain *domain; /* domain to which iommu is attached */ }; -static inline void rk_table_flush(u32 *va, unsigned int count) +static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, + unsigned int count) { - phys_addr_t pa_start = virt_to_phys(va); - phys_addr_t pa_end = virt_to_phys(va + count); - size_t size = pa_end - pa_start; + size_t size = count * sizeof(u32); /* count of u32 entry */ - __cpuc_flush_dcache_area(va, size); - outer_flush_range(pa_start, pa_end); + dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE); } static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) @@ -183,10 +182,9 @@ static inline bool rk_dte_is_pt_valid(u32 dte) return dte & RK_DTE_PT_VALID; } -static u32 rk_mk_dte(u32 *pt) +static inline u32 rk_mk_dte(dma_addr_t pt_dma) { - phys_addr_t pt_phys = virt_to_phys(pt); - return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; + return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID; } /* @@ -603,13 +601,16 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain, static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, dma_addr_t iova) { + struct device *dev = &rk_domain->pdev->dev; u32 *page_table, *dte_addr; - u32 dte; + u32 dte_index, dte; phys_addr_t pt_phys; + dma_addr_t pt_dma; assert_spin_locked(&rk_domain->dt_lock); - dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)]; + 
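Editor's note on the omap-iommu hunks above (the rockchip hunk continues below): the old code hit BUG()/BUG_ON() on an unexpected page size or a misaligned page table, which halts the machine; the new code warns, unwinds, and returns an error the caller can handle. A minimal sketch of that pattern; the function name is illustrative.

/* Illustrative sketch only; not part of the commit above. */
static int example_check_pgtable(void *pgtable, unsigned long align)
{
	/* Log a backtrace but keep running; fail only this operation. */
	if (WARN_ON(!IS_ALIGNED((unsigned long)pgtable, align)))
		return -EINVAL;

	/* ... normal set-up path ... */
	return 0;
}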
dte_index = rk_iova_dte_index(iova); + dte_addr = &rk_domain->dt[dte_index]; dte = *dte_addr; if (rk_dte_is_pt_valid(dte)) goto done; @@ -618,19 +619,27 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (!page_table) return ERR_PTR(-ENOMEM); - dte = rk_mk_dte(page_table); - *dte_addr = dte; + pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pt_dma)) { + dev_err(dev, "DMA mapping error while allocating page table\n"); + free_page((unsigned long)page_table); + return ERR_PTR(-ENOMEM); + } - rk_table_flush(page_table, NUM_PT_ENTRIES); - rk_table_flush(dte_addr, 1); + dte = rk_mk_dte(pt_dma); + *dte_addr = dte; + rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES); + rk_table_flush(rk_domain, + rk_domain->dt_dma + dte_index * sizeof(u32), 1); done: pt_phys = rk_dte_pt_address(dte); return (u32 *)phys_to_virt(pt_phys); } static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, - u32 *pte_addr, dma_addr_t iova, size_t size) + u32 *pte_addr, dma_addr_t pte_dma, + size_t size) { unsigned int pte_count; unsigned int pte_total = size / SPAGE_SIZE; @@ -645,14 +654,14 @@ static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain, pte_addr[pte_count] = rk_mk_pte_invalid(pte); } - rk_table_flush(pte_addr, pte_count); + rk_table_flush(rk_domain, pte_dma, pte_count); return pte_count * SPAGE_SIZE; } static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, - dma_addr_t iova, phys_addr_t paddr, size_t size, - int prot) + dma_addr_t pte_dma, dma_addr_t iova, + phys_addr_t paddr, size_t size, int prot) { unsigned int pte_count; unsigned int pte_total = size / SPAGE_SIZE; @@ -671,7 +680,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, paddr += SPAGE_SIZE; } - rk_table_flush(pte_addr, pte_count); + rk_table_flush(rk_domain, pte_dma, pte_total); /* * Zap the first and last iova to evict from iotlb any previously @@ -684,7 +693,8 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr, return 0; unwind: /* Unmap the range of iovas that we just mapped */ - rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE); + rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, + pte_count * SPAGE_SIZE); iova += pte_count * SPAGE_SIZE; page_phys = rk_pte_page_address(pte_addr[pte_count]); @@ -699,8 +709,9 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; - dma_addr_t iova = (dma_addr_t)_iova; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; u32 *page_table, *pte_addr; + u32 dte_index, pte_index; int ret; spin_lock_irqsave(&rk_domain->dt_lock, flags); @@ -718,8 +729,13 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, return PTR_ERR(page_table); } - pte_addr = &page_table[rk_iova_pte_index(iova)]; - ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot); + dte_index = rk_domain->dt[rk_iova_dte_index(iova)]; + pte_index = rk_iova_pte_index(iova); + pte_addr = &page_table[pte_index]; + pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32); + ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova, + paddr, size, prot); + spin_unlock_irqrestore(&rk_domain->dt_lock, flags); return ret; @@ -730,7 +746,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; - dma_addr_t iova = 
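Editor's note on the rockchip page-table handling above (the unmap path continues below): instead of virt_to_phys() plus explicit cache maintenance, each newly allocated page table is now handed to the IOMMU through the DMA API, and CPU updates are published with dma_sync_single_for_device() on just the touched entries. A hedged sketch of that pattern; helper names are illustrative and the error handling is simplified.

/* Illustrative helpers; not part of the commit above. */
static dma_addr_t example_map_table(struct device *dev, u32 *table, size_t size)
{
	dma_addr_t dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, dma))
		return 0;	/* simplified; the real code frees the page and returns -ENOMEM */
	return dma;
}

static void example_publish(struct device *dev, dma_addr_t table_dma,
			    unsigned int first_entry, unsigned int count)
{
	/* The CPU wrote entries [first_entry, first_entry + count); flush them. */
	dma_sync_single_for_device(dev, table_dma + first_entry * sizeof(u32),
				   count * sizeof(u32), DMA_TO_DEVICE);
}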
(dma_addr_t)_iova; + dma_addr_t pte_dma, iova = (dma_addr_t)_iova; phys_addr_t pt_phys; u32 dte; u32 *pte_addr; @@ -754,7 +770,8 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, pt_phys = rk_dte_pt_address(dte); pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova); - unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size); + pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32); + unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size); spin_unlock_irqrestore(&rk_domain->dt_lock, flags); @@ -787,7 +804,6 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; int ret, i; - phys_addr_t dte_addr; /* * Allow 'virtual devices' (e.g., drm) to attach to domain. @@ -807,15 +823,15 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, iommu->domain = domain; - ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq, + ret = devm_request_irq(iommu->dev, iommu->irq, rk_iommu_irq, IRQF_SHARED, dev_name(dev), iommu); if (ret) return ret; - dte_addr = virt_to_phys(rk_domain->dt); for (i = 0; i < iommu->num_mmu; i++) { - rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr); - rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); + rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, + rk_domain->dt_dma); + rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -860,7 +876,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, } rk_iommu_disable_stall(iommu); - devm_free_irq(dev, iommu->irq, iommu); + devm_free_irq(iommu->dev, iommu->irq, iommu); iommu->domain = NULL; @@ -870,14 +886,30 @@ static void rk_iommu_detach_device(struct iommu_domain *domain, static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) { struct rk_iommu_domain *rk_domain; + struct platform_device *pdev; + struct device *iommu_dev; - if (type != IOMMU_DOMAIN_UNMANAGED) + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) return NULL; - rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); - if (!rk_domain) + /* Register a pdev per domain, so DMA API can base on this *dev + * even some virtual master doesn't have an iommu slave + */ + pdev = platform_device_register_simple("rk_iommu_domain", + PLATFORM_DEVID_AUTO, NULL, 0); + if (IS_ERR(pdev)) return NULL; + rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL); + if (!rk_domain) + goto err_unreg_pdev; + + rk_domain->pdev = pdev; + + if (type == IOMMU_DOMAIN_DMA && + iommu_get_dma_cookie(&rk_domain->domain)) + goto err_unreg_pdev; + /* * rk32xx iommus use a 2 level pagetable. * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. 
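Editor's note on rk_iommu_domain_alloc() above (the allocation continues below): every dma_map/dma_sync call needs a struct device to take DMA ops and masks from, and a domain may be created by a purely virtual master (e.g. DRM) that has no IOMMU slave device of its own, so each domain registers a small "rk_iommu_domain" platform device and performs all page-table DMA against &pdev->dev. A sketch of that registration step; the function name is illustrative.

/* Illustrative sketch only; not part of the commit above. */
static struct platform_device *example_domain_dev(void)
{
	/* Auto-assigned id; no resources, no platform data. The matching
	 * "rk_iommu_domain" driver sets up DMA masks in its probe. */
	return platform_device_register_simple("rk_iommu_domain",
					       PLATFORM_DEVID_AUTO, NULL, 0);
}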
@@ -885,18 +917,36 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) */ rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); if (!rk_domain->dt) - goto err_dt; + goto err_put_cookie; + + iommu_dev = &pdev->dev; + rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt, + SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) { + dev_err(iommu_dev, "DMA map error for DT\n"); + goto err_free_dt; + } - rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES); + rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES); spin_lock_init(&rk_domain->iommus_lock); spin_lock_init(&rk_domain->dt_lock); INIT_LIST_HEAD(&rk_domain->iommus); + rk_domain->domain.geometry.aperture_start = 0; + rk_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32); + rk_domain->domain.geometry.force_aperture = true; + return &rk_domain->domain; -err_dt: - kfree(rk_domain); +err_free_dt: + free_page((unsigned long)rk_domain->dt); +err_put_cookie: + if (type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&rk_domain->domain); +err_unreg_pdev: + platform_device_unregister(pdev); + return NULL; } @@ -912,12 +962,20 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) if (rk_dte_is_pt_valid(dte)) { phys_addr_t pt_phys = rk_dte_pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); + dma_unmap_single(&rk_domain->pdev->dev, pt_phys, + SPAGE_SIZE, DMA_TO_DEVICE); free_page((unsigned long)page_table); } } + dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma, + SPAGE_SIZE, DMA_TO_DEVICE); free_page((unsigned long)rk_domain->dt); - kfree(rk_domain); + + if (domain->type == IOMMU_DOMAIN_DMA) + iommu_put_dma_cookie(&rk_domain->domain); + + platform_device_unregister(rk_domain->pdev); } static bool rk_iommu_is_dev_iommu_master(struct device *dev) @@ -1022,17 +1080,43 @@ static const struct iommu_ops rk_iommu_ops = { .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, + .map_sg = default_iommu_map_sg, .add_device = rk_iommu_add_device, .remove_device = rk_iommu_remove_device, .iova_to_phys = rk_iommu_iova_to_phys, .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP, }; +static int rk_iommu_domain_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + + /* Set dma_ops for dev, otherwise it would be dummy_dma_ops */ + arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false); + + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32)); + + return 0; +} + +static struct platform_driver rk_iommu_domain_driver = { + .probe = rk_iommu_domain_probe, + .driver = { + .name = "rk_iommu_domain", + }, +}; + static int rk_iommu_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct rk_iommu *iommu; struct resource *res; + int num_res = pdev->num_resources; int i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); @@ -1042,13 +1126,16 @@ static int rk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; - iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * iommu->num_mmu, + + iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * num_res, GFP_KERNEL); if (!iommu->bases) return -ENOMEM; - for (i = 0; i < pdev->num_resources; i++) { + for (i = 0; i < num_res; i++) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); + if (!res) + continue; iommu->bases[i] = 
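Editor's note on the error handling above (the probe loop continues below): rk_iommu_domain_alloc() now acquires several resources in sequence (platform device, DMA cookie, page directory, DMA mapping), and each failure jumps to a label that releases exactly what has been acquired so far, in reverse order. A self-contained toy sketch of that ladder, with malloc/free standing in for the real resources; every name here is illustrative.

/* Illustrative toy example; not part of the commit above. */
#include <stdlib.h>

struct toy { void *a, *b, *c; };

static int toy_setup(struct toy *t)
{
	t->a = malloc(32);
	if (!t->a)
		return -1;		/* nothing to undo yet */

	t->b = malloc(32);
	if (!t->b)
		goto err_free_a;	/* undo step 1 */

	t->c = malloc(32);
	if (!t->c)
		goto err_free_b;	/* undo step 2, then step 1 */

	return 0;

err_free_b:
	free(t->b);
err_free_a:
	free(t->a);
	return -1;
}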
devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(iommu->bases[i])) continue; @@ -1101,11 +1188,19 @@ static int __init rk_iommu_init(void) if (ret) return ret; - return platform_driver_register(&rk_iommu_driver); + ret = platform_driver_register(&rk_iommu_domain_driver); + if (ret) + return ret; + + ret = platform_driver_register(&rk_iommu_driver); + if (ret) + platform_driver_unregister(&rk_iommu_domain_driver); + return ret; } static void __exit rk_iommu_exit(void) { platform_driver_unregister(&rk_iommu_driver); + platform_driver_unregister(&rk_iommu_domain_driver); } subsys_initcall(rk_iommu_init); |
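Editor's note on rk_iommu_init()/rk_iommu_exit() above: the driver now registers two platform drivers, and if the second registration fails the first is rolled back, so the initcall either fully succeeds or leaves nothing registered; module exit unregisters them in reverse order. A minimal sketch of that rollback shape, using illustrative driver names in place of the real ones.

/* Illustrative sketch only; not part of the commit above. */
static struct platform_driver example_domain_driver;	/* stands in for rk_iommu_domain_driver */
static struct platform_driver example_iommu_driver;	/* stands in for rk_iommu_driver */

static int __init example_init(void)
{
	int ret;

	ret = platform_driver_register(&example_domain_driver);
	if (ret)
		return ret;

	ret = platform_driver_register(&example_iommu_driver);
	if (ret)
		platform_driver_unregister(&example_domain_driver);

	return ret;
}

static void __exit example_exit(void)
{
	platform_driver_unregister(&example_iommu_driver);
	platform_driver_unregister(&example_domain_driver);
}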