17 files changed, 1396 insertions, 476 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 340893727538..9f69b561f5db 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -13,6 +13,10 @@ menuconfig IOMMU_SUPPORT
 
 if IOMMU_SUPPORT
 
+config OF_IOMMU
+       def_bool y
+       depends on OF
+
 # MSM IOMMU support
 config MSM_IOMMU
 	bool "MSM IOMMU Support"
@@ -154,7 +158,7 @@ config TEGRA_IOMMU_GART
 
 config TEGRA_IOMMU_SMMU
 	bool "Tegra SMMU IOMMU Support"
-	depends on ARCH_TEGRA_3x_SOC
+	depends on ARCH_TEGRA_3x_SOC && TEGRA_AHB
 	select IOMMU_API
 	help
 	  Enables support for remapping discontiguous physical memory
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 76e54ef796de..14a4d5fc94fa 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
+obj-$(CONFIG_OF_IOMMU)	+= of_iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 625626391f2d..b64502dfa9f4 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -256,11 +256,21 @@ static bool check_device(struct device *dev)
 	return true;
 }
 
+static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
+{
+	pci_dev_put(*from);
+	*from = to;
+}
+
+#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
+
 static int iommu_init_device(struct device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev);
 	struct iommu_dev_data *dev_data;
+	struct iommu_group *group;
 	u16 alias;
+	int ret;
 
 	if (dev->archdata.iommu)
 		return 0;
@@ -281,8 +291,62 @@ static int iommu_init_device(struct device *dev)
 			return -ENOTSUPP;
 		}
 		dev_data->alias_data = alias_data;
+
+		dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
+	} else
+		dma_pdev = pci_dev_get(pdev);
+
+	/* Account for quirked devices */
+	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+
+	/*
+	 * If it's a multifunction device that does not support our
+	 * required ACS flags, add to the same group as function 0.
+	 */
+	if (dma_pdev->multifunction &&
+	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
+		swap_pci_ref(&dma_pdev,
+			     pci_get_slot(dma_pdev->bus,
+					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
+					  0)));
+
+	/*
+	 * Devices on the root bus go through the iommu.  If that's not us,
+	 * find the next upstream device and test ACS up to the root bus.
+	 * Finding the next device may require skipping virtual buses.
+	 */
+	while (!pci_is_root_bus(dma_pdev->bus)) {
+		struct pci_bus *bus = dma_pdev->bus;
+
+		while (!bus->self) {
+			if (!pci_is_root_bus(bus))
+				bus = bus->parent;
+			else
+				goto root_bus;
+		}
+
+		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
+			break;
+
+		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
+	}
+
+root_bus:
+	group = iommu_group_get(&dma_pdev->dev);
+	pci_dev_put(dma_pdev);
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
 	}
 
+	ret = iommu_group_add_device(group, dev);
+
+	iommu_group_put(group);
+
+	if (ret)
+		return ret;
+
 	if (pci_iommuv2_capable(pdev)) {
 		struct amd_iommu *iommu;
 
@@ -311,6 +375,8 @@ static void iommu_ignore_device(struct device *dev)
 
 static void iommu_uninit_device(struct device *dev)
 {
+	iommu_group_remove_device(dev);
+
 	/*
 	 * Nothing to do here - we keep dev_data around for unplugged devices
 	 * and reuse it when the device is re-plugged - not doing so would
@@ -384,7 +450,6 @@ DECLARE_STATS_COUNTER(invalidate_iotlb);
 DECLARE_STATS_COUNTER(invalidate_iotlb_all);
 DECLARE_STATS_COUNTER(pri_requests);
 
-
 static struct dentry *stats_dir;
 static struct dentry *de_fflush;
 
@@ -2073,7 +2138,7 @@ out_err:
 /* FIXME: Move this to PCI code */
 #define PCI_PRI_TLP_OFF		(1 << 15)
 
-bool pci_pri_tlp_required(struct pci_dev *pdev)
+static bool pci_pri_tlp_required(struct pci_dev *pdev)
 {
 	u16 status;
 	int pos;
@@ -2254,6 +2319,18 @@ static int device_change_notifier(struct notifier_block *nb,
 
 		iommu_init_device(dev);
 
+		/*
+		 * dev_data may still be NULL here since it only
+		 * got initialized in iommu_init_device - fetch it again
+		 */
+		dev_data = get_dev_data(dev);
+
+		if (iommu_pass_through || dev_data->iommu_v2) {
+			dev_data->passthrough = true;
+			attach_device(dev, pt_domain);
+			break;
+		}
+
 		domain = domain_for_device(dev);
 
 		/* allocate a protection domain if a device is added */
@@ -2271,10 +2348,7 @@ static int device_change_notifier(struct notifier_block *nb,
 
 		dev_data = get_dev_data(dev);
 
-		if (!dev_data->passthrough)
-			dev->archdata.dma_ops = &amd_iommu_dma_ops;
-		else
-			dev->archdata.dma_ops = &nommu_dma_ops;
+		dev->archdata.dma_ops = &amd_iommu_dma_ops;
 
 		break;
 	case BUS_NOTIFY_DEL_DEVICE:
@@ -2972,6 +3046,11 @@ int __init amd_iommu_init_dma_ops(void)
 
 	amd_iommu_stats_init();
 
+	if (amd_iommu_unmap_flush)
+		pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
+	else
+		pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
+
 	return 0;
 
 free_domains:
@@ -3078,6 +3157,10 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
 
 	dom->priv = domain;
 
+	dom->geometry.aperture_start = 0;
+	dom->geometry.aperture_end   = ~0ULL;
+	dom->geometry.force_aperture = true;
+
 	return 0;
 
 out_free:
@@ -3236,26 +3319,6 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
 	return 0;
 }
 
-static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
-{
-	struct iommu_dev_data *dev_data = dev->archdata.iommu;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	u16 devid;
-
-	if (!dev_data)
-		return -ENODEV;
-
-	if (pdev->is_virtfn || !iommu_group_mf)
-		devid = dev_data->devid;
-	else
-		devid = calc_devid(pdev->bus->number,
-				   PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
-
-	*groupid = amd_iommu_alias_table[devid];
-
-	return 0;
-}
-
 static struct iommu_ops amd_iommu_ops = {
 	.domain_init = amd_iommu_domain_init,
 	.domain_destroy = amd_iommu_domain_destroy,
@@ -3265,7 +3328,6 @@ static struct iommu_ops amd_iommu_ops = {
 	.unmap = amd_iommu_unmap,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.domain_has_cap = amd_iommu_domain_has_cap,
-	.device_group = amd_iommu_device_group,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 };
 
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index a33612f3206f..18a89b760aaa 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -26,6 +26,8 @@
 #include <linux/msi.h>
 #include <linux/amd-iommu.h>
 #include <linux/export.h>
+#include <linux/acpi.h>
+#include <acpi/acpi.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -122,7 +124,7 @@ struct ivmd_header {
 
 bool amd_iommu_dump;
 
-static int __initdata amd_iommu_detected;
+static bool amd_iommu_detected;
 static bool __initdata amd_iommu_disabled;
 
 u16 amd_iommu_last_bdf;			/* largest PCI device id we have
@@ -149,11 +151,6 @@ bool amd_iommu_v2_present __read_mostly;
 bool amd_iommu_force_isolation __read_mostly;
 
 /*
- * The ACPI table parsing functions set this variable on an error
- */
-static int __initdata amd_iommu_init_err;
-
-/*
  * List of protection domains - used during resume
  */
 LIST_HEAD(amd_iommu_pd_list);
@@ -190,13 +187,23 @@ static u32 dev_table_size;	/* size of the device table */
 static u32 alias_table_size;	/* size of the alias table */
 static u32 rlookup_table_size;	/* size if the rlookup table */
 
-/*
- * This function flushes all internal caches of
- * the IOMMU used by this driver.
- */
-extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+enum iommu_init_state {
+	IOMMU_START_STATE,
+	IOMMU_IVRS_DETECTED,
+	IOMMU_ACPI_FINISHED,
+	IOMMU_ENABLED,
+	IOMMU_PCI_INIT,
+	IOMMU_INTERRUPTS_EN,
+	IOMMU_DMA_OPS,
+	IOMMU_INITIALIZED,
+	IOMMU_NOT_FOUND,
+	IOMMU_INIT_ERROR,
+};
+
+static enum iommu_init_state init_state = IOMMU_START_STATE;
 
 static int amd_iommu_enable_interrupts(void);
+static int __init iommu_go_to_state(enum iommu_init_state state);
 
 static inline void update_last_devid(u16 devid)
 {
@@ -321,23 +328,6 @@ static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 /* Function to enable the hardware */
 static void iommu_enable(struct amd_iommu *iommu)
 {
-	static const char * const feat_str[] = {
-		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
-		"IA", "GA", "HE", "PC", NULL
-	};
-	int i;
-
-	printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx",
-	       dev_name(&iommu->dev->dev), iommu->cap_ptr);
-
-	if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
-		printk(KERN_CONT " extended features: ");
-		for (i = 0; feat_str[i]; ++i)
-			if (iommu_feature(iommu, (1ULL << i)))
-				printk(KERN_CONT " %s", feat_str[i]);
-	}
-	printk(KERN_CONT "\n");
-
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
@@ -358,7 +348,7 @@ static void iommu_disable(struct amd_iommu *iommu)
  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
  * the system has one.
  */
-static u8 * __init iommu_map_mmio_space(u64 address)
+static u8 __iomem * __init iommu_map_mmio_space(u64 address)
 {
 	if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
 		pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
@@ -367,7 +357,7 @@ static u8 * __init iommu_map_mmio_space(u64 address)
 		return NULL;
 	}
 
-	return ioremap_nocache(address, MMIO_REGION_LENGTH);
+	return (u8 __iomem *)ioremap_nocache(address, MMIO_REGION_LENGTH);
 }
 
 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
@@ -463,11 +453,9 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
 	 */
 	for (i = 0; i < table->length; ++i)
 		checksum += p[i];
-	if (checksum != 0) {
+	if (checksum != 0)
 		/* ACPI table corrupt */
-		amd_iommu_init_err = -ENODEV;
-		return 0;
-	}
+		return -ENODEV;
 
 	p += IVRS_HEADER_LENGTH;
 
@@ -726,90 +714,6 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
 }
 
 /*
- * This function reads some important data from the IOMMU PCI space and
- * initializes the driver data structure with it. It reads the hardware
- * capabilities and the first/last device entries
- */
-static void __init init_iommu_from_pci(struct amd_iommu *iommu)
-{
-	int cap_ptr = iommu->cap_ptr;
-	u32 range, misc, low, high;
-	int i, j;
-
-	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
-			      &iommu->cap);
-	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
-			      &range);
-	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
-			      &misc);
-
-	iommu->first_device = calc_devid(MMIO_GET_BUS(range),
-					 MMIO_GET_FD(range));
-	iommu->last_device = calc_devid(MMIO_GET_BUS(range),
-					MMIO_GET_LD(range));
-	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
-
-	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
-		amd_iommu_iotlb_sup = false;
-
-	/* read extended feature bits */
-	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
-	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
-
-	iommu->features = ((u64)high << 32) | low;
-
-	if (iommu_feature(iommu, FEATURE_GT)) {
-		int glxval;
-		u32 pasids;
-		u64 shift;
-
-		shift   = iommu->features & FEATURE_PASID_MASK;
-		shift >>= FEATURE_PASID_SHIFT;
-		pasids  = (1 << shift);
-
-		amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
-
-		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
-		glxval >>= FEATURE_GLXVAL_SHIFT;
-
-		if (amd_iommu_max_glx_val == -1)
-			amd_iommu_max_glx_val = glxval;
-		else
-			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
-	}
-
-	if (iommu_feature(iommu, FEATURE_GT) &&
-	    iommu_feature(iommu, FEATURE_PPR)) {
-		iommu->is_iommu_v2   = true;
-		amd_iommu_v2_present = true;
-	}
-
-	if (!is_rd890_iommu(iommu->dev))
-		return;
-
-	/*
-	 * Some rd890 systems may not be fully reconfigured by the BIOS, so
-	 * it's necessary for us to store this information so it can be
-	 * reprogrammed on resume
-	 */
-
-	pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
-			      &iommu->stored_addr_lo);
-	pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
-			      &iommu->stored_addr_hi);
-
-	/* Low bit locks writes to configuration space */
-	iommu->stored_addr_lo &= ~1;
-
-	for (i = 0; i < 6; i++)
-		for (j = 0; j < 0x12; j++)
-			iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
-
-	for (i = 0; i < 0x83; i++)
-		iommu->stored_l2[i] = iommu_read_l2(iommu, i);
-}
-
-/*
  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
  * initializes the hardware and our data structures with it.
  */
@@ -1025,13 +929,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	/*
 	 * Copy data from ACPI table entry to the iommu struct
 	 */
-	iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
-	if (!iommu->dev)
-		return 1;
-
-	iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number,
-						PCI_DEVFN(0, 0));
-
+	iommu->devid   = h->devid;
 	iommu->cap_ptr = h->cap_ptr;
 	iommu->pci_seg = h->pci_seg;
 	iommu->mmio_phys = h->mmio_phys;
@@ -1049,20 +947,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 
 	iommu->int_enabled = false;
 
-	init_iommu_from_pci(iommu);
 	init_iommu_from_acpi(iommu, h);
 	init_iommu_devices(iommu);
 
-	if (iommu_feature(iommu, FEATURE_PPR)) {
-		iommu->ppr_log = alloc_ppr_log(iommu);
-		if (!iommu->ppr_log)
-			return -ENOMEM;
-	}
-
-	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
-		amd_iommu_np_cache = true;
-
-	return pci_enable_device(iommu->dev);
+	return 0;
 }
 
 /*
@@ -1093,16 +981,12 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 				    h->mmio_phys);
 
 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
-			if (iommu == NULL) {
-				amd_iommu_init_err = -ENOMEM;
-				return 0;
-			}
+			if (iommu == NULL)
+				return -ENOMEM;
 
 			ret = init_iommu_one(iommu, h);
-			if (ret) {
-				amd_iommu_init_err = ret;
-				return 0;
-			}
+			if (ret)
+				return ret;
 			break;
 		default:
 			break;
@@ -1115,6 +999,145 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	return 0;
 }
 
+static int iommu_init_pci(struct amd_iommu *iommu)
+{
+	int cap_ptr = iommu->cap_ptr;
+	u32 range, misc, low, high;
+
+	iommu->dev = pci_get_bus_and_slot(PCI_BUS(iommu->devid),
+					  iommu->devid & 0xff);
+	if (!iommu->dev)
+		return -ENODEV;
+
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
+			      &iommu->cap);
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
+			      &range);
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
+			      &misc);
+
+	iommu->first_device = calc_devid(MMIO_GET_BUS(range),
+					 MMIO_GET_FD(range));
+	iommu->last_device = calc_devid(MMIO_GET_BUS(range),
+					MMIO_GET_LD(range));
+
+	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
+		amd_iommu_iotlb_sup = false;
+
+	/* read extended feature bits */
+	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
+	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
+
+	iommu->features = ((u64)high << 32) | low;
+
+	if (iommu_feature(iommu, FEATURE_GT)) {
+		int glxval;
+		u32 pasids;
+		u64 shift;
+
+		shift   = iommu->features & FEATURE_PASID_MASK;
+		shift >>= FEATURE_PASID_SHIFT;
+		pasids  = (1 << shift);
+
+		amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
+
+		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
+		glxval >>= FEATURE_GLXVAL_SHIFT;
+
+		if (amd_iommu_max_glx_val == -1)
+			amd_iommu_max_glx_val = glxval;
+		else
+			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
+	}
+
+	if (iommu_feature(iommu, FEATURE_GT) &&
+	    iommu_feature(iommu, FEATURE_PPR)) {
+		iommu->is_iommu_v2   = true;
+		amd_iommu_v2_present = true;
+	}
+
+	if (iommu_feature(iommu, FEATURE_PPR)) {
+		iommu->ppr_log = alloc_ppr_log(iommu);
+		if (!iommu->ppr_log)
+			return -ENOMEM;
+	}
+
+	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+		amd_iommu_np_cache = true;
+
+	if (is_rd890_iommu(iommu->dev)) {
+		int i, j;
+
+		iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number,
+				PCI_DEVFN(0, 0));
+
+		/*
+		 * Some rd890 systems may not be fully reconfigured by the
+		 * BIOS, so it's necessary for us to store this information so
+		 * it can be reprogrammed on resume
+		 */
+		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
+				&iommu->stored_addr_lo);
+		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
+				&iommu->stored_addr_hi);
+
+		/* Low bit locks writes to configuration space */
+		iommu->stored_addr_lo &= ~1;
+
+		for (i = 0; i < 6; i++)
+			for (j = 0; j < 0x12; j++)
+				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
+
+		for (i = 0; i < 0x83; i++)
+			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
+	}
+
+	return pci_enable_device(iommu->dev);
+}
+
+/* Log each IOMMU's device name, capability pointer and extended features */
+static void print_iommu_info(void)
+{
+	static const char * const feat_str[] = {
+		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
+		"IA", "GA", "HE", "PC"
+	};
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu) {
+		int i;
+
+		pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n",
+			dev_name(&iommu->dev->dev), iommu->cap_ptr);
+		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
+			pr_info("AMD-Vi:  Extended features: ");
+			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
+				if (iommu_feature(iommu, (1ULL << i)))
+					pr_cont(" %s", feat_str[i]);
+			}
+			pr_cont("\n");	/* end only the line begun above */
+		}
+	}
+}
+
+/* Run iommu_init_pci() on every IOMMU, then amd_iommu_init_devices() */
+static int __init amd_iommu_init_pci(void)
+{
+	struct amd_iommu *iommu;
+	int ret = 0;
+
+	for_each_iommu(iommu) {
+		ret = iommu_init_pci(iommu);
+		if (ret)
+			return ret; /* keep the first error, don't overwrite it */
+	}
+
+	ret = amd_iommu_init_devices();
+	print_iommu_info();
+
+	return ret;
+}
+
 /****************************************************************************
  *
  * The following functions initialize the MSI interrupts for all IOMMUs
@@ -1217,7 +1240,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 /* called for unity map ACPI definition */
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
-	struct unity_map_entry *e = 0;
+	struct unity_map_entry *e = NULL;
 	char *s;
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
@@ -1369,7 +1392,7 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
-static void enable_iommus(void)
+static void early_enable_iommus(void)
 {
 	struct amd_iommu *iommu;
 
@@ -1379,14 +1402,29 @@ static void enable_iommus(void)
 		iommu_set_device_table(iommu);
 		iommu_enable_command_buffer(iommu);
 		iommu_enable_event_buffer(iommu);
-		iommu_enable_ppr_log(iommu);
-		iommu_enable_gt(iommu);
 		iommu_set_exclusion_range(iommu);
 		iommu_enable(iommu);
 		iommu_flush_all_caches(iommu);
 	}
 }
 
+static void enable_iommus_v2(void)
+{
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu) {
+		iommu_enable_ppr_log(iommu);
+		iommu_enable_gt(iommu);
+	}
+}
+
+static void enable_iommus(void)
+{
+	early_enable_iommus();
+
+	enable_iommus_v2();
+}
+
 static void disable_iommus(void)
 {
 	struct amd_iommu *iommu;
@@ -1481,16 +1519,23 @@ static void __init free_on_init_error(void)
  * After everything is set up the IOMMUs are enabled and the necessary
  * hotplug and suspend notifiers are registered.
  */
-int __init amd_iommu_init_hardware(void)
+static int __init early_amd_iommu_init(void)
 {
+	struct acpi_table_header *ivrs_base;
+	acpi_size ivrs_size;
+	acpi_status status;
 	int i, ret = 0;
 
 	if (!amd_iommu_detected)
 		return -ENODEV;
 
-	if (amd_iommu_dev_table != NULL) {
-		/* Hardware already initialized */
-		return 0;
+	status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size);
+	if (status == AE_NOT_FOUND)
+		return -ENODEV;
+	else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("AMD-Vi: IVRS table error: %s\n", err);
+		return -EINVAL;
 	}
 
 	/*
@@ -1498,10 +1543,7 @@ int __init amd_iommu_init_hardware(void)
 	 * we need to handle. Upon this information the shared data
 	 * structures for the IOMMUs in the system will be allocated
 	 */
-	if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
-		return -ENODEV;
-
-	ret = amd_iommu_init_err;
+	ret = find_last_devid_acpi(ivrs_base);
 	if (ret)
 		goto out;
 
@@ -1523,20 +1565,20 @@ int __init amd_iommu_init_hardware(void)
 	amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
 			get_order(alias_table_size));
 	if (amd_iommu_alias_table == NULL)
-		goto free;
+		goto out;
 
 	/* IOMMU rlookup table - find the IOMMU for a specific device */
 	amd_iommu_rlookup_table = (void *)__get_free_pages(
 			GFP_KERNEL | __GFP_ZERO,
 			get_order(rlookup_table_size));
 	if (amd_iommu_rlookup_table == NULL)
-		goto free;
+		goto out;
 
 	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
 					    GFP_KERNEL | __GFP_ZERO,
 					    get_order(MAX_DOMAIN_ID/8));
 	if (amd_iommu_pd_alloc_bitmap == NULL)
-		goto free;
+		goto out;
 
 	/* init the device table */
 	init_device_table();
@@ -1559,38 +1601,18 @@ int __init amd_iommu_init_hardware(void)
 	 * now the data structures are allocated and basically initialized
 	 * start the real acpi table scan
 	 */
-	ret = -ENODEV;
-	if (acpi_table_parse("IVRS", init_iommu_all) != 0)
-		goto free;
-
-	if (amd_iommu_init_err) {
-		ret = amd_iommu_init_err;
-		goto free;
-	}
-
-	if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
-		goto free;
-
-	if (amd_iommu_init_err) {
-		ret = amd_iommu_init_err;
-		goto free;
-	}
-
-	ret = amd_iommu_init_devices();
+	ret = init_iommu_all(ivrs_base);
 	if (ret)
-		goto free;
-
-	enable_iommus();
-
-	amd_iommu_init_notifier();
+		goto out;
 
-	register_syscore_ops(&amd_iommu_syscore_ops);
+	ret = init_memory_definitions(ivrs_base);
+	if (ret)
+		goto out;
 
 out:
-	return ret;
-
-free:
-	free_on_init_error();
+	/* Don't leak any ACPI memory */
+	early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size);
+	ivrs_base = NULL;
 
 	return ret;
 }
@@ -1610,26 +1632,32 @@ out:
 	return ret;
 }
 
-/*
- * This is the core init function for AMD IOMMU hardware in the system.
- * This function is called from the generic x86 DMA layer initialization
- * code.
- *
- * The function calls amd_iommu_init_hardware() to setup and enable the
- * IOMMU hardware if this has not happened yet. After that the driver
- * registers for the DMA-API and for the IOMMU-API as necessary.
- */
-static int __init amd_iommu_init(void)
+static bool detect_ivrs(void)
 {
-	int ret = 0;
+	struct acpi_table_header *ivrs_base;
+	acpi_size ivrs_size;
+	acpi_status status;
 
-	ret = amd_iommu_init_hardware();
-	if (ret)
-		goto out;
+	status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size);
+	if (status == AE_NOT_FOUND)
+		return false;
+	else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("AMD-Vi: IVRS table error: %s\n", err);
+		return false;
+	}
 
-	ret = amd_iommu_enable_interrupts();
-	if (ret)
-		goto free;
+	early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size);
+
+	/* Make sure ACS will be enabled during PCI probe */
+	pci_request_acs();
+
+	return true;
+}
+
+static int amd_iommu_init_dma(void)
+{
+	int ret;
 
 	if (iommu_pass_through)
 		ret = amd_iommu_init_passthrough();
@@ -1637,29 +1665,108 @@ static int __init amd_iommu_init(void)
 		ret = amd_iommu_init_dma_ops();
 
 	if (ret)
-		goto free;
+		return ret;
 
 	amd_iommu_init_api();
 
-	x86_platform.iommu_shutdown = disable_iommus;
+	amd_iommu_init_notifier();
 
-	if (iommu_pass_through)
-		goto out;
+	return 0;
+}
 
-	if (amd_iommu_unmap_flush)
-		printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
-	else
-		printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");
+/****************************************************************************
+ *
+ * AMD IOMMU Initialization State Machine
+ *
+ ****************************************************************************/
+
+static int __init state_next(void)
+{
+	int ret = 0;
+
+	switch (init_state) {
+	case IOMMU_START_STATE:
+		if (!detect_ivrs()) {
+			init_state	= IOMMU_NOT_FOUND;
+			ret		= -ENODEV;
+		} else {
+			init_state	= IOMMU_IVRS_DETECTED;
+		}
+		break;
+	case IOMMU_IVRS_DETECTED:
+		ret = early_amd_iommu_init();
+		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
+		break;
+	case IOMMU_ACPI_FINISHED:
+		early_enable_iommus();
+		register_syscore_ops(&amd_iommu_syscore_ops);
+		x86_platform.iommu_shutdown = disable_iommus;
+		init_state = IOMMU_ENABLED;
+		break;
+	case IOMMU_ENABLED:
+		ret = amd_iommu_init_pci();
+		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
+		enable_iommus_v2();
+		break;
+	case IOMMU_PCI_INIT:
+		ret = amd_iommu_enable_interrupts();
+		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
+		break;
+	case IOMMU_INTERRUPTS_EN:
+		ret = amd_iommu_init_dma();
+		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
+		break;
+	case IOMMU_DMA_OPS:
+		init_state = IOMMU_INITIALIZED;
+		break;
+	case IOMMU_INITIALIZED:
+		/* Nothing to do */
+		break;
+	case IOMMU_NOT_FOUND:
+	case IOMMU_INIT_ERROR:
+		/* Error states => do nothing */
+		ret = -EINVAL;
+		break;
+	default:
+		/* Unknown state */
+		BUG();
+	}
 
-out:
 	return ret;
+}
 
-free:
-	disable_iommus();
+static int __init iommu_go_to_state(enum iommu_init_state state)
+{
+	int ret = 0;
+
+	while (init_state != state) {
+		ret = state_next();
+		if (init_state == IOMMU_NOT_FOUND ||
+		    init_state == IOMMU_INIT_ERROR)
+			break;
+	}
+
+	return ret;
+}
+
+
+
+/*
+ * This is the core init function for AMD IOMMU hardware in the system.
+ * This function is called from the generic x86 DMA layer initialization
+ * code.
+ */
+static int __init amd_iommu_init(void)
+{
+	int ret;
 
-	free_on_init_error();
+	ret = iommu_go_to_state(IOMMU_INITIALIZED);
+	if (ret) {
+		disable_iommus();
+		free_on_init_error();
+	}
 
-	goto out;
+	return ret;
 }
 
 /****************************************************************************
@@ -1669,29 +1776,25 @@ free:
  * IOMMUs
  *
  ****************************************************************************/
-static int __init early_amd_iommu_detect(struct acpi_table_header *table)
-{
-	return 0;
-}
-
 int __init amd_iommu_detect(void)
 {
+	int ret;
+
 	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
 		return -ENODEV;
 
 	if (amd_iommu_disabled)
 		return -ENODEV;
 
-	if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
-		iommu_detected = 1;
-		amd_iommu_detected = 1;
-		x86_init.iommu.iommu_init = amd_iommu_init;
+	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
+	if (ret)
+		return ret;
 
-		/* Make sure ACS will be enabled */
-		pci_request_acs();
-		return 1;
-	}
-	return -ENODEV;
+	amd_iommu_detected = true;
+	iommu_detected = 1;
+	x86_init.iommu.iommu_init = amd_iommu_init;
+
+	return 0;
 }
 
 /****************************************************************************
@@ -1727,8 +1830,8 @@ __setup("amd_iommu=", parse_amd_iommu_options);
 
 IOMMU_INIT_FINISH(amd_iommu_detect,
 		  gart_iommu_hole_init,
-		  0,
-		  0);
+		  NULL,
+		  NULL);
 
 bool amd_iommu_v2_supported(void)
 {
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c1b1d489817e..d0dab865a8b8 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -487,7 +487,7 @@ struct amd_iommu {
 	/* physical address of MMIO space */
 	u64 mmio_phys;
 	/* virtual address of MMIO space */
-	u8 *mmio_base;
+	u8 __iomem *mmio_base;
 
 	/* capabilities of that IOMMU read from ACPI */
 	u32 cap;
@@ -501,6 +501,9 @@ struct amd_iommu {
 	/* IOMMUv2 */
 	bool is_iommu_v2;
 
+	/* PCI device id of the IOMMU device */
+	u16 devid;
+
 	/*
 	 * Capability pointer. There could be more than one IOMMU per PCI
 	 * device function if there are more than one AMD IOMMU capability
@@ -530,8 +533,6 @@ struct amd_iommu {
 	u32 evt_buf_size;
 	/* event buffer virtual address */
 	u8 *evt_buf;
-	/* MSI number for event interrupt */
-	u16 evt_msi_num;
 
 	/* Base of the PPR log, if present */
 	u8 *ppr_log;
@@ -664,6 +665,12 @@ extern bool amd_iommu_force_isolation;
 /* Max levels of glxval supported */
 extern int amd_iommu_max_glx_val;
 
+/*
+ * This function flushes all internal caches of
+ * the IOMMU used by this driver.
+ */
+extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 036fe9bf157e..5208828792e6 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -81,7 +81,7 @@ struct fault {
 	u16 flags;
 };
 
-struct device_state **state_table;
+static struct device_state **state_table;
 static spinlock_t state_lock;
 
 /* List and lock for all pasid_states */
@@ -681,6 +681,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 
 	atomic_set(&pasid_state->count, 1);
 	init_waitqueue_head(&pasid_state->wq);
+	spin_lock_init(&pasid_state->lock);
+
 	pasid_state->task         = task;
 	pasid_state->mm           = get_task_mm(task);
 	pasid_state->device_state = dev_state;
@@ -924,7 +926,7 @@ static int __init amd_iommu_v2_init(void)
 	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n");
 
 	if (!amd_iommu_v2_supported()) {
-		pr_info("AMD IOMMUv2 functionality not available on this sytem\n");
+		pr_info("AMD IOMMUv2 functionality not available on this system\n");
 		/*
 		 * Load anyway to provide the symbols to other modules
 		 * which may use AMD IOMMUv2 optionally.
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 9a114b9ff170..80bad32aa463 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -317,7 +317,7 @@ static int default_fault_handler(enum exynos_sysmmu_inttype itype,
 	if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT))
 		itype = SYSMMU_FAULT_UNKNOWN;
 
-	pr_err("%s occured at 0x%lx(Page table base: 0x%lx)\n",
+	pr_err("%s occurred at 0x%lx(Page table base: 0x%lx)\n",
 			sysmmu_fault_name[itype], fault_addr, pgtable_base);
 
 	ent = section_entry(__va(pgtable_base), fault_addr);
@@ -732,6 +732,10 @@ static int exynos_iommu_domain_init(struct iommu_domain *domain)
 	spin_lock_init(&priv->pgtablelock);
 	INIT_LIST_HEAD(&priv->clients);
 
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end   = ~0UL;
+	domain->geometry.force_aperture = true;
+
 	domain->priv = priv;
 	return 0;
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b12af2ff8c54..2297ec193eb4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -661,7 +661,7 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
 			if (drhd->devices[i] &&
 			    drhd->devices[i]->subordinate &&
 			    drhd->devices[i]->subordinate->number <= bus &&
-			    drhd->devices[i]->subordinate->subordinate >= bus)
+			    drhd->devices[i]->subordinate->busn_res.end >= bus)
 				return drhd->iommu;
 		}
 
@@ -2008,6 +2008,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 	if (!drhd) {
 		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
 			pci_name(pdev));
+		free_domain_mem(domain);
 		return NULL;
 	}
 	iommu = drhd->iommu;
@@ -3932,6 +3933,10 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
 	domain_update_iommu_cap(dmar_domain);
 	domain->priv = dmar_domain;
 
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
+	domain->geometry.force_aperture = true;
+
 	return 0;
 }
 
@@ -4090,52 +4095,89 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
 	return 0;
 }
 
-/*
- * Group numbers are arbitrary.  Device with the same group number
- * indicate the iommu cannot differentiate between them.  To avoid
- * tracking used groups we just use the seg|bus|devfn of the lowest
- * level we're able to differentiate devices
- */
-static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
+static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct pci_dev *bridge;
-	union {
-		struct {
-			u8 devfn;
-			u8 bus;
-			u16 segment;
-		} pci;
-		u32 group;
-	} id;
+	pci_dev_put(*from);
+	*from = to;
+}
 
-	if (iommu_no_mapping(dev))
-		return -ENODEV;
+#define REQ_ACS_FLAGS	(PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
 
-	id.pci.segment = pci_domain_nr(pdev->bus);
-	id.pci.bus = pdev->bus->number;
-	id.pci.devfn = pdev->devfn;
+static int intel_iommu_add_device(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_dev *bridge, *dma_pdev;
+	struct iommu_group *group;
+	int ret;
 
-	if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
+	if (!device_to_iommu(pci_domain_nr(pdev->bus),
+			     pdev->bus->number, pdev->devfn))
 		return -ENODEV;
 
 	bridge = pci_find_upstream_pcie_bridge(pdev);
 	if (bridge) {
-		if (pci_is_pcie(bridge)) {
-			id.pci.bus = bridge->subordinate->number;
-			id.pci.devfn = 0;
-		} else {
-			id.pci.bus = bridge->bus->number;
-			id.pci.devfn = bridge->devfn;
+		if (pci_is_pcie(bridge))
+			dma_pdev = pci_get_domain_bus_and_slot(
+						pci_domain_nr(pdev->bus),
+						bridge->subordinate->number, 0);
+		else
+			dma_pdev = pci_dev_get(bridge);
+	} else
+		dma_pdev = pci_dev_get(pdev);
+
+	/* Account for quirked devices */
+	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+
+	/*
+	 * If it's a multifunction device that does not support our
+	 * required ACS flags, add to the same group as function 0.
+	 */
+	if (dma_pdev->multifunction &&
+	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
+		swap_pci_ref(&dma_pdev,
+			     pci_get_slot(dma_pdev->bus,
+					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
+					  0)));
+
+	/*
+	 * Devices on the root bus go through the iommu.  If that's not us,
+	 * find the next upstream device and test ACS up to the root bus.
+	 * Finding the next device may require skipping virtual buses.
+	 */
+	while (!pci_is_root_bus(dma_pdev->bus)) {
+		struct pci_bus *bus = dma_pdev->bus;
+
+		while (!bus->self) {
+			if (!pci_is_root_bus(bus))
+				bus = bus->parent;
+			else
+				goto root_bus;
 		}
+
+		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
+			break;
+
+		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
 	}
 
-	if (!pdev->is_virtfn && iommu_group_mf)
-		id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
+root_bus:
+	group = iommu_group_get(&dma_pdev->dev);
+	pci_dev_put(dma_pdev);
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+	}
 
-	*groupid = id.group;
+	ret = iommu_group_add_device(group, dev);
 
-	return 0;
+	iommu_group_put(group);
+	return ret;
+}
+
+static void intel_iommu_remove_device(struct device *dev)
+{
+	iommu_group_remove_device(dev);
 }
 
 static struct iommu_ops intel_iommu_ops = {
@@ -4147,7 +4189,8 @@ static struct iommu_ops intel_iommu_ops = {
 	.unmap		= intel_iommu_unmap,
 	.iova_to_phys	= intel_iommu_iova_to_phys,
 	.domain_has_cap = intel_iommu_domain_has_cap,
-	.device_group	= intel_iommu_device_group,
+	.add_device	= intel_iommu_add_device,
+	.remove_device	= intel_iommu_remove_device,
 	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
 };
 
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index e0b18f3ae9a8..af8904de1d44 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -736,6 +736,7 @@ int __init parse_ioapics_under_ir(void)
 {
 	struct dmar_drhd_unit *drhd;
 	int ir_supported = 0;
+	int ioapic_idx;
 
 	for_each_drhd_unit(drhd) {
 		struct intel_iommu *iommu = drhd->iommu;
@@ -748,13 +749,20 @@ int __init parse_ioapics_under_ir(void)
 		}
 	}
 
-	if (ir_supported && ir_ioapic_num != nr_ioapics) {
-		printk(KERN_WARNING
-		       "Not all IO-APIC's listed under remapping hardware\n");
-		return -1;
+	if (!ir_supported)
+		return 0;
+
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+		int ioapic_id = mpc_ioapic_id(ioapic_idx);
+		if (!map_ioapic_to_ir(ioapic_id)) {
+			pr_err(FW_BUG "ioapic %d has no mapping iommu, "
+			       "interrupt remapping will be disabled\n",
+			       ioapic_id);
+			return -1;
+		}
 	}
 
-	return ir_supported;
+	return 1;
 }
 
 int __init ir_dev_scope_init(void)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8b9ded88e6f5..ddbdacad7768 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -26,60 +26,535 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/iommu.h>
+#include <linux/idr.h>
+#include <linux/notifier.h>
+#include <linux/err.h>
+
+static struct kset *iommu_group_kset;
+static struct ida iommu_group_ida;
+static struct mutex iommu_group_mutex;
+
+struct iommu_group {
+	struct kobject kobj;
+	struct kobject *devices_kobj;
+	struct list_head devices;
+	struct mutex mutex;
+	struct blocking_notifier_head notifier;
+	void *iommu_data;
+	void (*iommu_data_release)(void *iommu_data);
+	char *name;
+	int id;
+};
+
+struct iommu_device {
+	struct list_head list;
+	struct device *dev;
+	char *name;
+};
+
+struct iommu_group_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct iommu_group *group, char *buf);
+	ssize_t (*store)(struct iommu_group *group,
+			 const char *buf, size_t count);
+};
+
+#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
+struct iommu_group_attribute iommu_group_attr_##_name =		\
+	__ATTR(_name, _mode, _show, _store)
+
+#define to_iommu_group_attr(_attr)	\
+	container_of(_attr, struct iommu_group_attribute, attr)
+#define to_iommu_group(_kobj)		\
+	container_of(_kobj, struct iommu_group, kobj)
 
-static ssize_t show_iommu_group(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t iommu_group_attr_show(struct kobject *kobj,
+				     struct attribute *__attr, char *buf)
 {
-	unsigned int groupid;
+	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
+	struct iommu_group *group = to_iommu_group(kobj);
+	ssize_t ret = -EIO;
 
-	if (iommu_device_group(dev, &groupid))
-		return 0;
+	if (attr->show)
+		ret = attr->show(group, buf);
+	return ret;
+}
+
+static ssize_t iommu_group_attr_store(struct kobject *kobj,
+				      struct attribute *__attr,
+				      const char *buf, size_t count)
+{
+	struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
+	struct iommu_group *group = to_iommu_group(kobj);
+	ssize_t ret = -EIO;
+
+	if (attr->store)
+		ret = attr->store(group, buf, count);
+	return ret;
+}
+
+static const struct sysfs_ops iommu_group_sysfs_ops = {
+	.show = iommu_group_attr_show,
+	.store = iommu_group_attr_store,
+};
 
-	return sprintf(buf, "%u", groupid);
+static int iommu_group_create_file(struct iommu_group *group,
+				   struct iommu_group_attribute *attr)
+{
+	return sysfs_create_file(&group->kobj, &attr->attr);
 }
-static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
 
-static int add_iommu_group(struct device *dev, void *data)
+static void iommu_group_remove_file(struct iommu_group *group,
+				    struct iommu_group_attribute *attr)
+{
+	sysfs_remove_file(&group->kobj, &attr->attr);
+}
+
+static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
+{
+	return sprintf(buf, "%s\n", group->name);
+}
+
+static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
+
+static void iommu_group_release(struct kobject *kobj)
+{
+	struct iommu_group *group = to_iommu_group(kobj);
+
+	if (group->iommu_data_release)
+		group->iommu_data_release(group->iommu_data);
+
+	mutex_lock(&iommu_group_mutex);
+	ida_remove(&iommu_group_ida, group->id);
+	mutex_unlock(&iommu_group_mutex);
+
+	kfree(group->name);
+	kfree(group);
+}
+
+static struct kobj_type iommu_group_ktype = {
+	.sysfs_ops = &iommu_group_sysfs_ops,
+	.release = iommu_group_release,
+};
+
+/**
+ * iommu_group_alloc - Allocate a new group
+ * (Takes no arguments; a name can be set with iommu_group_set_name().)
+ *
+ * This function is called by an iommu driver to allocate a new iommu
+ * group.  The iommu group represents the minimum granularity of the iommu.
+ * Upon successful return, the caller holds a reference to the supplied
+ * group in order to hold the group until devices are added.  Use
+ * iommu_group_put() to release this extra reference count, allowing the
+ * group to be automatically reclaimed once it has no devices or external
+ * references.
+ */
+struct iommu_group *iommu_group_alloc(void)
+{
+	struct iommu_group *group;
+	int ret;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	group->kobj.kset = iommu_group_kset;
+	mutex_init(&group->mutex);
+	INIT_LIST_HEAD(&group->devices);
+	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
+
+	mutex_lock(&iommu_group_mutex);
+
+again:
+	if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
+		kfree(group);
+		mutex_unlock(&iommu_group_mutex);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
+		goto again;
+
+	mutex_unlock(&iommu_group_mutex);
+
+	ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
+				   NULL, "%d", group->id);
+	if (ret) {
+		mutex_lock(&iommu_group_mutex);
+		ida_remove(&iommu_group_ida, group->id);
+		mutex_unlock(&iommu_group_mutex);
+		kfree(group);
+		return ERR_PTR(ret);
+	}
+
+	group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
+	if (!group->devices_kobj) {
+		kobject_put(&group->kobj); /* triggers .release & free */
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * The devices_kobj holds a reference on the group kobject, so
+	 * as long as that exists so will the group.  We can therefore
+	 * use the devices_kobj for reference counting.
+	 */
+	kobject_put(&group->kobj);
+
+	return group;
+}
+EXPORT_SYMBOL_GPL(iommu_group_alloc);
+
+/**
+ * iommu_group_get_iommudata - retrieve iommu_data registered for a group
+ * @group: the group
+ *
+ * iommu drivers can store data in the group for use when doing iommu
+ * operations.  This function provides a way to retrieve it.  Caller
+ * should hold a group reference.
+ */
+void *iommu_group_get_iommudata(struct iommu_group *group)
+{
+	return group->iommu_data;
+}
+EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
+
+/**
+ * iommu_group_set_iommudata - set iommu_data for a group
+ * @group: the group
+ * @iommu_data: new data
+ * @release: release function for iommu_data
+ *
+ * iommu drivers can store data in the group for use when doing iommu
+ * operations.  This function provides a way to set the data after
+ * the group has been allocated.  Caller should hold a group reference.
+ */
+void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
+			       void (*release)(void *iommu_data))
 {
-	unsigned int groupid;
+	group->iommu_data = iommu_data;
+	group->iommu_data_release = release;
+}
+EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
 
-	if (iommu_device_group(dev, &groupid) == 0)
-		return device_create_file(dev, &dev_attr_iommu_group);
+/**
+ * iommu_group_set_name - set name for a group
+ * @group: the group
+ * @name: name
+ *
+ * Allow iommu driver to set a name for a group.  When set it will
+ * appear in a name attribute file under the group in sysfs.
+ */
+int iommu_group_set_name(struct iommu_group *group, const char *name)
+{
+	int ret;
+
+	if (group->name) {
+		iommu_group_remove_file(group, &iommu_group_attr_name);
+		kfree(group->name);
+		group->name = NULL;
+		if (!name)
+			return 0;
+	}
+
+	group->name = kstrdup(name, GFP_KERNEL);
+	if (!group->name)
+		return -ENOMEM;
+
+	ret = iommu_group_create_file(group, &iommu_group_attr_name);
+	if (ret) {
+		kfree(group->name);
+		group->name = NULL;
+		return ret;
+	}
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(iommu_group_set_name);
+
+/**
+ * iommu_group_add_device - add a device to an iommu group
+ * @group: the group into which to add the device (reference should be held)
+ * @dev: the device
+ *
+ * This function is called by an iommu driver to add a device into a
+ * group.  Adding a device increments the group reference count.
+ */
+int iommu_group_add_device(struct iommu_group *group, struct device *dev)
+{
+	int ret, i = 0;
+	struct iommu_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return -ENOMEM;
+
+	device->dev = dev;
+
+	ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
+	if (ret) {
+		kfree(device);
+		return ret;
+	}
+
+	device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
+rename:
+	if (!device->name) {
+		sysfs_remove_link(&dev->kobj, "iommu_group");
+		kfree(device);
+		return -ENOMEM;
+	}
+
+	ret = sysfs_create_link_nowarn(group->devices_kobj,
+				       &dev->kobj, device->name);
+	if (ret) {
+		kfree(device->name);
+		if (ret == -EEXIST && i >= 0) {
+			/*
+			 * Account for the slim chance of collision
+			 * and append an instance to the name.
+			 */
+			device->name = kasprintf(GFP_KERNEL, "%s.%d",
+						 kobject_name(&dev->kobj), i++);
+			goto rename;
+		}
+
+		sysfs_remove_link(&dev->kobj, "iommu_group");
+		kfree(device);
+		return ret;
+	}
+
+	kobject_get(group->devices_kobj);
+
+	dev->iommu_group = group;
+
+	mutex_lock(&group->mutex);
+	list_add_tail(&device->list, &group->devices);
+	mutex_unlock(&group->mutex);
+
+	/* Notify any listeners about change to group. */
+	blocking_notifier_call_chain(&group->notifier,
+				     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_group_add_device);
+
+/**
+ * iommu_group_remove_device - remove a device from its current group
+ * @dev: device to be removed
+ *
+ * This function is called by an iommu driver to remove the device from
+ * its current group.  This decrements the iommu group reference count.
+ */
+void iommu_group_remove_device(struct device *dev)
+{
+	struct iommu_group *group = dev->iommu_group;
+	struct iommu_device *tmp_device, *device = NULL;
+
+	/* Pre-notify listeners that a device is being removed. */
+	blocking_notifier_call_chain(&group->notifier,
+				     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
+
+	mutex_lock(&group->mutex);
+	list_for_each_entry(tmp_device, &group->devices, list) {
+		if (tmp_device->dev == dev) {
+			device = tmp_device;
+			list_del(&device->list);
+			break;
+		}
+	}
+	mutex_unlock(&group->mutex);
+
+	if (!device)
+		return;
+
+	sysfs_remove_link(group->devices_kobj, device->name);
+	sysfs_remove_link(&dev->kobj, "iommu_group");
+
+	kfree(device->name);
+	kfree(device);
+	dev->iommu_group = NULL;
+	kobject_put(group->devices_kobj);
+}
+EXPORT_SYMBOL_GPL(iommu_group_remove_device);
+
+/**
+ * iommu_group_for_each_dev - iterate over each device in the group
+ * @group: the group
+ * @data: caller opaque data to be passed to callback function
+ * @fn: caller supplied callback function
+ *
+ * This function is called by group users to iterate over group devices.
+ * Callers should hold a reference count to the group during callback.
+ * The group->mutex is held across callbacks, which will block calls to
+ * iommu_group_add/remove_device.
+ */
+int iommu_group_for_each_dev(struct iommu_group *group, void *data,
+			     int (*fn)(struct device *, void *))
+{
+	struct iommu_device *device;
+	int ret = 0;
+
+	mutex_lock(&group->mutex);
+	list_for_each_entry(device, &group->devices, list) {
+		ret = fn(device->dev, data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&group->mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
+
+/**
+ * iommu_group_get - Return the group for a device and increment reference
+ * @dev: get the group that this device belongs to
+ *
+ * This function is called by iommu drivers and users to get the group
+ * for the specified device.  If found, the group is returned and the group
+ * reference is incremented, else NULL.
+ */
+struct iommu_group *iommu_group_get(struct device *dev)
+{
+	struct iommu_group *group = dev->iommu_group;
+
+	if (group)
+		kobject_get(group->devices_kobj);
+
+	return group;
+}
+EXPORT_SYMBOL_GPL(iommu_group_get);
+
+/**
+ * iommu_group_put - Decrement group reference
+ * @group: the group to use
+ *
+ * This function is called by iommu drivers and users to release the
+ * iommu group.  Once the reference count is zero, the group is released.
+ */
+void iommu_group_put(struct iommu_group *group)
+{
+	if (group)
+		kobject_put(group->devices_kobj);
+}
+EXPORT_SYMBOL_GPL(iommu_group_put);
+
+/**
+ * iommu_group_register_notifier - Register a notifier for group changes
+ * @group: the group to watch
+ * @nb: notifier block to signal
+ *
+ * This function allows iommu group users to track changes in a group.
+ * See include/linux/iommu.h for actions sent via this notifier.  Caller
+ * should hold a reference to the group throughout notifier registration.
+ */
+int iommu_group_register_notifier(struct iommu_group *group,
+				  struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&group->notifier, nb);
+}
+EXPORT_SYMBOL_GPL(iommu_group_register_notifier);
+
+/**
+ * iommu_group_unregister_notifier - Unregister a notifier
+ * @group: the group to watch
+ * @nb: notifier block to signal
+ *
+ * Unregister a previously registered group notifier block.
+ */
+int iommu_group_unregister_notifier(struct iommu_group *group,
+				    struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&group->notifier, nb);
+}
+EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
+
+/**
+ * iommu_group_id - Return ID for a group
+ * @group: the group to ID
+ *
+ * Return the unique ID for the group matching the sysfs group number.
+ */
+int iommu_group_id(struct iommu_group *group)
+{
+	return group->id;
+}
+EXPORT_SYMBOL_GPL(iommu_group_id);
 
-static int remove_iommu_group(struct device *dev)
+static int add_iommu_group(struct device *dev, void *data)
 {
-	unsigned int groupid;
+	struct iommu_ops *ops = data;
+
+	if (!ops->add_device)
+		return -ENODEV;
 
-	if (iommu_device_group(dev, &groupid) == 0)
-		device_remove_file(dev, &dev_attr_iommu_group);
+	WARN_ON(dev->iommu_group);
+
+	ops->add_device(dev);
 
 	return 0;
 }
 
-static int iommu_device_notifier(struct notifier_block *nb,
-				 unsigned long action, void *data)
+static int iommu_bus_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
 {
 	struct device *dev = data;
+	struct iommu_ops *ops = dev->bus->iommu_ops;
+	struct iommu_group *group;
+	unsigned long group_action = 0;
+
+	/*
+	 * ADD/DEL call into iommu driver ops if provided, which may
+	 * result in ADD/DEL notifiers to group->notifier
+	 */
+	if (action == BUS_NOTIFY_ADD_DEVICE) {
+		if (ops->add_device)
+			return ops->add_device(dev);
+	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
+		if (ops->remove_device && dev->iommu_group) {
+			ops->remove_device(dev);
+			return 0;
+		}
+	}
+
+	/*
+	 * Remaining BUS_NOTIFYs get filtered and republished to the
+	 * group, if anyone is listening
+	 */
+	group = iommu_group_get(dev);
+	if (!group)
+		return 0;
+
+	switch (action) {
+	case BUS_NOTIFY_BIND_DRIVER:
+		group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
+		break;
+	case BUS_NOTIFY_BOUND_DRIVER:
+		group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
+		break;
+	case BUS_NOTIFY_UNBIND_DRIVER:
+		group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
+		break;
+	case BUS_NOTIFY_UNBOUND_DRIVER:
+		group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
+		break;
+	}
 
-	if (action == BUS_NOTIFY_ADD_DEVICE)
-		return add_iommu_group(dev, NULL);
-	else if (action == BUS_NOTIFY_DEL_DEVICE)
-		return remove_iommu_group(dev);
+	if (group_action)
+		blocking_notifier_call_chain(&group->notifier,
+					     group_action, dev);
 
+	iommu_group_put(group);
 	return 0;
 }
 
-static struct notifier_block iommu_device_nb = {
-	.notifier_call = iommu_device_notifier,
+static struct notifier_block iommu_bus_nb = {
+	.notifier_call = iommu_bus_notifier,
 };
 
 static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
 {
-	bus_register_notifier(bus, &iommu_device_nb);
-	bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
+	bus_register_notifier(bus, &iommu_bus_nb);
+	bus_for_each_dev(bus, NULL, ops, add_iommu_group);
 }
 
 /**
@@ -192,6 +667,45 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_detach_device);
 
+/*
+ * IOMMU groups are really the natural working unit of the IOMMU, but
+ * the IOMMU API works on domains and devices.  Bridge that gap by
+ * iterating over the devices in a group.  Ideally we'd have a single
+ * device which represents the requestor ID of the group, but we also
+ * allow IOMMU drivers to create policy defined minimum sets, where
+ * the physical hardware may be able to distinguish members, but we
+ * wish to group them at a higher level (ex. untrusted multi-function
+ * PCI devices).  Thus we attach each device.
+ */
+static int iommu_group_do_attach_device(struct device *dev, void *data)
+{
+	struct iommu_domain *domain = data;
+
+	return iommu_attach_device(domain, dev);
+}
+
+int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
+{
+	return iommu_group_for_each_dev(group, domain,
+					iommu_group_do_attach_device);
+}
+EXPORT_SYMBOL_GPL(iommu_attach_group);
+
+static int iommu_group_do_detach_device(struct device *dev, void *data)
+{
+	struct iommu_domain *domain = data;
+
+	iommu_detach_device(domain, dev);
+
+	return 0;
+}
+
+void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
+{
+	iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_group);
+
 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 			       unsigned long iova)
 {
@@ -336,11 +850,48 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
-int iommu_device_group(struct device *dev, unsigned int *groupid)
+static int __init iommu_init(void)
 {
-	if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
-		return dev->bus->iommu_ops->device_group(dev, groupid);
+	iommu_group_kset = kset_create_and_add("iommu_groups",
+					       NULL, kernel_kobj);
+	ida_init(&iommu_group_ida);
+	mutex_init(&iommu_group_mutex);
+
+	BUG_ON(!iommu_group_kset);
+
+	return 0;
+}
+subsys_initcall(iommu_init);
+
+int iommu_domain_get_attr(struct iommu_domain *domain,
+			  enum iommu_attr attr, void *data)
+{
+	struct iommu_domain_geometry *geometry;
+	int ret = 0;
+
+	switch (attr) {
+	case DOMAIN_ATTR_GEOMETRY:
+		geometry  = data;
+		*geometry = domain->geometry;
+
+		break;
+	default:
+		if (!domain->ops->domain_get_attr)
+			return -EINVAL;
+
+		ret = domain->ops->domain_get_attr(domain, attr, data);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_get_attr);
+
+int iommu_domain_set_attr(struct iommu_domain *domain,
+			  enum iommu_attr attr, void *data)
+{
+	if (!domain->ops->domain_set_attr)
+		return -EINVAL;
 
-	return -ENODEV;
+	return domain->ops->domain_set_attr(domain, attr, data);
 }
-EXPORT_SYMBOL_GPL(iommu_device_group);
+EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c5c274ab5c5a..67da6cff74e8 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -198,10 +198,10 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova)
 
 /**
  * alloc_iova - allocates an iova
- * @iovad - iova domain in question
- * @size - size of page frames to allocate
- * @limit_pfn - max limit address
- * @size_aligned - set if size_aligned address range is required
+ * @iovad: iova domain in question
+ * @size: size of page frames to allocate
+ * @limit_pfn: max limit address
+ * @size_aligned: set if size_aligned address range is required
  * This function allocates an iova in the range limit_pfn to IOVA_START_PFN
  * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned
  * flag is set then the allocated address iova->pfn_lo will be naturally
@@ -238,8 +238,8 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
 
 /**
  * find_iova - find's an iova for a given pfn
- * @iovad - iova domain in question.
- * pfn - page frame number
+ * @iovad: iova domain in question.
+ * @pfn: page frame number
  * This function finds and returns an iova belonging to the
  * given doamin which matches the given pfn.
  */
@@ -260,7 +260,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 			/* We are not holding the lock while this iova
 			 * is referenced by the caller as the same thread
 			 * which called this function also calls __free_iova()
-			 * and it is by desing that only one thread can possibly
+			 * and it is by design that only one thread can possibly
 			 * reference a particular iova and hence no conflict.
 			 */
 			return iova;
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 1d29b1c66e72..151690db692c 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -1,6 +1,11 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/cpumask.h>
 #include <linux/errno.h>
+#include <linux/msi.h>
+
+#include <asm/hw_irq.h>
+#include <asm/irq_remapping.h>
 
 #include "irq_remapping.h"
 
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index cee307e86606..6a8870a31668 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -226,6 +226,11 @@ static int msm_iommu_domain_init(struct iommu_domain *domain)
 
 	memset(priv->pgtable, 0, SZ_16K);
 	domain->priv = priv;
+
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end   = (1ULL << 32) - 1;
+	domain->geometry.force_aperture = true;
+
 	return 0;
 
 fail_nomem:
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
new file mode 100644
index 000000000000..ee249bc959f8
--- /dev/null
+++ b/drivers/iommu/of_iommu.c
@@ -0,0 +1,90 @@
+/*
+ * OF helpers for IOMMU
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/export.h>
+#include <linux/limits.h>
+#include <linux/of.h>
+
+/**
+ * of_get_dma_window - Parse the *dma-window property; return 0 if found.
+ *
+ * @dn: device node
+ * @prefix: prefix for property name if any
+ * @index: zero-based index of the window entry to return
+ * @busno: Returns busno if supported. Otherwise pass NULL
+ * @addr: Returns address that DMA starts
+ * @size: Returns the range that DMA can handle
+ *
+ * "prefix" and "busno" are optional; pass NULL if unused. Returns
+ * -EINVAL on bad arguments or zero cell counts, and -ENODEV if the
+ * property is missing or holds no entry at @index.
+ */
+int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
+		      unsigned long *busno, dma_addr_t *addr, size_t *size)
+{
+	const __be32 *dma_window, *end;
+	int bytes, cur_index = 0;
+	char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX];
+
+	if (!dn || !addr || !size)
+		return -EINVAL;
+
+	if (!prefix)
+		prefix = "";
+
+	snprintf(propname, sizeof(propname), "%sdma-window", prefix);
+	snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix);
+	snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix);
+
+	dma_window = of_get_property(dn, propname, &bytes);
+	if (!dma_window)
+		return -ENODEV;
+	end = dma_window + bytes / sizeof(*dma_window);
+
+	while (dma_window < end) {
+		u32 cells;
+		const void *prop;
+
+		/* busno is one cell if supported */
+		if (busno)
+			*busno = be32_to_cpup(dma_window++);
+
+		prop = of_get_property(dn, addrname, NULL);
+		if (!prop)
+			prop = of_get_property(dn, "#address-cells", NULL);
+
+		cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn);
+		if (!cells)
+			return -EINVAL;
+		*addr = of_read_number(dma_window, cells);
+		dma_window += cells;
+
+		prop = of_get_property(dn, sizename, NULL);
+		cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn);
+		if (!cells)
+			return -EINVAL;
+		*size = of_read_number(dma_window, cells);
+		dma_window += cells;
+		/* Success only once the requested entry has been decoded. */
+		if (cur_index++ == index)
+			return 0;
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_get_dma_window);
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index e70ee2b59df9..d0b1234581be 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1148,6 +1148,10 @@ static int omap_iommu_domain_init(struct iommu_domain *domain)
 
 	domain->priv = omap_domain;
 
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end   = (1ULL << 32) - 1;
+	domain->geometry.force_aperture = true;
+
 	return 0;
 
 fail_nomem:
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 0c0a37792218..c16e8fc8a4bd 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -165,6 +165,11 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain,
 		return -EINVAL;
 	domain->priv = gart;
 
+	domain->geometry.aperture_start = gart->iovmm_base;
+	domain->geometry.aperture_end   = gart->iovmm_base +
+					gart->page_count * GART_PAGE_SIZE - 1;
+	domain->geometry.force_aperture = true;
+
 	client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL);
 	if (!client)
 		return -ENOMEM;
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 3f3d09d560ea..2a4bb36bc688 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -30,12 +30,15 @@
 #include <linux/sched.h>
 #include <linux/iommu.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_iommu.h>
 
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 
 #include <mach/iomap.h>
 #include <mach/smmu.h>
+#include <mach/tegra-ahb.h>
 
 /* bitmap of the page sizes currently supported */
 #define SMMU_IOMMU_PGSIZES	(SZ_4K)
@@ -111,12 +114,6 @@
 
 #define SMMU_PDE_NEXT_SHIFT		28
 
-/* AHB Arbiter Registers */
-#define AHB_XBAR_CTRL				0xe0
-#define AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE	1
-#define AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT	17
-
-#define SMMU_NUM_ASIDS				4
 #define SMMU_TLB_FLUSH_VA_SECTION__MASK		0xffc00000
 #define SMMU_TLB_FLUSH_VA_SECTION__SHIFT	12 /* right shift */
 #define SMMU_TLB_FLUSH_VA_GROUP__MASK		0xffffc000
@@ -136,6 +133,7 @@
 
 #define SMMU_PAGE_SHIFT 12
 #define SMMU_PAGE_SIZE	(1 << SMMU_PAGE_SHIFT)
+#define SMMU_PAGE_MASK	((1 << SMMU_PAGE_SHIFT) - 1)
 
 #define SMMU_PDIR_COUNT	1024
 #define SMMU_PDIR_SIZE	(sizeof(unsigned long) * SMMU_PDIR_COUNT)
@@ -177,6 +175,8 @@
 #define SMMU_ASID_DISABLE	0
 #define SMMU_ASID_ASID(n)	((n) & ~SMMU_ASID_ENABLE(0))
 
+#define NUM_SMMU_REG_BANKS	3
+
 #define smmu_client_enable_hwgrp(c, m)	smmu_client_set_hwgrp(c, m, 1)
 #define smmu_client_disable_hwgrp(c)	smmu_client_set_hwgrp(c, 0, 0)
 #define __smmu_client_enable_hwgrp(c, m) __smmu_client_set_hwgrp(c, m, 1)
@@ -235,14 +235,12 @@ struct smmu_as {
  * Per SMMU device - IOMMU device
  */
 struct smmu_device {
-	void __iomem	*regs, *regs_ahbarb;
+	void __iomem	*regs[NUM_SMMU_REG_BANKS]; /* one mapping per bank */
 	unsigned long	iovmm_base;	/* remappable base address */
 	unsigned long	page_count;	/* total remappable size */
 	spinlock_t	lock;
 	char		*name;
 	struct device	*dev;
-	int		num_as;
-	struct smmu_as	*as;		/* Run-time allocated array */
 	struct page *avp_vector_page;	/* dummy page shared by all AS's */
 
 	/*
@@ -252,29 +250,50 @@ struct smmu_device {
 	unsigned long translation_enable_1;
 	unsigned long translation_enable_2;
 	unsigned long asid_security;
+
+	struct device_node *ahb;	/* node named by "nvidia,ahb" */
+
+	int		num_as;		/* number of entries in as[] */
+	struct smmu_as	as[];		/* Run-time allocated array */
 };
 
 static struct smmu_device *smmu_handle; /* unique for a system */
 
 /*
- *	SMMU/AHB register accessors
+ *	SMMU register accessors
+ *
+ * An offset in [0x010, 0x03c) is served by regs[0], one in [0x1f0, 0x200)
+ * by regs[1] and one in [0x228, 0x284) by regs[2], each rebased to the
+ * start of its window.  Any other offset is a driver bug and hits BUG().
  */
+static inline void __iomem *smmu_reg(struct smmu_device *smmu, size_t offs)
+{
+	static const struct {
+		size_t start, end;	/* [start, end) in register offsets */
+	} bank[NUM_SMMU_REG_BANKS] = {
+		{ 0x010, 0x03c },
+		{ 0x1f0, 0x200 },
+		{ 0x228, 0x284 },
+	};
+	int i;
+
+	for (i = 0; i < NUM_SMMU_REG_BANKS; i++) {
+		if (offs >= bank[i].start && offs < bank[i].end)
+			return smmu->regs[i] + (offs - bank[i].start);
+	}
+
+	BUG();
+	return NULL;	/* only reached if BUG() is compiled out */
+}
+
 static inline u32 smmu_read(struct smmu_device *smmu, size_t offs)
 {
-	return readl(smmu->regs + offs);
-}
-static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs)
-{
-	writel(val, smmu->regs + offs);
+	return readl(smmu_reg(smmu, offs));
 }
 
-static inline u32 ahb_read(struct smmu_device *smmu, size_t offs)
-{
-	return readl(smmu->regs_ahbarb + offs);
-}
-static inline void ahb_write(struct smmu_device *smmu, u32 val, size_t offs)
+static inline void smmu_write(struct smmu_device *smmu, u32 val, size_t offs)
 {
-	writel(val, smmu->regs_ahbarb + offs);
+	writel(val, smmu_reg(smmu, offs));
 }
 
 #define VA_PAGE_TO_PA(va, page)	\
@@ -370,7 +389,7 @@ static void smmu_flush_regs(struct smmu_device *smmu, int enable)
 	FLUSH_SMMU_REGS(smmu);
 }
 
-static void smmu_setup_regs(struct smmu_device *smmu)
+static int smmu_setup_regs(struct smmu_device *smmu)
 {
 	int i;
 	u32 val;
@@ -398,10 +417,7 @@ static void smmu_setup_regs(struct smmu_device *smmu)
 
 	smmu_flush_regs(smmu, 1);
 
-	val = ahb_read(smmu, AHB_XBAR_CTRL);
-	val |= AHB_XBAR_CTRL_SMMU_INIT_DONE_DONE <<
-		AHB_XBAR_CTRL_SMMU_INIT_DONE_SHIFT;
-	ahb_write(smmu, val, AHB_XBAR_CTRL);
+	return tegra_ahb_enable_smmu(smmu->ahb); /* checked by probe/resume */
 }
 
 static void flush_ptc_and_tlb(struct smmu_device *smmu,
@@ -537,33 +553,42 @@ static inline void put_signature(struct smmu_as *as,
 #endif
 
 /*
- * Caller must lock/unlock as
+ * Caller must not hold as->lock: it is taken here, after the allocations
  */
 static int alloc_pdir(struct smmu_as *as)
 {
-	unsigned long *pdir;
-	int pdn;
+	unsigned long *pdir, flags;
+	int pdn, err = 0;
 	u32 val;
 	struct smmu_device *smmu = as->smmu;
+	struct page *page;
+	unsigned int *cnt;
 
-	if (as->pdir_page)
-		return 0;
+	/*
+	 * Allocate with GFP_KERNEL first, then grab as->lock to publish
+	 */
+	cnt = devm_kzalloc(smmu->dev,
+			   sizeof(cnt[0]) * SMMU_PDIR_COUNT,
+			   GFP_KERNEL);
+	page = alloc_page(GFP_KERNEL | __GFP_DMA);
 
-	as->pte_count = devm_kzalloc(smmu->dev,
-		     sizeof(as->pte_count[0]) * SMMU_PDIR_COUNT, GFP_ATOMIC);
-	if (!as->pte_count) {
-		dev_err(smmu->dev,
-			"failed to allocate smmu_device PTE cunters\n");
-		return -ENOMEM;
+	spin_lock_irqsave(&as->lock, flags);
+
+	if (as->pdir_page) {
+		/* Lost a race: pdir already set; err_out frees our copies */
+		err = -EAGAIN;	/* domain_init then tries the next AS */
+		goto err_out;
 	}
-	as->pdir_page = alloc_page(GFP_ATOMIC | __GFP_DMA);
-	if (!as->pdir_page) {
-		dev_err(smmu->dev,
-			"failed to allocate smmu_device page directory\n");
-		devm_kfree(smmu->dev, as->pte_count);
-		as->pte_count = NULL;
-		return -ENOMEM;
+
+	if (!page || !cnt) {	/* failures are checked only now, under lock */
+		dev_err(smmu->dev, "failed to allocate at %s\n", __func__);
+		err = -ENOMEM;
+		goto err_out;
 	}
+
+	as->pdir_page = page;
+	as->pte_count = cnt;
+
 	SetPageReserved(as->pdir_page);
 	pdir = page_address(as->pdir_page);
 
@@ -579,7 +604,19 @@ static int alloc_pdir(struct smmu_as *as)
 	smmu_write(smmu, val, SMMU_TLB_FLUSH);
 	FLUSH_SMMU_REGS(as->smmu);
 
+	spin_unlock_irqrestore(&as->lock, flags);
+
 	return 0;
+
+err_out:
+	spin_unlock_irqrestore(&as->lock, flags);
+
+	/* Either allocation may have failed; devm_kfree() WARNs on NULL */
+	if (cnt)
+		devm_kfree(smmu->dev, cnt);
+	if (page)
+		__free_page(page);
+	return err;
 }
 
 static void __smmu_iommu_unmap(struct smmu_as *as, dma_addr_t iova)
@@ -764,37 +799,38 @@ static void smmu_iommu_detach_dev(struct iommu_domain *domain,
 			goto out;
 		}
 	}
-	dev_err(smmu->dev, "Couldn't find %s\n", dev_name(c->dev));
+	dev_err(smmu->dev, "Couldn't find %s\n", dev_name(dev));
 out:
 	spin_unlock(&as->client_lock);
 }
 
 static int smmu_iommu_domain_init(struct iommu_domain *domain)
 {
-	int i;
+	int i, err = -EAGAIN;	/* returned as-is when no AS is free */
 	unsigned long flags;
 	struct smmu_as *as;
 	struct smmu_device *smmu = smmu_handle;
 
 	/* Look for a free AS with lock held */
 	for  (i = 0; i < smmu->num_as; i++) {
-		struct smmu_as *tmp = &smmu->as[i];
+		as = &smmu->as[i];
+
+		if (as->pdir_page) /* unlocked; alloc_pdir() rechecks */
+			continue;
 
-		spin_lock_irqsave(&tmp->lock, flags);
-		if (!tmp->pdir_page) {
-			as = tmp;
+		err = alloc_pdir(as);
+		if (!err)
 			goto found;
-		}
-		spin_unlock_irqrestore(&tmp->lock, flags);
+
+		if (err != -EAGAIN) /* -EAGAIN: lost a race, try next AS */
+			break;
 	}
-	dev_err(smmu->dev, "no free AS\n");
-	return -ENODEV;
+	if (i == smmu->num_as)
+		dev_err(smmu->dev,  "no free AS\n");
+	return err;
 
 found:
-	if (alloc_pdir(as) < 0)
-		goto err_alloc_pdir;
-
-	spin_lock(&smmu->lock);
+	spin_lock_irqsave(&smmu->lock, flags);
 
 	/* Update PDIR register */
 	smmu_write(smmu, SMMU_PTB_ASID_CUR(as->asid), SMMU_PTB_ASID);
@@ -802,17 +838,18 @@ found:
 		   SMMU_MK_PDIR(as->pdir_page, as->pdir_attr), SMMU_PTB_DATA);
 	FLUSH_SMMU_REGS(smmu);
 
-	spin_unlock(&smmu->lock);
+	spin_unlock_irqrestore(&smmu->lock, flags);
 
-	spin_unlock_irqrestore(&as->lock, flags);
 	domain->priv = as;
 
+	domain->geometry.aperture_start = smmu->iovmm_base;
+	domain->geometry.aperture_end   = smmu->iovmm_base + /* inclusive end */
+		smmu->page_count * SMMU_PAGE_SIZE - 1;
+	domain->geometry.force_aperture = true;
+
 	dev_dbg(smmu->dev, "smmu_as@%p\n", as);
-	return 0;
 
-err_alloc_pdir:
-	spin_unlock_irqrestore(&as->lock, flags);
-	return -ENODEV;
+	return 0;
 }
 
 static void smmu_iommu_domain_destroy(struct iommu_domain *domain)
@@ -873,65 +910,73 @@ static int tegra_smmu_resume(struct device *dev)
 {
 	struct smmu_device *smmu = dev_get_drvdata(dev);
 	unsigned long flags;
+	int err;
 
 	spin_lock_irqsave(&smmu->lock, flags);
-	smmu_setup_regs(smmu);
+	err = smmu_setup_regs(smmu);
 	spin_unlock_irqrestore(&smmu->lock, flags);
-	return 0;
+	return err;	/* status of smmu_setup_regs() */
 }
 
 static int tegra_smmu_probe(struct platform_device *pdev)
 {
 	struct smmu_device *smmu;
-	struct resource *regs, *regs2, *window;
 	struct device *dev = &pdev->dev;
-	int i, err = 0;
+	int i, asids, err = 0; /* NOTE(review): asids read via *_u32() */
+	dma_addr_t uninitialized_var(base); /* set by of_get_dma_window() */
+	size_t bytes, uninitialized_var(size);	/* size: likewise */
 
 	if (smmu_handle)
 		return -EIO;
 
 	BUILD_BUG_ON(PAGE_SHIFT != SMMU_PAGE_SHIFT);
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	regs2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	window = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	if (!regs || !regs2 || !window) {
-		dev_err(dev, "No SMMU resources\n");
+	if (of_property_read_u32(dev->of_node, "nvidia,#asids", &asids))
 		return -ENODEV;
-	}
 
-	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
+	bytes = sizeof(*smmu) + asids * sizeof(*smmu->as); /* incl. as[] */
+	smmu = devm_kzalloc(dev, bytes, GFP_KERNEL);
 	if (!smmu) {
 		dev_err(dev, "failed to allocate smmu_device\n");
 		return -ENOMEM;
 	}
 
-	smmu->dev = dev;
-	smmu->num_as = SMMU_NUM_ASIDS;
-	smmu->iovmm_base = (unsigned long)window->start;
-	smmu->page_count = resource_size(window) >> SMMU_PAGE_SHIFT;
-	smmu->regs = devm_ioremap(dev, regs->start, resource_size(regs));
-	smmu->regs_ahbarb = devm_ioremap(dev, regs2->start,
-					 resource_size(regs2));
-	if (!smmu->regs || !smmu->regs_ahbarb) {
-		dev_err(dev, "failed to remap SMMU registers\n");
-		err = -ENXIO;
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(smmu->regs); i++) {
+		struct resource *res;
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		if (!res)
+			return -ENODEV;
+		smmu->regs[i] = devm_request_and_ioremap(&pdev->dev, res);
+		if (!smmu->regs[i])
+			return -EBUSY;
 	}
 
+	err = of_get_dma_window(dev->of_node, NULL, 0, NULL, &base, &size);
+	if (err)
+		return -ENODEV;
+
+	if (size & SMMU_PAGE_MASK)	/* window must be whole pages */
+		return -EINVAL;
+
+	size >>= SMMU_PAGE_SHIFT;	/* now a page count */
+	if (!size)
+		return -EINVAL;
+
+	smmu->ahb = of_parse_phandle(dev->of_node, "nvidia,ahb", 0);
+	if (!smmu->ahb)
+		return -ENODEV;
+
+	smmu->dev = dev;
+	smmu->num_as = asids;
+	smmu->iovmm_base = base;
+	smmu->page_count = size;
+
 	smmu->translation_enable_0 = ~0;
 	smmu->translation_enable_1 = ~0;
 	smmu->translation_enable_2 = ~0;
 	smmu->asid_security = 0;
 
-	smmu->as = devm_kzalloc(dev,
-			sizeof(smmu->as[0]) * smmu->num_as, GFP_KERNEL);
-	if (!smmu->as) {
-		dev_err(dev, "failed to allocate smmu_as\n");
-		err = -ENOMEM;
-		goto fail;
-	}
-
 	for (i = 0; i < smmu->num_as; i++) {
 		struct smmu_as *as = &smmu->as[i];
 
@@ -945,57 +990,36 @@ static int tegra_smmu_probe(struct platform_device *pdev)
 		INIT_LIST_HEAD(&as->client);
 	}
 	spin_lock_init(&smmu->lock);
-	smmu_setup_regs(smmu);
+	err = smmu_setup_regs(smmu);
+	if (err)
+		goto fail;
 	platform_set_drvdata(pdev, smmu);
 
 	smmu->avp_vector_page = alloc_page(GFP_KERNEL);
-	if (!smmu->avp_vector_page)
+	if (!smmu->avp_vector_page) {
+		err = -ENOMEM;
 		goto fail;
+	}
 
 	smmu_handle = smmu;
 	return 0;
 
 fail:
-	if (smmu->avp_vector_page)
-		__free_page(smmu->avp_vector_page);
-	if (smmu->regs)
-		devm_iounmap(dev, smmu->regs);
-	if (smmu->regs_ahbarb)
-		devm_iounmap(dev, smmu->regs_ahbarb);
-	if (smmu && smmu->as) {
-		for (i = 0; i < smmu->num_as; i++) {
-			if (smmu->as[i].pdir_page) {
-				ClearPageReserved(smmu->as[i].pdir_page);
-				__free_page(smmu->as[i].pdir_page);
-			}
-		}
-		devm_kfree(dev, smmu->as);
-	}
-	devm_kfree(dev, smmu);
+	/* Drop the node reference taken by of_parse_phandle() */
+	of_node_put(smmu->ahb);
 	return err;
 }
 
 static int tegra_smmu_remove(struct platform_device *pdev)
 {
 	struct smmu_device *smmu = platform_get_drvdata(pdev);
-	struct device *dev = smmu->dev;
+	int i;
 
 	smmu_write(smmu, SMMU_CONFIG_DISABLE, SMMU_CONFIG);
-	platform_set_drvdata(pdev, NULL);
-	if (smmu->as) {
-		int i;
-
-		for (i = 0; i < smmu->num_as; i++)
-			free_pdir(&smmu->as[i]);
-		devm_kfree(dev, smmu->as);
-	}
-	if (smmu->avp_vector_page)
-		__free_page(smmu->avp_vector_page);
-	if (smmu->regs)
-		devm_iounmap(dev, smmu->regs);
-	if (smmu->regs_ahbarb)
-		devm_iounmap(dev, smmu->regs_ahbarb);
-	devm_kfree(dev, smmu);
+	for (i = 0; i < smmu->num_as; i++)
+		free_pdir(&smmu->as[i]);
+	__free_page(smmu->avp_vector_page);
+	of_node_put(smmu->ahb);	/* pairs with of_parse_phandle() in probe */
 	smmu_handle = NULL;
 	return 0;
 }
@@ -1005,6 +1021,14 @@ const struct dev_pm_ops tegra_smmu_pm_ops = {
 	.resume		= tegra_smmu_resume,
 };
 
+#ifdef CONFIG_OF
+static struct of_device_id tegra_smmu_of_match[] __devinitdata = {
+	{ .compatible = "nvidia,tegra30-smmu", },
+	{ },	/* empty terminator entry */
+};
+MODULE_DEVICE_TABLE(of, tegra_smmu_of_match);
+#endif	/* CONFIG_OF */
+
 static struct platform_driver tegra_smmu_driver = {
 	.probe		= tegra_smmu_probe,
 	.remove		= tegra_smmu_remove,
@@ -1012,6 +1036,7 @@ static struct platform_driver tegra_smmu_driver = {
 		.owner	= THIS_MODULE,
 		.name	= "tegra-smmu",
 		.pm	= &tegra_smmu_pm_ops,
+		.of_match_table = of_match_ptr(tegra_smmu_of_match),
 	},
 };
 
@@ -1031,4 +1056,5 @@ module_exit(tegra_smmu_exit);
 
 MODULE_DESCRIPTION("IOMMU API for SMMU in Tegra30");
 MODULE_AUTHOR("Hiroshi DOYU <hdoyu@nvidia.com>");
+MODULE_ALIAS("platform:tegra-smmu");
 MODULE_LICENSE("GPL v2");