From 1ddb5518052e4e28ab489237443f7443b3fd69ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:55 +0200 Subject: x86: convert pci-dma.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551bb..88ddd04cfa98 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void) * using 512M as goal */ align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); + size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); if (dma32_bootmem_ptr) -- cgit v1.2.3 From c647c3bb2d16246a87f49035985ddb7c1eb030df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:44 +0200 Subject: x86: cleanup dma_*_coherent functions All dma_ops implementations support the alloc_coherent and free_coherent callbacks now. This allows a big simplification of the dma_alloc_coherent function which is done with this patch. The dma_free_coherent functions is also cleaned up and calls now the free_coherent callback of the dma_ops implementation. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 121 +++++----------------------------------------- 1 file changed, 12 insertions(+), 109 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec2..613332b26e31 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -241,33 +241,15 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* Allocate DMA memory on node near device */ -static noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - /* * Allocate memory for a coherent mapping. */ -void * + void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory = NULL; - struct page *page; - unsigned long dma_mask = 0; - dma_addr_t bus; - int noretry = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + void *memory; if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; @@ -276,89 +258,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, dev = &fallback_dev; gfp |= GFP_DMA; } - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ - if (gfp & __GFP_DMA) - noretry = 1; - -#ifdef CONFIG_X86_64 - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp |= GFP_DMA32; - if (dma_mask < DMA_32BIT_MASK) - noretry = 1; - } -#endif - again: - page = dma_alloc_pages(dev, - noretry ? 
gfp | __GFP_NORETRY : gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (ops->map_simple) { - *dma_handle = ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)memory, get_order(size)); + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); @@ -368,17 +271,17 @@ EXPORT_SYMBOL(dma_alloc_coherent); * The caller must ensure that the device has finished accessing the mapping. */ void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) + void *vaddr, dma_addr_t bus) { struct dma_mapping_ops *ops = get_dma_ops(dev); - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_from_coherent(dev, order, vaddr)) + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; - if (ops->unmap_single) - ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 49 +++---------------------------------------- arch/x86/kernel/pci-gart_64.c | 4 ++-- include/asm-x86/dma-mapping.h | 47 ++++++++++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 613332b26e31..0a1408abcc62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to older i386. 
*/ -struct device fallback_dev = { +struct device x86_dma_fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; +EXPORT_SYMBOL(x86_dma_fallback_dev); int dma_set_mask(struct device *dev, u64 mask) { @@ -241,50 +242,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* - * Allocate memory for a coherent mapping. - */ - void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) { - dev = &fallback_dev; - gfp |= GFP_DMA; - } - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); -} -EXPORT_SYMBOL(dma_free_coherent); - static int __init pci_iommu_init(void) { calgary_iommu_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ef753e233580..e81df25dc067 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -276,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) unsigned long bus; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; if (!need_iommu(dev, paddr, size)) return paddr; @@ -427,7 +427,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) return 0; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; out = 0; start = 0; diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index ad9cd6d49bfc..8e16095d1fa9 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -9,10 +9,11 @@ #include #include #include +#include extern dma_addr_t bad_dma_address; extern int iommu_merge; -extern struct device fallback_dev; +extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern int force_iommu; @@ -87,13 +88,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - +#define dma_is_consistent(d, h) (1) extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); @@ -247,7 +242,39 @@ static inline int dma_get_cache_alignment(void) return boot_cpu_data.x86_clflush_size; } -#define dma_is_consistent(d, h) (1) +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + void *memory; + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + + if 
(!dev) { + dev = &x86_dma_fallback_dev; + gfp |= GFP_DMA; + } + + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); +} -#include #endif -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ Documentation/x86/x86_64/boot-options.txt | 2 -- arch/x86/kernel/pci-dma.c | 13 +++++++++++++ arch/x86/kernel/pci-gart_64.c | 13 ------------- include/asm-x86/iommu.h | 1 + 5 files changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1150444a21ab..40066ceb48fe 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -893,6 +893,10 @@ and is between 256 and 4096 characters. It is defined in the file nomerge forcesac soft + fullflush + Flush IO/TLB at every deallocation + nofullflush + Flush IO/TLB only when addresses are reused (default) intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index b0c7b6c4abda..c83c8e4bc8e5 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -233,8 +233,6 @@ IOMMU (input/output memory management unit) iommu options only relevant to the AMD GART hardware IOMMU: Set the size of the remapping area in bytes. allowed Overwrite iommu off workarounds for specific chipsets. - fullflush Flush IOMMU on each allocation (default). - nofullflush Don't use IOMMU fullflush. leak Turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on). Default number of leak pages is 20. diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc62..d2f2c0158dc1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,6 +16,15 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). 
+ */ +int iommu_fullflush; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -171,6 +180,10 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9739d5682093..508ef470b27f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,15 +45,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush = 1; - /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -901,10 +892,6 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 621a1af94c4c..67b2fd56c6da 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,7 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; +extern int iommu_fullflush; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 9 +++++---- Documentation/x86/x86_64/boot-options.txt | 2 ++ arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/amd_iommu_init.c | 5 ++++- arch/x86/kernel/pci-dma.c | 13 ------------- arch/x86/kernel/pci-gart_64.c | 13 +++++++++++++ include/asm-x86/amd_iommu_types.h | 6 ++++++ include/asm-x86/iommu.h | 1 - 8 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 40066ceb48fe..040ce30632b5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -284,6 +284,11 @@ and is between 256 and 4096 characters. 
It is defined in the file isolate - enable device isolation (each device, as far as possible, will get its own protection domain) + fullflush - enable flushing of IO/TLB entries when + they are unmapped. Otherwise they are + flushed before they will be reused, which + is a lot of faster + amd_iommu_size= [HW,X86-64] Define the size of the aperture for the AMD IOMMU driver. Possible values are: @@ -893,10 +898,6 @@ and is between 256 and 4096 characters. It is defined in the file nomerge forcesac soft - fullflush - Flush IO/TLB at every deallocation - nofullflush - Flush IO/TLB only when addresses are reused (default) intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index c83c8e4bc8e5..b0c7b6c4abda 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -233,6 +233,8 @@ IOMMU (input/output memory management unit) iommu options only relevant to the AMD GART hardware IOMMU: Set the size of the remapping area in bytes. allowed Overwrite iommu off workarounds for specific chipsets. + fullflush Flush IOMMU on each allocation (default). + nofullflush Don't use IOMMU fullflush. leak Turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on). Default number of leak pages is 20. diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 70537d117a96..c19212191c98 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -948,7 +948,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) @@ -985,7 +985,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db0c83af44de..148fcfe22f17 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ int amd_iommu_isolate; /* if 1, device isolation is enabled */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -1144,7 +1145,7 @@ int __init amd_iommu_init(void) else printk("disabled\n"); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); @@ -1214,6 +1215,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "fullflush", 11) == 0) + amd_iommu_unmap_flush = true; } return 1; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d2f2c0158dc1..0a1408abcc62 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,15 +16,6 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when 
an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -180,10 +171,6 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 3dcb1ad86e38..9e390f1bd46a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,6 +45,15 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush = 1; + /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -892,6 +901,10 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index f953309a636e..4ff892f3b0ad 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -376,6 +376,12 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; /* will be 1 if device isolation is enabled */ extern int amd_iommu_isolate; +/* + * If true, the addresses will be flushed on unmap time, not when + * they are reused + */ +extern bool amd_iommu_unmap_flush; + /* takes a PCI device id and prints it out in a readable form */ static inline void print_devid(u16 devid, int nl) { diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 67b2fd56c6da..621a1af94c4c 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,7 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; -extern int iommu_fullflush; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From 9f6ac57729724b58df81ca5dc005326759a806fe Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:35 +0900 Subject: x86: export pci-nommu's alloc_coherent This patch exports nommu_alloc_coherent (renamed dma_generic_alloc_coherent). GART needs this function. 
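[Editorial illustration, not part of this patch: a minimal sketch of how an IOMMU backend such as the GART driver could reuse the now-exported helper -- allocate the pages with dma_generic_alloc_coherent() and then remap the resulting physical address through the driver's own mapping path. The gart_alloc_coherent() wrapper and the dma_map_area() call below are assumptions made only for illustration.]

/* Hypothetical caller -- sketch only, not part of this patch series. */
static void *gart_alloc_coherent(struct device *dev, size_t size,
				 dma_addr_t *dma_addr, gfp_t flag)
{
	void *vaddr;

	/* let the generic helper pick a suitable zone and allocate the pages */
	vaddr = dma_generic_alloc_coherent(dev, size, dma_addr, flag);
	if (!vaddr)
		return NULL;

	/* assumed remapping step through the GART aperture */
	*dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
				 PCI_DMA_BIDIRECTIONAL, 0);
	if (*dma_addr == bad_dma_address) {
		free_pages((unsigned long)vaddr, get_order(size));
		return NULL;
	}

	return vaddr;
}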
Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu.c | 39 +-------------------------------------- include/asm-x86/dma-mapping.h | 3 +++ 3 files changed, 35 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc62..4e612d20170a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -134,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) EXPORT_SYMBOL(iommu_num_pages); #endif +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long dma_mask; + struct page *page; + dma_addr_t addr; + + dma_mask = dma_alloc_coherent_mask(dev, flag); + + flag |= __GFP_ZERO; +again: + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!page) + return NULL; + + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size)) { + __free_pages(page, get_order(size)); + + if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + return NULL; + } + + *dma_addr = addr; + return page_address(page); +} + /* * See for the iommu kernel parameter * documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 1c1c98a31d57..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,43 +72,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void * -nommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) -{ - unsigned long dma_mask; - int node; - struct page *page; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - - gfp |= __GFP_ZERO; - - node = dev_to_node(hwdev); -again: - page = alloc_pages_node(node, gfp, get_order(size)); - if (!page) - return NULL; - - addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { - free_pages((unsigned long)page_address(page), get_order(size)); - gfp |= GFP_DMA; - goto again; - } - - if (check_addr("alloc_coherent", hwdev, addr, size)) { - *dma_addr = addr; - flush_write_buffers(); - return page_address(page); - } - - free_pages((unsigned long)page_address(page), get_order(size)); - - return NULL; -} - static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr) { @@ -116,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, } struct dma_mapping_ops nommu_dma_ops = { - .alloc_coherent = nommu_alloc_coherent, + .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index f408e6dd1778..3b808e9bb72c 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -89,6 +89,9 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); +extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag); + static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) -- cgit v1.2.3 From bdab0ba3d9ad8de257ee6236daf314723748fde6 Mon 
Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 15 Oct 2008 22:02:07 -0700 Subject: x86: rename iommu_num_pages function to iommu_nr_pages This series of patches re-introduces the iommu_num_pages function so that it can be used by each architecture specific IOMMU implementations. The series also changes IOMMU implementations for X86, Alpha, PowerPC and UltraSparc. The other implementations are not yet changed because the modifications required are not obvious and I can't test them on real hardware. This patch: This is a preparation patch for introducing a generic iommu_num_pages function. Signed-off-by: Joerg Roedel Cc: "David S. Miller" Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Ingo Molnar Cc: Thomas Gleixner Cc: FUJITA Tomonori Cc: Muli Ben-Yehuda Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/amd_iommu.c | 8 ++++---- arch/x86/kernel/pci-dma.c | 4 ++-- arch/x86/kernel/pci-gart_64.c | 8 ++++---- include/asm-x86/iommu.h | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel/pci-dma.c') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 34e4d112b1ef..10646acba9be 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -295,7 +295,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = iommu_num_pages(address, size); + unsigned pages = iommu_nr_pages(address, size); address &= PAGE_MASK; @@ -679,7 +679,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, + int pages = iommu_nr_pages(iommu->exclusion_start, iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -935,7 +935,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned long align_mask = 0; int i; - pages = iommu_num_pages(paddr, size); + pages = iommu_nr_pages(paddr, size); paddr &= PAGE_MASK; if (align) @@ -980,7 +980,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = iommu_num_pages(dma_addr, size); + pages = iommu_nr_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a3824e837b4..192624820217 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -125,13 +125,13 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } -unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) { unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); return size >> PAGE_SHIFT; } -EXPORT_SYMBOL(iommu_num_pages); +EXPORT_SYMBOL(iommu_nr_pages); #endif void *dma_generic_alloc_coherent(struct device *dev, size_t size, diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 145f1c83369f..14f1b41348fd 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -231,7 +231,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir, unsigned long align_mask) { - unsigned long npages = iommu_num_pages(phys_mem, 
size); + unsigned long npages = iommu_nr_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; @@ -285,7 +285,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = iommu_num_pages(dma_addr, size); + npages = iommu_nr_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -368,7 +368,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = iommu_num_pages(s->offset, s->length); + pages = iommu_nr_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -451,7 +451,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += iommu_num_pages(s->offset, s->length); + pages += iommu_nr_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 546ad3110fea..961e746da977 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -8,7 +8,7 @@ extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; -extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); +extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; -- cgit v1.2.3
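[Editorial illustration, not kernel code: a self-contained sketch of the calculation that iommu_nr_pages() performs after the rename in the last patch above -- how many IOMMU pages a buffer of len bytes starting at addr spans. PAGE_SHIFT/PAGE_SIZE are hard-coded to the usual x86 values for the example; the standalone main() harness is purely illustrative.]

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
/* same rounding as the kernel's roundup() macro */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

static unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
{
	/* bytes from the start of the first page, rounded up to whole pages */
	unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);

	return size >> PAGE_SHIFT;
}

int main(void)
{
	/* an 8 KiB buffer starting 0x100 bytes into a page crosses 3 pages */
	printf("%lu\n", iommu_nr_pages(0x1100, 0x2000));
	return 0;
}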