author     Linus Torvalds <torvalds@linux-foundation.org>  2018-04-03 02:18:45 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-04-03 02:18:45 +0200
commit     2fcd2b306aa80771e053275ed74b2dfe7e3d1434 (patch)
tree       b19f4e3e9552eab00056c833650e692192fe8f5c
parent     Merge branch 'sched-wait-for-linus' of git://git.kernel.org/pub/scm/linux/ker... (diff)
parent     dma-mapping: Don't clear GFP_ZERO in dma_alloc_attrs (diff)
Merge branch 'x86-dma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 dma mapping updates from Ingo Molnar:
"This tree, by Christoph Hellwig, switches over the x86 architecture to
the generic dma-direct and swiotlb code, and also unifies more of the
dma-direct code between architectures. The now unused x86-only
primitives are removed"
* 'x86-dma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
dma-mapping: Don't clear GFP_ZERO in dma_alloc_attrs
swiotlb: Make swiotlb_{alloc,free}_buffer depend on CONFIG_DMA_DIRECT_OPS
dma/swiotlb: Remove swiotlb_{alloc,free}_coherent()
dma/direct: Handle force decryption for DMA coherent buffers in common code
dma/direct: Handle the memory encryption bit in common code
dma/swiotlb: Remove swiotlb_set_mem_attributes()
set_memory.h: Provide set_memory_{en,de}crypted() stubs
x86/dma: Remove dma_alloc_coherent_gfp_flags()
iommu/intel-iommu: Enable CONFIG_DMA_DIRECT_OPS=y and clean up intel_{alloc,free}_coherent()
iommu/amd_iommu: Use CONFIG_DMA_DIRECT_OPS=y and dma_direct_{alloc,free}()
x86/dma/amd_gart: Use dma_direct_{alloc,free}()
x86/dma/amd_gart: Look at dev->coherent_dma_mask instead of GFP_DMA
x86/dma: Use generic swiotlb_ops
x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y)
x86/dma: Remove dma_alloc_coherent_mask()
30 files changed, 182 insertions(+), 558 deletions(-)
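
To make the change described in the pull message easier to follow without reading the full (unwrapped) diff below, here is the central piece of the new layering, condensed from the include/linux/dma-direct.h hunk in this patch: the raw __phys_to_dma()/__dma_to_phys() helpers perform only the bus-address translation, while the phys_to_dma()/dma_to_phys() wrappers add or strip the memory-encryption bit. This is a condensed excerpt for orientation, not the complete header.

    #include <linux/dma-mapping.h>
    #include <linux/mem_encrypt.h>

    #ifndef CONFIG_ARCH_HAS_PHYS_TO_DMA
    /* Default translation: only apply the device's DMA pfn offset. */
    static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
    {
    	dma_addr_t dev_addr = (dma_addr_t)paddr;

    	return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT);
    }
    #endif

    /*
     * SME-aware wrappers: phys_to_dma() sets the memory encryption bit,
     * dma_to_phys() clears it.  The raw __phys_to_dma()/__dma_to_phys()
     * variants are reserved for unencrypted memory such as DMA coherent
     * buffers and the swiotlb pool.
     */
    static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
    {
    	return __sme_set(__phys_to_dma(dev, paddr));
    }

    static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
    {
    	return __sme_clr(__dma_to_phys(dev, daddr));
    }
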
diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 5b0a8a421894..b67e5fc1fe43 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,13 +2,13 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) { unsigned int offset = dev_addr & ~PAGE_MASK; return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c7bb8a407041..7b335ab21697 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -10,7 +10,7 @@ * IP32 changes by Ilya. * Copyright (C) 2010 Cavium Networks, Inc. */ -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/scatterlist.h> #include <linux/bootmem.h> #include <linux/export.h> @@ -182,7 +182,7 @@ struct octeon_dma_map_ops { phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr); }; -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev), struct octeon_dma_map_ops, @@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return ops->phys_to_dma(dev, paddr); } -EXPORT_SYMBOL(phys_to_dma); +EXPORT_SYMBOL(__phys_to_dma); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev), struct octeon_dma_map_ops, @@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return ops->dma_to_phys(dev, daddr); } -EXPORT_SYMBOL(dma_to_phys); +EXPORT_SYMBOL(__dma_to_phys); static struct octeon_dma_map_ops octeon_linear_dma_map_ops = { .dma_map_ops = { diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h index 138edf6b5b48..6eb1ee548b11 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h @@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); struct dma_map_ops; extern const struct dma_map_ops *octeon_pci_dma_map_ops; diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h b/arch/mips/include/asm/mach-loongson64/dma-coherence.h index b1b575f5c6c1..64fc44dec0a8 100644 --- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h @@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -extern phys_addr_t 
dma_to_phys(struct device *dev, dma_addr_t daddr); +extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +extern phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, size_t size) { #ifdef CONFIG_CPU_LOONGSON3 - return phys_to_dma(dev, virt_to_phys(addr)); + return __phys_to_dma(dev, virt_to_phys(addr)); #else return virt_to_phys(addr) | 0x80000000; #endif @@ -41,7 +41,7 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, struct page *page) { #ifdef CONFIG_CPU_LOONGSON3 - return phys_to_dma(dev, page_to_phys(page)); + return __phys_to_dma(dev, page_to_phys(page)); #else return page_to_phys(page) | 0x80000000; #endif @@ -51,7 +51,7 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev, dma_addr_t dma_addr) { #if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT) - return dma_to_phys(dev, dma_addr); + return __dma_to_phys(dev, dma_addr); #elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT) return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff); #else diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index 7bbcf89475f3..6a739f8ae110 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -63,7 +63,7 @@ static int loongson_dma_supported(struct device *dev, u64 mask) return swiotlb_dma_supported(dev, mask); } -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { long nid; #ifdef CONFIG_PHYS48_TO_HT40 @@ -75,7 +75,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr; } -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { long nid; #ifdef CONFIG_PHYS48_TO_HT40 diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index a5b59c765426..7702875aabb7 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -17,12 +17,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return addr + size - 1 <= *dev->dma_mask; } -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + get_dma_offset(dev); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { return daddr - get_dma_offset(dev); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a0fb8bc346d5..27fede438959 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,7 +54,6 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 - select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT @@ -83,6 +82,7 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS + select DMA_DIRECT_OPS select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS @@ -680,6 +680,7 @@ config X86_SUPPORTS_MEMORY_FAILURE config STA2X11 bool "STA2X11 Companion Chip Support" depends on X86_32_NON_STANDARD && PCI + select ARCH_HAS_PHYS_TO_DMA select X86_DEV_DMA_OPS select X86_DMA_REMAP select SWIOTLB diff --git 
a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 5e12c63b47aa..a8f6c809d9b1 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,6 +6,9 @@ struct dev_archdata { #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif +#ifdef CONFIG_STA2X11 + bool is_sta2x11; +#endif }; #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h index 1295bc622ebe..1a19251eaac9 100644 --- a/arch/x86/include/asm/dma-direct.h +++ b/arch/x86/include/asm/dma-direct.h @@ -2,29 +2,8 @@ #ifndef ASM_X86_DMA_DIRECT_H #define ASM_X86_DMA_DIRECT_H 1 -#include <linux/mem_encrypt.h> - -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ #endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6277c83c0eb1..89ce4bfd241f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,37 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - -static inline unsigned long dma_alloc_coherent_mask(struct device *dev, - gfp_t gfp) -{ - unsigned long dma_mask = 0; - - dma_mask = dev->coherent_dma_mask; - if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? 
DMA_BIT_MASK(24) : DMA_BIT_MASK(32); - - return dma_mask; -} - -static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) -{ - unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); - - if (dma_mask <= DMA_BIT_MASK(24)) - gfp |= GFP_DMA; -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - return gfp; -} - #endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d92eb40..baedab8ac538 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 8fe61ad21047..c0643831706e 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -49,8 +49,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); - bool sme_active(void); bool sev_active(void); diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 1c6a6cb230ff..ff6c92eff035 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void) { } #endif - -extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs); -extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73ccf80c09a2..02d6f5cf4e70 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index ecd486cb06ab..f299d8a479bb 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -480,30 +480,21 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs) { - dma_addr_t paddr; - unsigned long align_mask; - struct page *page; - - if (force_iommu && !(flag & GFP_DMA)) { - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - align_mask = (1UL << get_order(size)) - 1; - paddr = dma_map_area(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); - if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } - __free_pages(page, get_order(size)); - } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag, - attrs); + void *vaddr; + + vaddr = dma_direct_alloc(dev, size, dma_addr, flag, 
attrs); + if (!vaddr || + !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) + return vaddr; + *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size, + DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1); + flush_gart(); + if (unlikely(*dma_addr == bad_dma_addr)) + goto out_free; + return vaddr; +out_free: + dma_direct_free(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -513,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -705,7 +696,7 @@ static const struct dma_map_ops gart_dma_ops = { .alloc = gart_alloc_coherent, .free = gart_free_coherent, .mapping_error = gart_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 35c461f21815..bbfc8b1e9104 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/crash_dump.h> #include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/bitmap.h> #include <linux/pci_ids.h> #include <linux/pci.h> @@ -445,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, npages = size >> PAGE_SHIFT; order = get_order(size); - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - /* alloc enough pages (and possibly more) */ ret = (void *)__get_free_pages(flag, order); if (!ret) @@ -493,7 +492,7 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .mapping_error = calgary_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index df7ab02f959f..14437116ffea 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -18,7 +18,7 @@ static int forbid_dac __read_mostly; -const struct dma_map_ops *dma_ops = &nommu_dma_ops; +const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -76,70 +76,12 @@ void __init pci_iommu_alloc(void) } } } -void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - unsigned long dma_mask; - struct page *page; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(dev, flag); - -again: - page = NULL; - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(flag)) { - page = dma_alloc_from_contiguous(dev, count, get_order(size), - flag); - if (page) { - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - } - /* fallback */ - if (!page) - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); - if (!page) - return NULL; - - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dma_mask) { - __free_pages(page, get_order(size)); - - if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { - flag = (flag & ~GFP_DMA32) | 
GFP_DMA; - goto again; - } - - return NULL; - } - memset(page_address(page), 0, size); - *dma_addr = addr; - return page_address(page); -} - -void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct page *page = virt_to_page(vaddr); - - if (!dma_release_from_contiguous(dev, page, count)) - free_pages((unsigned long)vaddr, get_order(size)); -} bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { if (!*dev) *dev = &x86_dma_fallback_dev; - *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); - if (!is_device_dma_capable(*dev)) return false; return true; @@ -245,16 +187,6 @@ int arch_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(arch_dma_supported); -int x86_dma_supported(struct device *dev, u64 mask) -{ - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_BIT_MASK(24)) - return 0; - return 1; -} - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ee0f8f34251..661583662430 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -17,52 +17,6 @@ int swiotlb __read_mostly; -void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - void *vaddr; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA - * memory allocation ultimately failed. - */ - flags |= __GFP_NOWARN; - - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, - attrs); - if (vaddr) - return vaddr; - - return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); -} - -void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs) -{ - if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) - swiotlb_free_coherent(dev, size, vaddr, dma_addr); - else - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); -} - -static const struct dma_map_ops x86_swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = x86_swiotlb_alloc_coherent, - .free = x86_swiotlb_free_coherent, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = NULL, -}; - /* * pci_swiotlb_detect_override - set swiotlb to 1 if necessary * @@ -112,7 +66,7 @@ void __init pci_swiotlb_init(void) { if (swiotlb) { swiotlb_init(0); - dma_ops = &x86_swiotlb_dma_ops; + dma_ops = &swiotlb_dma_ops; } } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 3a1b5fe4c2ca..b2de398d1fd3 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -195,67 +195,6 @@ void __init sme_early_init(void) swiotlb_force = SWIOTLB_FORCE; } -static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned long dma_mask; - unsigned int order; - struct page *page; - void *vaddr = NULL; - - dma_mask = dma_alloc_coherent_mask(dev, gfp); - order = get_order(size); - 
- /* - * Memory will be memset to zero after marking decrypted, so don't - * bother clearing it before. - */ - gfp &= ~__GFP_ZERO; - - page = alloc_pages_node(dev_to_node(dev), gfp, order); - if (page) { - dma_addr_t addr; - - /* - * Since we will be clearing the encryption bit, check the - * mask with it already cleared. - */ - addr = __sme_clr(phys_to_dma(dev, page_to_phys(page))); - if ((addr + size) > dma_mask) { - __free_pages(page, get_order(size)); - } else { - vaddr = page_address(page); - *dma_handle = addr; - } - } - - if (!vaddr) - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); - - if (!vaddr) - return NULL; - - /* Clear the SME encryption bit for DMA use if not swiotlb area */ - if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) { - set_memory_decrypted((unsigned long)vaddr, 1 << order); - memset(vaddr, 0, PAGE_SIZE << order); - *dma_handle = __sme_clr(*dma_handle); - } - - return vaddr; -} - -static void sev_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - /* Set the SME encryption bit for re-use if not swiotlb area */ - if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle))) - set_memory_encrypted((unsigned long)vaddr, - 1 << get_order(size)); - - swiotlb_free_coherent(dev, size, vaddr, dma_handle); -} - static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -408,20 +347,6 @@ bool sev_active(void) } EXPORT_SYMBOL(sev_active); -static const struct dma_map_ops sev_dma_ops = { - .alloc = sev_alloc, - .free = sev_free, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, -}; - /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void) { @@ -432,12 +357,11 @@ void __init mem_encrypt_init(void) swiotlb_update_mem_attributes(); /* - * With SEV, DMA operations cannot use encryption. New DMA ops - * are required in order to mark the DMA areas as decrypted or - * to use bounce buffers. + * With SEV, DMA operations cannot use encryption, we need to use + * SWIOTLB to bounce buffer DMA operation. */ if (sev_active()) - dma_ops = &sev_dma_ops; + dma_ops = &swiotlb_dma_ops; /* * With SEV, we need to unroll the rep string I/O instructions. @@ -450,11 +374,3 @@ void __init mem_encrypt_init(void) : "Secure Memory Encryption (SME)"); } -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) -{ - WARN(PAGE_ALIGN(size) != size, - "size is not page-aligned (%#lx)\n", size); - - /* Make the SWIOTLB buffer area decrypted */ - set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); -} diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 75577c1490c4..7a5bafb76d77 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev) return p; } -/** - * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers - * returns virtual address. This is the only "special" function here. 
- * @dev: PCI device - * @size: Size of the buffer - * @dma_handle: DMA address - * @flags: memory flags - */ -static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flags, - unsigned long attrs) -{ - void *vaddr; - - vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); - *dma_handle = p2a(*dma_handle, to_pci_dev(dev)); - return vaddr; -} - -/* We have our own dma_ops: the same as swiotlb but from alloc (above) */ -static const struct dma_map_ops sta2x11_dma_ops = { - .alloc = sta2x11_swiotlb_alloc_coherent, - .free = x86_swiotlb_free_coherent, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, - .dma_supported = x86_dma_supported, -}; - /* At setup time, we use our own ops if the device is a ConneXt one */ static void sta2x11_setup_pdev(struct pci_dev *pdev) { @@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev) return; pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); - pdev->dev.dma_ops = &sta2x11_dma_ops; + pdev->dev.dma_ops = &swiotlb_dma_ops; + pdev->dev.archdata.is_sta2x11 = true; /* We must enable all devices as master, for audio DMA to work */ pci_set_master(pdev); @@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { struct sta2x11_mapping *map; - if (dev->dma_ops != &sta2x11_dma_ops) { + if (!dev->archdata.is_sta2x11) { if (!dev->dma_mask) return false; return addr + size - 1 <= *dev->dma_mask; @@ -243,13 +207,13 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } /** - * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device + * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device * @dev: device for a PCI device * @paddr: Physical address */ -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { - if (dev->dma_ops != &sta2x11_dma_ops) + if (!dev->archdata.is_sta2x11) return paddr; return p2a(paddr, to_pci_dev(dev)); } @@ -259,9 +223,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) * @dev: device for a PCI device * @daddr: STA2x11 AMBA DMA address */ -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { - if (dev->dma_ops != &sta2x11_dma_ops) + if (!dev->archdata.is_sta2x11) return daddr; return a2p(daddr, to_pci_dev(dev)); } diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f3a21343e636..df171cb85822 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -107,6 +107,7 @@ config IOMMU_PGTABLES_L2 # AMD IOMMU support config AMD_IOMMU bool "AMD IOMMU support" + select DMA_DIRECT_OPS select SWIOTLB select PCI_MSI select PCI_ATS @@ -142,6 +143,7 @@ config DMAR_TABLE config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) + select DMA_DIRECT_OPS select IOMMU_API select IOMMU_IOVA select DMAR_TABLE diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 74788fdeb773..83819d0cbf90 100644 --- a/drivers/iommu/amd_iommu.c +++ 
b/drivers/iommu/amd_iommu.c @@ -28,6 +28,7 @@ #include <linux/debugfs.h> #include <linux/scatterlist.h> #include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/iommu-helper.h> #include <linux/iommu.h> #include <linux/delay.h> @@ -2193,7 +2194,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->dma_ops = &nommu_dma_ops; + dev->dma_ops = &dma_direct_ops; goto out; } init_iommu_group(dev); @@ -2599,51 +2600,32 @@ static void *alloc_coherent(struct device *dev, size_t size, unsigned long attrs) { u64 dma_mask = dev->coherent_dma_mask; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct page *page; - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) { - page = alloc_pages(flag, get_order(size)); - *dma_addr = page_to_phys(page); - return page_address(page); - } else if (IS_ERR(domain)) - return NULL; + struct protection_domain *domain = get_domain(dev); + bool is_direct = false; + void *virt_addr; - dma_dom = to_dma_ops_domain(domain); - size = PAGE_ALIGN(size); - dma_mask = dev->coherent_dma_mask; - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - flag |= __GFP_ZERO; - - page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); - if (!page) { - if (!gfpflags_allow_blocking(flag)) - return NULL; - - page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size), flag); - if (!page) + if (IS_ERR(domain)) { + if (PTR_ERR(domain) != -EINVAL) return NULL; + is_direct = true; } + virt_addr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); + if (!virt_addr || is_direct) + return virt_addr; + if (!dma_mask) dma_mask = *dev->dma_mask; - *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), - size, DMA_BIDIRECTIONAL, dma_mask); - + *dma_addr = __map_single(dev, to_dma_ops_domain(domain), + virt_to_phys(virt_addr), PAGE_ALIGN(size), + DMA_BIDIRECTIONAL, dma_mask); if (*dma_addr == AMD_IOMMU_MAPPING_ERROR) goto out_free; - - return page_address(page); + return virt_addr; out_free: - - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); - + dma_direct_free(dev, size, virt_addr, *dma_addr, attrs); return NULL; } @@ -2654,24 +2636,17 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr, unsigned long attrs) { - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct page *page; + struct protection_domain *domain = get_domain(dev); - page = virt_to_page(virt_addr); size = PAGE_ALIGN(size); - domain = get_domain(dev); - if (IS_ERR(domain)) - goto free_mem; + if (!IS_ERR(domain)) { + struct dma_ops_domain *dma_dom = to_dma_ops_domain(domain); - dma_dom = to_dma_ops_domain(domain); - - __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); + __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); + } -free_mem: - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); + dma_direct_free(dev, size, virt_addr, dma_addr, attrs); } /* @@ -2680,7 +2655,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - if (!x86_dma_supported(dev, mask)) + if (!dma_direct_supported(dev, mask)) return 0; return check_device(dev); } @@ -2794,7 +2769,7 @@ int __init amd_iommu_init_dma_ops(void) * continue to be SWIOTLB. 
*/ if (!swiotlb) - dma_ops = &nommu_dma_ops; + dma_ops = &dma_direct_ops; if (amd_iommu_unmap_flush) pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n"); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 582fd01cb7d1..24d1b1b42013 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/dmar.h> #include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/mempool.h> #include <linux/memory.h> #include <linux/cpu.h> @@ -45,6 +46,7 @@ #include <linux/pci-ats.h> #include <linux/memblock.h> #include <linux/dma-contiguous.h> +#include <linux/dma-direct.h> #include <linux/crash_dump.h> #include <asm/irq_remapping.h> #include <asm/cacheflush.h> @@ -3707,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - struct page *page = NULL; - int order; + void *vaddr; - size = PAGE_ALIGN(size); - order = get_order(size); + vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs); + if (iommu_no_mapping(dev) || !vaddr) + return vaddr; - if (!iommu_no_mapping(dev)) - flags &= ~(GFP_DMA | GFP_DMA32); - else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { - if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) - flags |= GFP_DMA; - else - flags |= GFP_DMA32; - } - - if (gfpflags_allow_blocking(flags)) { - unsigned int count = size >> PAGE_SHIFT; - - page = dma_alloc_from_contiguous(dev, count, order, flags); - if (page && iommu_no_mapping(dev) && - page_to_phys(page) + size > dev->coherent_dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - - if (!page) - page = alloc_pages(flags, order); - if (!page) - return NULL; - memset(page_address(page), 0, size); - - *dma_handle = __intel_map_single(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); - if (*dma_handle) - return page_address(page); - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, order); + *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr), + PAGE_ALIGN(size), DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); + if (!*dma_handle) + goto out_free_pages; + return vaddr; +out_free_pages: + dma_direct_free(dev, size, vaddr, *dma_handle, attrs); return NULL; } static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - int order; - struct page *page = virt_to_page(vaddr); - - size = PAGE_ALIGN(size); - order = get_order(size); - - intel_unmap(dev, dma_handle, size); - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, order); + if (!iommu_no_mapping(dev)) + intel_unmap(dev, dma_handle, PAGE_ALIGN(size)); + dma_direct_free(dev, size, vaddr, dma_handle, attrs); } static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, @@ -3871,7 +3842,7 @@ const struct dma_map_ops intel_dma_ops = { .unmap_page = intel_unmap_page, .mapping_error = intel_mapping_error, #ifdef CONFIG_X86 - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, #endif }; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 5bb72d3f8337..e1c60899fdbc 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -53,20 +53,6 @@ * API. 
*/ -#ifndef CONFIG_X86 -static unsigned long dma_alloc_coherent_mask(struct device *dev, - gfp_t gfp) -{ - unsigned long dma_mask = 0; - - dma_mask = dev->coherent_dma_mask; - if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); - - return dma_mask; -} -#endif - #define XEN_SWIOTLB_ERROR_CODE (~(dma_addr_t)0x0) static char *xen_io_tlb_start, *xen_io_tlb_end; @@ -328,7 +314,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return ret; if (hwdev && hwdev->coherent_dma_mask) - dma_mask = dma_alloc_coherent_mask(hwdev, flags); + dma_mask = hwdev->coherent_dma_mask; /* At this point dma_handle is the physical address, next we are * going to set it to the machine address. diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index bcdb1a3e4b1f..53ad6a47f513 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -3,18 +3,19 @@ #define _LINUX_DMA_DIRECT_H 1 #include <linux/dma-mapping.h> +#include <linux/mem_encrypt.h> #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include <asm/dma-direct.h> #else -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dev_addr = (dma_addr_t)paddr; return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) { phys_addr_t paddr = (phys_addr_t)dev_addr; @@ -30,6 +31,22 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ +/* + * If memory encryption is supported, phys_to_dma will set the memory encryption + * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma + * and __dma_to_phys versions should only be used on non-encrypted memory for + * special occasions like DMA coherent buffers. + */ +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(__phys_to_dma(dev, paddr)); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(__dma_to_phys(dev, daddr)); +} + #ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN void dma_mark_clean(void *addr, size_t size); #else diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eb9eab4ecd6d..12fedcba9a9a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -518,12 +518,8 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr)) return cpu_addr; - /* - * Let the implementation decide on the zone to allocate from, and - * decide on the way of zeroing the memory given that the memory - * returned should always be zeroed. 
- */ - flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_ZERO); + /* let the implementation decide on the zone to allocate from: */ + flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); if (!arch_dma_alloc_attrs(&dev, &flag)) return NULL; diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index e5140648f638..da5178216da5 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif +#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT +static inline int set_memory_encrypted(unsigned long addr, int numpages) +{ + return 0; +} + +static inline int set_memory_decrypted(unsigned long addr, int numpages) +{ + return 0; +} +#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ + #endif /* _LINUX_SET_MEMORY_H_ */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5b1f2a00491c..965be92c33b5 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, void swiotlb_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -extern void -*swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags); - -extern void -swiotlb_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle); - extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, diff --git a/lib/dma-direct.c b/lib/dma-direct.c index c9e8e21cb334..c0bba30fef0a 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -9,6 +9,7 @@ #include <linux/scatterlist.h> #include <linux/dma-contiguous.h> #include <linux/pfn.h> +#include <linux/set_memory.h> #define DIRECT_MAPPING_ERROR 0 @@ -20,6 +21,14 @@ #define ARCH_ZONE_DMA_BITS 24 #endif +/* + * For AMD SEV all DMA must be to unencrypted addresses. + */ +static inline bool force_dma_unencrypted(void) +{ + return sev_active(); +} + static bool check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, const char *caller) @@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; + dma_addr_t addr = force_dma_unencrypted() ? 
+ __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); + return addr + size - 1 <= dev->coherent_dma_mask; } void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, @@ -46,6 +57,10 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; int page_order = get_order(size); struct page *page = NULL; + void *ret; + + /* we always manually zero the memory once we are done: */ + gfp &= ~__GFP_ZERO; /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) @@ -78,10 +93,15 @@ again: if (!page) return NULL; - - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - memset(page_address(page), 0, size); - return page_address(page); + ret = page_address(page); + if (force_dma_unencrypted()) { + set_memory_decrypted((unsigned long)ret, 1 << page_order); + *dma_handle = __phys_to_dma(dev, page_to_phys(page)); + } else { + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + } + memset(ret, 0, size); + return ret; } /* @@ -92,9 +112,12 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int page_order = get_order(size); + if (force_dma_unencrypted()) + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, get_order(size)); + free_pages((unsigned long)cpu_addr, page_order); } static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index c43ec2271469..47aeb04c1997 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -31,6 +31,7 @@ #include <linux/gfp.h> #include <linux/scatterlist.h> #include <linux/mem_encrypt.h> +#include <linux/set_memory.h> #include <asm/io.h> #include <asm/dma.h> @@ -156,22 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? 
size : (IO_TLB_DEFAULT_SIZE); } -void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { } - -/* For swiotlb, clear memory encryption mask from dma addresses */ -static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, - phys_addr_t address) -{ - return __sme_clr(phys_to_dma(hwdev, address)); -} - -/* Note that this doesn't work with highmem page */ -static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, - volatile void *address) -{ - return phys_to_dma(hwdev, virt_to_phys(address)); -} - static bool no_iotlb_memory; void swiotlb_print_info(void) @@ -209,12 +194,12 @@ void __init swiotlb_update_mem_attributes(void) vaddr = phys_to_virt(io_tlb_start); bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); vaddr = phys_to_virt(io_tlb_overflow_buffer); bytes = PAGE_ALIGN(io_tlb_overflow); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); } @@ -355,7 +340,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_start = virt_to_phys(tlb); io_tlb_end = io_tlb_start + bytes; - swiotlb_set_mem_attributes(tlb, bytes); + set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); memset(tlb, 0, bytes); /* @@ -366,7 +351,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!v_overflow_buffer) goto cleanup2; - swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow); + set_memory_decrypted((unsigned long)v_overflow_buffer, + io_tlb_overflow >> PAGE_SHIFT); memset(v_overflow_buffer, 0, io_tlb_overflow); io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); @@ -622,7 +608,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, return SWIOTLB_MAP_ERROR; } - start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start); + start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -706,6 +692,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } +#ifdef CONFIG_DMA_DIRECT_OPS static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, size_t size) { @@ -726,12 +713,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, goto out_warn; phys_addr = swiotlb_tbl_map_single(dev, - swiotlb_phys_to_dma(dev, io_tlb_start), + __phys_to_dma(dev, io_tlb_start), 0, size, DMA_FROM_DEVICE, 0); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; - *dma_handle = swiotlb_phys_to_dma(dev, phys_addr); + *dma_handle = __phys_to_dma(dev, phys_addr); if (dma_coherent_ok(dev, *dma_handle, size)) goto out_unmap; @@ -759,28 +746,6 @@ out_warn: return NULL; } -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) -{ - int order = get_order(size); - unsigned long attrs = (flags & __GFP_NOWARN) ? 
DMA_ATTR_NO_WARN : 0; - void *ret; - - ret = (void *)__get_free_pages(flags, order); - if (ret) { - *dma_handle = swiotlb_virt_to_bus(hwdev, ret); - if (dma_coherent_ok(hwdev, *dma_handle, size)) { - memset(ret, 0, size); - return ret; - } - free_pages((unsigned long)ret, order); - } - - return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs); -} -EXPORT_SYMBOL(swiotlb_alloc_coherent); - static bool swiotlb_free_buffer(struct device *dev, size_t size, dma_addr_t dma_addr) { @@ -799,15 +764,7 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, DMA_ATTR_SKIP_CPU_SYNC); return true; } - -void -swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dev_addr) -{ - if (!swiotlb_free_buffer(hwdev, size, dev_addr)) - free_pages((unsigned long)vaddr, get_order(size)); -} -EXPORT_SYMBOL(swiotlb_free_coherent); +#endif static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, @@ -867,10 +824,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_phys_to_dma(dev, map); + dev_addr = __phys_to_dma(dev, map); /* Ensure that the address returned is DMA'ble */ if (dma_capable(dev, dev_addr, size)) @@ -879,7 +836,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } /* @@ -1009,7 +966,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sg_dma_len(sgl) = 0; return 0; } - sg->dma_address = swiotlb_phys_to_dma(hwdev, map); + sg->dma_address = __phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg_dma_len(sg) = sg->length; @@ -1073,7 +1030,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); + return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); } /* @@ -1085,7 +1042,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; + return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } #ifdef CONFIG_DMA_DIRECT_OPS |
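
As a readable companion to the lib/dma-direct.c hunk above: when force_dma_unencrypted() is true (AMD SEV active), the generic allocator marks the freshly allocated pages decrypted and returns the raw bus address via __phys_to_dma(); otherwise it uses the SME-aware phys_to_dma(). The following is a condensed excerpt of that hunk; the CMA/zone fallback and error paths are omitted.

    /* Condensed from lib/dma-direct.c after this merge (not the full function). */
    static inline bool force_dma_unencrypted(void)
    {
    	return sev_active();	/* with AMD SEV all DMA must be to unencrypted addresses */
    }

    	/* tail of dma_direct_alloc(), once a suitable page has been allocated: */
    	ret = page_address(page);
    	if (force_dma_unencrypted()) {
    		set_memory_decrypted((unsigned long)ret, 1 << page_order);
    		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
    	} else {
    		*dma_handle = phys_to_dma(dev, page_to_phys(page));
    	}
    	memset(ret, 0, size);	/* __GFP_ZERO is cleared up front; zeroing happens here */
    	return ret;
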