author | Catalin Marinas <catalin.marinas@arm.com> | 2018-02-28 19:47:20 +0100
---|---|---
committer | Will Deacon <will.deacon@arm.com> | 2018-03-06 19:52:32 +0100
commit | 1f85b42a691cd8329ba82dbcaeec80ac1231b32a |
tree | da12a0975152204ecccb02c335e68cb3f09aed22 /arch/arm64/mm |
parent | arm64: lse: Pass -fomit-frame-pointer to out-of-line ll/sc atomics |
arm64: Revert L1_CACHE_SHIFT back to 6 (64-byte cache line size)
Commit 97303480753e ("arm64: Increase the max granular size") increased
the cache line size to 128 bytes to match Cavium ThunderX, apparently for
a performance benefit that could not be confirmed. This change, however,
affects network packet allocation in certain circumstances: an allocation
can require slightly more than a 4K page, which causes a significant
performance degradation.
This patch reverts L1_CACHE_SHIFT back to 6 (64-byte cache line) while
keeping ARCH_DMA_MINALIGN at 128. The cache_line_size() function was
changed to default to ARCH_DMA_MINALIGN in the absence of a meaningful
CTR_EL0.CWG bit field.
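The cache_line_size() change itself lives in the arm64 cache header rather
than under arch/arm64/mm, so it does not appear in the diffstat below. As a
rough illustration of the decode-and-fallback behaviour described above, here
is a standalone sketch; the cache_type_cwg() helper name and the CTR_CWG_*
constants mirror kernel naming, but this is only a model, not the kernel
implementation.

```c
#include <stdio.h>

#define ARCH_DMA_MINALIGN	128	/* unchanged by this patch */
#define CTR_CWG_SHIFT		24	/* CTR_EL0.CWG lives in bits [27:24] */
#define CTR_CWG_MASK		0xfUL

/* Extract the CWG field from a raw CTR_EL0 value. */
static unsigned int cache_type_cwg(unsigned long ctr_el0)
{
	return (ctr_el0 >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
}

/*
 * CWG encodes log2 of the cache writeback granule in words, so the
 * granule is 4 << CWG bytes.  A value of 0 means the field is not
 * meaningful, in which case the kernel now falls back to
 * ARCH_DMA_MINALIGN.
 */
static int cache_line_size(unsigned long ctr_el0)
{
	unsigned int cwg = cache_type_cwg(ctr_el0);

	return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}

int main(void)
{
	printf("CWG=4 -> %d bytes\n", cache_line_size(4UL << CTR_CWG_SHIFT));
	printf("CWG=5 -> %d bytes\n", cache_line_size(5UL << CTR_CWG_SHIFT));
	printf("CWG=0 -> %d bytes (ARCH_DMA_MINALIGN fallback)\n",
	       cache_line_size(0));
	return 0;
}
```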
In addition, if a system with ARCH_DMA_MINALIGN < CTR_EL0.CWG is
detected, the kernel will force swiotlb bounce buffering for all
non-coherent devices, since DMA cache maintenance on sub-CWG ranges is
not safe and can lead to data corruption.
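As the comment added to arch_setup_dma_ops() in the diff notes, the actual
per-buffer bounce is forced in dma_capable(); that hunk sits outside
arch/arm64/mm and is therefore not shown here. Below is a loose,
self-contained model of the decision, with hypothetical names
(struct device_model, minalign_below_cwg) standing in for the kernel's
struct device and the swiotlb_noncoherent_bounce static key.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct device, for illustration only. */
struct device_model {
	uint64_t dma_mask;	/* highest DMA address the device can reach */
	bool dma_coherent;	/* does the device snoop CPU caches? */
};

/* Stand-in for the swiotlb_noncoherent_bounce static key. */
static bool minalign_below_cwg;

/*
 * Model of the dma_capable() decision: returning false makes swiotlb
 * bounce the buffer.  Bounce either when the address is outside the
 * device's DMA mask, or when the device is non-coherent and
 * ARCH_DMA_MINALIGN < CTR_EL0.CWG, because cache maintenance on
 * sub-CWG ranges would be unsafe.
 */
static bool dma_capable(const struct device_model *dev,
			uint64_t addr, size_t size)
{
	if (minalign_below_cwg && !dev->dma_coherent)
		return false;

	return addr + size - 1 <= dev->dma_mask;
}

int main(void)
{
	struct device_model nic = { .dma_mask = ~0ULL, .dma_coherent = false };

	minalign_below_cwg = true;	/* ARCH_DMA_MINALIGN < CWG detected */
	printf("non-coherent device must bounce: %s\n",
	       dma_capable(&nic, 0x80000000ULL, 4096) ? "no" : "yes");
	return 0;
}
```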
Cc: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Cc: Timur Tabi <timur@codeaurora.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r-- | arch/arm64/mm/dma-mapping.c | 17
-rw-r--r-- | arch/arm64/mm/init.c | 3
2 files changed, 19 insertions, 1 deletion
```diff
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec0181818..1e9dac8684ca 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -33,6 +33,7 @@
 #include <asm/cacheflush.h>
 
 static int swiotlb __ro_after_init;
+DEFINE_STATIC_KEY_FALSE(swiotlb_noncoherent_bounce);
 
 static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 				 bool coherent)
@@ -504,6 +505,14 @@ static int __init arm64_dma_init(void)
 	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
 		swiotlb = 1;
 
+	if (WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
+		       TAINT_CPU_OUT_OF_SPEC,
+		       "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+		       ARCH_DMA_MINALIGN, cache_line_size())) {
+		swiotlb = 1;
+		static_branch_enable(&swiotlb_noncoherent_bounce);
+	}
+
 	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
@@ -882,6 +891,14 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
+	/*
+	 * Enable swiotlb for buffer bouncing if ARCH_DMA_MINALIGN < CWG.
+	 * dma_capable() forces the actual bounce if the device is
+	 * non-coherent.
+	 */
+	if (static_branch_unlikely(&swiotlb_noncoherent_bounce) && !coherent)
+		iommu = NULL;
+
 	if (!dev->dma_ops)
 		dev->dma_ops = &arm64_swiotlb_dma_ops;
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9f3c47acf8ff..664acf177799 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -586,7 +586,8 @@ static void __init free_unused_memmap(void)
 void __init mem_init(void)
 {
 	if (swiotlb_force == SWIOTLB_FORCE ||
-	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT) ||
+	    ARCH_DMA_MINALIGN < cache_line_size())
 		swiotlb_init(1);
 	else
 		swiotlb_force = SWIOTLB_NO_FORCE;
```