From 58661a30f1bcc748475ffd9be6d2fc9e4e6be679 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:42 -0700 Subject: riscv: Flush the instruction cache during SMP bringup Instruction cache flush IPIs are sent only to CPUs in cpu_online_mask, so they will not target a CPU until it calls set_cpu_online() earlier in smp_callin(). As a result, if instruction memory is modified between the CPU coming out of reset and that point, then its instruction cache may contain stale data. Therefore, the instruction cache must be flushed after the set_cpu_online() synchronization point. Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smpboot.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index d41090fc3203..4b3c50da48ba 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -234,9 +234,10 @@ asmlinkage __visible void smp_callin(void) riscv_user_isa_enable(); /* - * Remote TLB flushes are ignored while the CPU is offline, so emit - * a local TLB flush right now just in case. + * Remote cache and TLB flushes are ignored while the CPU is offline, + * so flush them both right now just in case. */ + local_flush_icache_all(); local_flush_tlb_all(); complete(&cpu_running); /* -- cgit v1.2.3 From aaa56c8f378dd798f4a7f633cbf2eb129e98e6a4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:43 -0700 Subject: riscv: Factor out page table TLB synchronization The logic is the same for all page table levels. See commit 69be3fb111e7 ("riscv: enable MMU_GATHER_RCU_TABLE_FREE for SMP && MMU"). Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index deaf971253a2..b34587da8882 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,6 +15,14 @@ #define __HAVE_ARCH_PUD_FREE #include +static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +{ + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, pt); + else + tlb_remove_ptdesc(tlb, pt); +} + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -102,10 +110,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, struct ptdesc *ptdesc = virt_to_ptdesc(pud); pagetable_pud_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } } @@ -139,12 +144,8 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { - if (pgtable_l5_enabled) { - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d)); - else - tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); - } + if (pgtable_l5_enabled) + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -176,10 +177,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, struct ptdesc *ptdesc = virt_to_ptdesc(pmd); pagetable_pmd_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -190,10 +188,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, struct ptdesc *ptdesc = page_ptdesc(pte); pagetable_pte_dtor(ptdesc); - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, ptdesc); - else - tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, ptdesc); } #endif /* CONFIG_MMU */ -- cgit v1.2.3 From dc892fb443224da7018891a5fac9cb6ac50c14b3 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:44 -0700 Subject: riscv: Use IPIs for remote cache/TLB flushes by default An IPI backend is always required in an SMP configuration, but an SBI implementation is not. For example, SBI will be unavailable when the kernel runs in M mode. For this reason, consider IPI delivery of cache and TLB flushes to be the base case, and any other implementation (such as the SBI remote fence extension) to be an optimization. Generally, if IPIs can be delivered without firmware assistance, they are assumed to be faster than SBI calls due to the SBI context switch overhead. However, when SBI is used as the IPI backend, then the context switch cost must be paid anyway, and performing the cache/TLB flush directly in the SBI implementation is more efficient than injecting an interrupt to S-mode. This is the only existing scenario where riscv_ipi_set_virq_range() is called with use_for_rfence set to false. sbi_ipi_init() already checks riscv_ipi_have_virq_range(), so it only calls riscv_ipi_set_virq_range() when no other IPI device is available. This allows moving the static key and dropping the use_for_rfence parameter. This decouples the static key from the irqchip driver probe order. Furthermore, the static branch only makes sense when CONFIG_RISCV_SBI is enabled. Optherwise, IPIs must be used. Add a fallback definition of riscv_use_sbi_for_rfence() which handles this case and removes the need to check CONFIG_RISCV_SBI elsewhere, such as in cacheflush.c. Reviewed-by: Anup Patel Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 7 ++++--- arch/riscv/include/asm/sbi.h | 4 ++++ arch/riscv/include/asm/smp.h | 15 ++------------- arch/riscv/kernel/sbi-ipi.c | 11 ++++++++++- arch/riscv/kernel/smp.c | 11 +---------- arch/riscv/mm/cacheflush.c | 5 ++--- arch/riscv/mm/tlbflush.c | 31 ++++++++++++++----------------- drivers/clocksource/timer-clint.c | 2 +- 8 files changed, 38 insertions(+), 48 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index b34587da8882..f52264304f77 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_PGALLOC_H #include +#include #include #ifdef CONFIG_MMU @@ -17,10 +18,10 @@ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_ipi_for_rfence()) - tlb_remove_page_ptdesc(tlb, pt); - else + if (riscv_use_sbi_for_rfence()) tlb_remove_ptdesc(tlb, pt); + else + tlb_remove_page_ptdesc(tlb, pt); } static inline void pmd_populate_kernel(struct mm_struct *mm, diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..ea84392ca9d7 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -375,8 +375,12 @@ unsigned long riscv_cached_marchid(unsigned int cpu_id); unsigned long riscv_cached_mimpid(unsigned int cpu_id); #if IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_RISCV_SBI) +DECLARE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +#define riscv_use_sbi_for_rfence() \ + static_branch_unlikely(&riscv_sbi_for_rfence) void sbi_ipi_init(void); #else +static inline bool riscv_use_sbi_for_rfence(void) { return false; } static inline void sbi_ipi_init(void) { } #endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 0d555847cde6..7ac80e9f2288 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -49,12 +49,7 @@ void riscv_ipi_disable(void); bool riscv_ipi_have_virq_range(void); /* Set the IPI interrupt numbers for arch (called by irqchip drivers) */ -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence); - -/* Check if we can use IPIs for remote FENCEs */ -DECLARE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -#define riscv_use_ipi_for_rfence() \ - static_branch_unlikely(&riscv_ipi_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr); /* Check other CPUs stop or not */ bool smp_crash_stop_failed(void); @@ -104,16 +99,10 @@ static inline bool riscv_ipi_have_virq_range(void) return false; } -static inline void riscv_ipi_set_virq_range(int virq, int nr, - bool use_for_rfence) +static inline void riscv_ipi_set_virq_range(int virq, int nr) { } -static inline bool riscv_use_ipi_for_rfence(void) -{ - return false; -} - #endif /* CONFIG_SMP */ #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index a4559695ce62..1026e22955cc 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -13,6 +13,9 @@ #include #include +DEFINE_STATIC_KEY_FALSE(riscv_sbi_for_rfence); +EXPORT_SYMBOL_GPL(riscv_sbi_for_rfence); + static int sbi_ipi_virq; static void sbi_ipi_handle(struct irq_desc *desc) @@ -72,6 +75,12 @@ void __init sbi_ipi_init(void) "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); - riscv_ipi_set_virq_range(virq, BITS_PER_BYTE, false); + riscv_ipi_set_virq_range(virq, BITS_PER_BYTE); pr_info("providing IPIs using SBI IPI extension\n"); + + /* + * Use the SBI remote fence extension to avoid + * the extra context switch needed to handle IPIs. + */ + static_branch_enable(&riscv_sbi_for_rfence); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 45dd4035416e..8e6eb64459af 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -171,10 +171,7 @@ bool riscv_ipi_have_virq_range(void) return (ipi_virq_base) ? true : false; } -DEFINE_STATIC_KEY_FALSE(riscv_ipi_for_rfence); -EXPORT_SYMBOL_GPL(riscv_ipi_for_rfence); - -void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) +void riscv_ipi_set_virq_range(int virq, int nr) { int i, err; @@ -197,12 +194,6 @@ void riscv_ipi_set_virq_range(int virq, int nr, bool use_for_rfence) /* Enabled IPIs for boot CPU immediately */ riscv_ipi_enable(); - - /* Update RFENCE static key */ - if (use_for_rfence) - static_branch_enable(&riscv_ipi_for_rfence); - else - static_branch_disable(&riscv_ipi_for_rfence); } static const char * const ipi_names[] = { diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index bc61ee5975e4..d76fc73e594b 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,7 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence()) + if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -69,8 +69,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local) * with flush_icache_deferred(). */ smp_mb(); - } else if (IS_ENABLED(CONFIG_RISCV_SBI) && - !riscv_use_ipi_for_rfence()) { + } else if (riscv_use_sbi_for_rfence()) { sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..0435605b07d0 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,10 +79,10 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_ipi_for_rfence()) - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); - else + if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); + else + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); } struct flush_tlb_range_data { @@ -103,7 +103,6 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - struct flush_tlb_range_data ftd; bool broadcast; if (cpumask_empty(cmask)) @@ -119,20 +118,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, broadcast = true; } - if (broadcast) { - if (riscv_use_ipi_for_rfence()) { - ftd.asid = asid; - ftd.start = start; - ftd.size = size; - ftd.stride = stride; - on_each_cpu_mask(cmask, - __ipi_flush_tlb_range_asid, - &ftd, 1); - } else - sbi_remote_sfence_vma_asid(cmask, - start, size, asid); - } else { + if (!broadcast) { local_flush_tlb_range_asid(start, size, stride, asid); + } else if (riscv_use_sbi_for_rfence()) { + sbi_remote_sfence_vma_asid(cmask, start, size, asid); + } else { + struct flush_tlb_range_data ftd; + + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } if (cmask != cpu_online_mask) diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c index 09fd292eb83d..0bdd9d7ec545 100644 --- a/drivers/clocksource/timer-clint.c +++ b/drivers/clocksource/timer-clint.c @@ -251,7 +251,7 @@ static int __init clint_timer_init_dt(struct device_node *np) } irq_set_chained_handler(clint_ipi_irq, clint_ipi_interrupt); - riscv_ipi_set_virq_range(rc, BITS_PER_BYTE, true); + riscv_ipi_set_virq_range(rc, BITS_PER_BYTE); clint_clear_ipi(); #endif -- cgit v1.2.3 From 038ac18aae935c89c874649acde5a82f588a12b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:45 -0700 Subject: riscv: mm: Broadcast kernel TLB flushes only when needed __flush_tlb_range() avoids broadcasting TLB flushes when an mm context is only active on the local CPU. Apply this same optimization to TLB flushes of kernel memory when only one CPU is online. This check can be constant-folded when SMP is disabled. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-5-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0435605b07d0..da821315d43e 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -103,22 +103,15 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { - bool broadcast; + unsigned int cpu; if (cpumask_empty(cmask)) return; - if (cmask != cpu_online_mask) { - unsigned int cpuid; + cpu = get_cpu(); - cpuid = get_cpu(); - /* check if the tlbflush needs to be sent to other CPUs */ - broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; - } else { - broadcast = true; - } - - if (!broadcast) { + /* Check if the TLB flush needs to be sent to other CPUs. */ + if (cpumask_any_but(cmask, cpu) >= nr_cpu_ids) { local_flush_tlb_range_asid(start, size, stride, asid); } else if (riscv_use_sbi_for_rfence()) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); @@ -132,8 +125,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1); } - if (cmask != cpu_online_mask) - put_cpu(); + put_cpu(); } static inline unsigned long get_mm_asid(struct mm_struct *mm) -- cgit v1.2.3 From 9546f00410ed88b8117388cfd74ea59f4616a158 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:46 -0700 Subject: riscv: Only send remote fences when some other CPU is online If no other CPU is online, a local cache or TLB flush is sufficient. These checks can be constant-folded when SMP is disabled. Signed-off-by: Samuel Holland Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240327045035.368512-6-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/cacheflush.c | 4 +++- arch/riscv/mm/tlbflush.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index d76fc73e594b..f5be1fec8191 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -21,7 +21,9 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + return; + else if (riscv_use_sbi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index da821315d43e..0901aa47b58f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -79,7 +79,9 @@ static void __ipi_flush_tlb_all(void *info) void flush_tlb_all(void) { - if (riscv_use_sbi_for_rfence()) + if (num_online_cpus() < 2) + local_flush_tlb_all(); + else if (riscv_use_sbi_for_rfence()) sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); else on_each_cpu(__ipi_flush_tlb_all, NULL, 1); -- cgit v1.2.3 From c6026d35b6abb5bd954788478bfa800a942e2033 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:47 -0700 Subject: riscv: mm: Combine the SMP and UP TLB flush code In SMP configurations, all TLB flushing narrower than flush_tlb_all() goes through __flush_tlb_range(). Do the same in UP configurations. This allows UP configurations to take advantage of recent improvements to the code in tlbflush.c, such as support for huge pages and flushing multiple-page ranges. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-7-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- arch/riscv/include/asm/tlbflush.h | 31 +++---------------------------- arch/riscv/mm/Makefile | 5 +---- 3 files changed, 5 insertions(+), 33 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index be09c8836d56..442532819a44 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -60,7 +60,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP && MMU + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4112cc8d1d69..4f86424b1ba5 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -27,12 +27,7 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } -#else /* CONFIG_MMU */ -#define local_flush_tlb_all() do { } while (0) -#define local_flush_tlb_page(addr) do { } while (0) -#endif /* CONFIG_MMU */ -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, @@ -54,28 +49,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); - -#else /* CONFIG_SMP && CONFIG_MMU */ - -#define flush_tlb_all() local_flush_tlb_all() -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb_all(); -} - -/* Flush a range of kernel pages */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - local_flush_tlb_all(); -} - -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ +#else /* CONFIG_MMU */ +#define local_flush_tlb_all() do { } while (0) +#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2c869f8026a8..cbe4d775ef56 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,14 +13,11 @@ endif KCOV_INSTRUMENT_init.o := n obj-y += init.o -obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o +obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o tlbflush.o obj-y += cacheflush.o obj-y += context.o obj-y += pmem.o -ifeq ($(CONFIG_MMU),y) -obj-$(CONFIG_SMP) += tlbflush.o -endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o -- cgit v1.2.3 From 20e03d702e00a3e0269a1d6f9549c2e370492054 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:48 -0700 Subject: riscv: Apply SiFive CIP-1200 workaround to single-ASID sfence.vma commit 3f1e782998cd ("riscv: add ASID-based tlbflushing methods") added calls to the sfence.vma instruction with rs2 != x0. These single-ASID instruction variants are also affected by SiFive errata CIP-1200. Until now, the errata workaround was not needed for the single-ASID sfence.vma variants, because they were only used when the ASID allocator was enabled, and the affected SiFive platforms do not support multiple ASIDs. However, we are going to start using those sfence.vma variants regardless of ASID support, so now we need alternatives covering them. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-8-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/errata_list.h | 12 +++++++++++- arch/riscv/include/asm/tlbflush.h | 19 ++++++++++++++++++- arch/riscv/mm/tlbflush.c | 23 ----------------------- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 1f2dbfb8a8bf..35ce26899960 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -43,11 +43,21 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \ CONFIG_ERRATA_SIFIVE_CIP_453) #else /* !__ASSEMBLY__ */ -#define ALT_FLUSH_TLB_PAGE(x) \ +#define ALT_SFENCE_VMA_ASID(asid) \ +asm(ALTERNATIVE("sfence.vma x0, %0", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (asid) : "memory") + +#define ALT_SFENCE_VMA_ADDR(addr) \ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ : : "r" (addr) : "memory") +#define ALT_SFENCE_VMA_ADDR_ASID(addr, asid) \ +asm(ALTERNATIVE("sfence.vma %0, %1", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (addr), "r" (asid) : "memory") + /* * _val is marked as "will be overwritten", so need to set it to 0 * in the default case. diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 4f86424b1ba5..463b615d7728 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,10 +22,27 @@ static inline void local_flush_tlb_all(void) __asm__ __volatile__ ("sfence.vma" : : : "memory"); } +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ASID(asid); + else + local_flush_tlb_all(); +} + /* Flush one page from local TLB */ static inline void local_flush_tlb_page(unsigned long addr) { - ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); + ALT_SFENCE_VMA_ADDR(addr); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + ALT_SFENCE_VMA_ADDR_ASID(addr, asid); + else + local_flush_tlb_page(addr); } void flush_tlb_all(void); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 0901aa47b58f..ad7bdcfcc219 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -7,29 +7,6 @@ #include #include -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); - else - local_flush_tlb_all(); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); - else - local_flush_tlb_page(addr); -} - /* * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. -- cgit v1.2.3 From d6dcdabafcd7c612b164079d00da6d9775863a0b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:49 -0700 Subject: riscv: Avoid TLB flush loops when affected by SiFive CIP-1200 Implementations affected by SiFive errata CIP-1200 have a bug which forces the kernel to always use the global variant of the sfence.vma instruction. When affected by this errata, do not attempt to flush a range of addresses; each iteration of the loop would actually flush the whole TLB instead. Instead, minimize the overall number of sfence.vma instructions. Signed-off-by: Samuel Holland Reviewed-by: Yunhui Cui Link: https://lore.kernel.org/r/20240327045035.368512-9-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/sifive/errata.c | 5 +++++ arch/riscv/include/asm/tlbflush.h | 2 ++ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 3d9a32d791f7..716cfedad3a2 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -42,6 +42,11 @@ static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long imp return false; if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626) return false; + +#ifdef CONFIG_MMU + tlb_flush_all_threshold = 0; +#endif + return true; } diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 463b615d7728..8e329721375b 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -66,6 +66,8 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, unsigned long uaddr); void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); + +extern unsigned long tlb_flush_all_threshold; #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ad7bdcfcc219..18af7b5053af 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -11,7 +11,7 @@ * Flush entire TLB if number of entries to be flushed is greater * than the threshold below. */ -static unsigned long tlb_flush_all_threshold __read_mostly = 64; +unsigned long tlb_flush_all_threshold __read_mostly = 64; static void local_flush_tlb_range_threshold_asid(unsigned long start, unsigned long size, -- cgit v1.2.3 From 74cd17792d28162fe692d5a25fe5cc081203ad19 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:50 -0700 Subject: riscv: mm: Introduce cntx2asid/cntx2version helper macros When using the ASID allocator, the MM context ID contains two values: the ASID in the lower bits, and the allocator version number in the remaining bits. Use macros to make this separation more obvious. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-10-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 3 +++ arch/riscv/mm/context.c | 12 ++++++------ arch/riscv/mm/tlbflush.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 355504b37f8e..a550fbf770be 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,6 +26,9 @@ typedef struct { #endif } mm_context_t; +#define cntx2asid(cntx) ((cntx) & asid_mask) +#define cntx2version(cntx) ((cntx) & ~asid_mask) + void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ba8eb3944687..b562b3c44487 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -81,7 +81,7 @@ static void __flush_context(void) if (cntx == 0) cntx = per_cpu(reserved_context, i); - __set_bit(cntx & asid_mask, context_asid_map); + __set_bit(cntx2asid(cntx), context_asid_map); per_cpu(reserved_context, i) = cntx; } @@ -102,7 +102,7 @@ static unsigned long __new_context(struct mm_struct *mm) lockdep_assert_held(&context_lock); if (cntx != 0) { - unsigned long newcntx = ver | (cntx & asid_mask); + unsigned long newcntx = ver | cntx2asid(cntx); /* * If our current CONTEXT was active during a rollover, we @@ -115,7 +115,7 @@ static unsigned long __new_context(struct mm_struct *mm) * We had a valid CONTEXT in a previous life, so try to * re-use it if possible. */ - if (!__test_and_set_bit(cntx & asid_mask, context_asid_map)) + if (!__test_and_set_bit(cntx2asid(cntx), context_asid_map)) return newcntx; } @@ -168,7 +168,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) */ old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu)); if (old_active_cntx && - ((cntx & ~asid_mask) == atomic_long_read(¤t_version)) && + (cntx2version(cntx) == atomic_long_read(¤t_version)) && atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu), old_active_cntx, cntx)) goto switch_mm_fast; @@ -177,7 +177,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) /* Check that our ASID belongs to the current_version. */ cntx = atomic_long_read(&mm->context.id); - if ((cntx & ~asid_mask) != atomic_long_read(¤t_version)) { + if (cntx2version(cntx) != atomic_long_read(¤t_version)) { cntx = __new_context(mm); atomic_long_set(&mm->context.id, cntx); } @@ -191,7 +191,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) switch_mm_fast: csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | - ((cntx & asid_mask) << SATP_ASID_SHIFT) | + (cntx2asid(cntx) << SATP_ASID_SHIFT) | satp_mode); if (need_flush_tlb) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 18af7b5053af..35266dd9a9a2 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -110,7 +110,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { return static_branch_unlikely(&use_asid_allocator) ? - atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; + cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; } void flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3 From f58e5dc45fa93a2f2a0028381b2e06fe74ae9e2c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:51 -0700 Subject: riscv: mm: Use a fixed layout for the MM context ID Currently, the size of the ASID field in the MM context ID dynamically depends on the number of hardware-supported ASID bits. This requires reading a global variable to extract either field from the context ID. Instead, allocate the maximum possible number of bits to the ASID field, so the layout of the context ID is known at compile-time. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-11-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 4 ++-- arch/riscv/include/asm/tlbflush.h | 2 -- arch/riscv/mm/context.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index a550fbf770be..dc0273f7905f 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -26,8 +26,8 @@ typedef struct { #endif } mm_context_t; -#define cntx2asid(cntx) ((cntx) & asid_mask) -#define cntx2version(cntx) ((cntx) & ~asid_mask) +#define cntx2asid(cntx) ((cntx) & SATP_ASID_MASK) +#define cntx2version(cntx) ((cntx) & ~SATP_ASID_MASK) void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, pgprot_t prot); diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 8e329721375b..72e559934952 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -15,8 +15,6 @@ #define FLUSH_TLB_NO_ASID ((unsigned long)-1) #ifdef CONFIG_MMU -extern unsigned long asid_mask; - static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index b562b3c44487..5315af06cd4d 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -22,7 +22,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; -unsigned long asid_mask; static atomic_long_t current_version; @@ -128,7 +127,7 @@ static unsigned long __new_context(struct mm_struct *mm) goto set_asid; /* We're out of ASIDs, so increment current_version */ - ver = atomic_long_add_return_relaxed(num_asids, ¤t_version); + ver = atomic_long_add_return_relaxed(BIT(SATP_ASID_BITS), ¤t_version); /* Flush everything */ __flush_context(); @@ -247,7 +246,6 @@ static int __init asids_init(void) /* Pre-compute ASID details */ if (asid_bits) { num_asids = 1 << asid_bits; - asid_mask = num_asids - 1; } /* @@ -255,7 +253,7 @@ static int __init asids_init(void) * at-least twice more than CPUs */ if (num_asids > (2 * num_possible_cpus())) { - atomic_long_set(¤t_version, num_asids); + atomic_long_set(¤t_version, BIT(SATP_ASID_BITS)); context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL); if (!context_asid_map) -- cgit v1.2.3 From 8d3e7613f97e4c117467be126d9c0013e9937f77 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:52 -0700 Subject: riscv: mm: Make asid_bits a local variable This variable is only used inside asids_init(). Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-12-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 5315af06cd4d..0bf6d0070a14 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -20,7 +20,6 @@ DEFINE_STATIC_KEY_FALSE(use_asid_allocator); -static unsigned long asid_bits; static unsigned long num_asids; static atomic_long_t current_version; @@ -226,7 +225,7 @@ static inline void set_mm(struct mm_struct *prev, static int __init asids_init(void) { - unsigned long old; + unsigned long asid_bits, old; /* Figure-out number of ASID bits in HW */ old = csr_read(CSR_SATP); -- cgit v1.2.3 From 8fc21cc672e8ea7954fdc6b59d9ce350730e9c92 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:53 -0700 Subject: riscv: mm: Preserve global TLB entries when switching contexts If the CPU does not support multiple ASIDs, all MM contexts use ASID 0. In this case, it is still beneficial to flush the TLB by ASID, as the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. This optimization is recommended by the RISC-V privileged specification: If the implementation does not provide ASIDs, or software chooses to always use ASID 0, then after every satp write, software should execute SFENCE.VMA with rs1=x0. In the common case that no global translations have been modified, rs2 should be set to a register other than x0 but which contains the value zero, so that global translations are not flushed. It is not possible to apply this optimization when using the ASID allocator, because that code must flush the TLB for all ASIDs at once when incrementing the version number. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-13-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 0bf6d0070a14..60cb0b82240e 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -200,7 +200,7 @@ static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); - local_flush_tlb_all(); + local_flush_tlb_all_asid(0); } static inline void set_mm(struct mm_struct *prev, -- cgit v1.2.3 From daef19263fc102551d734f39d9ad417098ffb360 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 26 Mar 2024 21:49:54 -0700 Subject: riscv: mm: Always use an ASID to flush mm contexts Even if multiple ASIDs are not supported, using the single-ASID variant of the sfence.vma instruction preserves TLB entries for global (kernel) pages. So it is always more efficient to use the single-ASID code path. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327045035.368512-14-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 35266dd9a9a2..44e7ed4e194f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -109,8 +109,7 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, static inline unsigned long get_mm_asid(struct mm_struct *mm) { - return static_branch_unlikely(&use_asid_allocator) ? - cntx2asid(atomic_long_read(&mm->context.id)) : FLUSH_TLB_NO_ASID; + return cntx2asid(atomic_long_read(&mm->context.id)); } void flush_tlb_mm(struct mm_struct *mm) -- cgit v1.2.3