author		Linus Torvalds <torvalds@linux-foundation.org>	2018-06-07 19:23:33 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-06-07 19:23:33 +0200
commit		c90fca951e90ba470a3dc6087667edffcf8db21b
tree		b131279a9290826e78884ee706cc0a4f1d82be4e /arch/powerpc/mm/tlb-radix.c
parent		Merge tag 'microblaze-v4.18-rc1' of git://git.monstr.eu/linux-2.6-microblaze
parent		powerpc/64s/radix: Fix missing ptesync in flush_cache_vmap
Merge tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Notable changes:
- Support for split PMD page table lock on 64-bit Book3S (Power8/9).
- Add support for HAVE_RELIABLE_STACKTRACE, so we properly support
live patching again.
- Add support for patching barrier_nospec in copy_from_user() and
syscall entry.
- A couple of fixes for our data breakpoints on Book3S.
- A series from Nick optimising TLB/mm handling with the Radix MMU.
- Numerous small cleanups to squash sparse/gcc warnings from Mathieu
Malaterre.
- Several series optimising various parts of the 32-bit code from
Christophe Leroy.
- Removal of support for two old machines, "SBC834xE" and "C2K"
("GEFanuc,C2K"), which is why the diffstat has so many deletions.
And many other small improvements & fixes.
There are a few out-of-area changes. Some minor ftrace changes OK'ed by
Steve, and a fix to our powernv cpuidle driver. Then there's a series
touching mm, x86 and fs/proc/task_mmu.c, which cleans up some details
around pkey support. It was ack'ed/reviewed by Ingo & Dave and has
been in next for several weeks.
Thanks to: Akshay Adiga, Alastair D'Silva, Alexey Kardashevskiy, Al
Viro, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Arnd
Bergmann, Balbir Singh, Cédric Le Goater, Christophe Leroy, Christophe
Lombard, Colin Ian King, Dave Hansen, Fabio Estevam, Finn Thain,
Frederic Barrat, Gautham R. Shenoy, Haren Myneni, Hari Bathini, Ingo
Molnar, Jonathan Neuschäfer, Josh Poimboeuf, Kamalesh Babulal,
Madhavan Srinivasan, Mahesh Salgaonkar, Mark Greer, Mathieu Malaterre,
Matthew Wilcox, Michael Neuling, Michal Suchanek, Naveen N. Rao,
Nicholas Piggin, Nicolai Stange, Olof Johansson, Paul Gortmaker, Paul
Mackerras, Peter Rosin, Pridhiviraj Paidipeddi, Ram Pai, Rashmica
Gupta, Ravi Bangoria, Russell Currey, Sam Bobroff, Samuel
Mendoza-Jonas, Segher Boessenkool, Shilpasri G Bhat, Simon Guo,
Souptick Joarder, Stewart Smith, Thiago Jung Bauermann, Torsten Duwe,
Vaibhav Jain, Wei Yongjun, Wolfram Sang, Yisheng Xie, YueHaibing"
* tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (251 commits)
powerpc/64s/radix: Fix missing ptesync in flush_cache_vmap
cpuidle: powernv: Fix promotion from snooze if next state disabled
powerpc: fix build failure by disabling attribute-alias warning in pci_32
ocxl: Fix missing unlock on error in afu_ioctl_enable_p9_wait()
powerpc-opal: fix spelling mistake "Uniterrupted" -> "Uninterrupted"
powerpc: fix spelling mistake: "Usupported" -> "Unsupported"
powerpc/pkeys: Detach execute_only key on !PROT_EXEC
powerpc/powernv: copy/paste - Mask SO bit in CR
powerpc: Remove core support for Marvell mv64x60 hostbridges
powerpc/boot: Remove core support for Marvell mv64x60 hostbridges
powerpc/boot: Remove support for Marvell mv64x60 i2c controller
powerpc/boot: Remove support for Marvell MPSC serial controller
powerpc/embedded6xx: Remove C2K board support
powerpc/lib: optimise PPC32 memcmp
powerpc/lib: optimise 32 bits __clear_user()
powerpc/time: inline arch_vtime_task_switch()
powerpc/Makefile: set -mcpu=860 flag for the 8xx
powerpc: Implement csum_ipv6_magic in assembly
powerpc/32: Optimise __csum_partial()
powerpc/lib: Adjust .balign inside string functions for PPC32
...
Diffstat (limited to 'arch/powerpc/mm/tlb-radix.c')
-rw-r--r--	arch/powerpc/mm/tlb-radix.c	366
1 file changed, 338 insertions(+), 28 deletions(-)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a5d7309c2d05..67a6e86d3e7e 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,6 +12,8 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
 
 #include <asm/ppc-opcode.h>
 #include <asm/tlb.h>
@@ -118,6 +120,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbiel_lpid(unsigned long lpid, int set,
+				unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+static inline void __tlbiel_lpid_guest(unsigned long lpid, int set,
+				unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = PPC_BIT(52); /* IS = 2 */
+	rb |= set << PPC_BITLSHIFT(51);
+	rs = 0;  /* LPID comes from LPIDR */
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 1, rb, rs, ric, prs, r);
+}
+
+
 static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 			       unsigned long ap, unsigned long ric)
 {
@@ -150,6 +199,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+			      unsigned long ap, unsigned long ric)
+{
+	unsigned long rb,rs,prs,r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = lpid;
+	prs = 0; /* partition scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
 static inline void fixup_tlbie(void)
 {
 	unsigned long pid = 0;
@@ -161,6 +226,16 @@ static inline void fixup_tlbie(void)
 	}
 }
 
+static inline void fixup_tlbie_lpid(unsigned long lpid)
+{
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
+		asm volatile("ptesync": : :"memory");
+		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+	}
+}
+
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
@@ -214,6 +289,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+		__tlbiel_lpid(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+	asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Workaround the fact that the "ric" argument to __tlbie_pid
+	 * must be a compile-time contraint to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
+	}
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+	int set;
+
+	VM_BUG_ON(mfspr(SPRN_LPID) != lpid);
+
+	asm volatile("ptesync": : :"memory");
+
+	/*
+	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
+	 * also flush the entire Page Walk Cache.
+	 */
+	__tlbiel_lpid_guest(lpid, 0, ric);
+
+	/* For PWC, only one flush is needed */
+	if (ric == RIC_FLUSH_PWC) {
+		asm volatile("ptesync": : :"memory");
+		return;
+	}
+
+	/* For the remaining sets, just flush the TLB */
+	for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+		__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
+
+	asm volatile("ptesync": : :"memory");
+}
+
+
 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
 				    unsigned long pid, unsigned long page_size,
 				    unsigned long psize)
@@ -268,6 +423,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+			      unsigned long psize, unsigned long ric)
+{
+	unsigned long ap = mmu_get_ap(psize);
+
+	asm volatile("ptesync": : :"memory");
+	__tlbie_lpid_va(va, lpid, ap, ric);
+	fixup_tlbie_lpid(lpid);
+	asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 				    unsigned long pid, unsigned long page_size,
 				    unsigned long psize, bool also_pwc)
@@ -340,6 +506,15 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_page);
 
+static bool mm_is_singlethreaded(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.copros) > 0)
+		return false;
+	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
+		return true;
+	return false;
+}
+
 static bool mm_needs_flush_escalation(struct mm_struct *mm)
 {
 	/*
@@ -347,10 +522,47 @@
 	 * caching PTEs and not flushing them properly when
 	 * RIC = 0 for a PID/LPID invalidate
 	 */
-	return atomic_read(&mm->context.copros) != 0;
+	if (atomic_read(&mm->context.copros) > 0)
+		return true;
+	return false;
 }
 
 #ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+	struct mm_struct *mm = arg;
+	unsigned long pid = mm->context.id;
+
+	if (current->mm == mm)
+		return; /* Local CPU */
+
+	if (current->active_mm == mm) {
+		/*
+		 * Must be a kernel thread because sender is single-threaded.
+		 */
+		BUG_ON(current->mm);
+		mmgrab(&init_mm);
+		switch_mm(mm, &init_mm, current);
+		current->active_mm = &init_mm;
+		mmdrop(mm);
+	}
+	_tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+static void exit_flush_lazy_tlbs(struct mm_struct *mm)
+{
+	/*
+	 * Would be nice if this was async so it could be run in
+	 * parallel with our local flush, but generic code does not
+	 * give a good API for it. Could extend the generic code or
+	 * make a special powerpc IPI for flushing TLBs.
+	 * For now it's not too performance critical.
+	 */
+	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
+				(void *)mm, 1);
+	mm_reset_thread_local(mm);
+}
+
 void radix__flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
@@ -360,18 +572,30 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
+
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
 		else
 			_tlbie_pid(pid, RIC_FLUSH_TLB);
-	} else
+	} else {
+local:
 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
+	}
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
-void radix__flush_all_mm(struct mm_struct *mm)
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 {
 	unsigned long pid;
 
@@ -380,12 +604,25 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
-	if (!mm_is_thread_local(mm))
+	smp_mb(); /* see radix__flush_tlb_mm */
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (!fullmm) {
+				exit_flush_lazy_tlbs(mm);
+				goto local;
+			}
+		}
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
-	else
+	} else {
+local:
 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+	}
 	preempt_enable();
 }
+void radix__flush_all_mm(struct mm_struct *mm)
+{
+	__flush_all_mm(mm, false);
+}
 EXPORT_SYMBOL(radix__flush_all_mm);
 
 void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
@@ -404,10 +641,17 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
-	if (!mm_is_thread_local(mm))
+	smp_mb(); /* see radix__flush_tlb_mm */
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
-	else
+	} else {
+local:
 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+	}
 	preempt_enable();
 }
 
@@ -466,14 +710,22 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
-	if (mm_is_thread_local(mm)) {
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	} else {
+	smp_mb(); /* see radix__flush_tlb_mm */
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (end != TLB_FLUSH_ALL) {
+				exit_flush_lazy_tlbs(mm);
+				goto is_local;
+			}
+		}
 		local = false;
 		full = (end == TLB_FLUSH_ALL ||
 				nr_pages > tlb_single_page_flush_ceiling);
+	} else {
+is_local:
+		local = true;
+		full = (end == TLB_FLUSH_ALL ||
+				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
 	if (full) {
@@ -534,6 +786,49 @@ static int radix_get_mmu_psize(int page_size)
 	return psize;
 }
 
+/*
+ * Flush partition scoped LPID address translation for all CPUs.
+ */
+void radix__flush_tlb_lpid_page(unsigned int lpid,
+					unsigned long addr,
+					unsigned long page_size)
+{
+	int psize = radix_get_mmu_psize(page_size);
+
+	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
+
+/*
+ * Flush partition scoped PWC from LPID for all CPUs.
+ */
+void radix__flush_pwc_lpid(unsigned int lpid)
+{
+	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
+}
+EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
+void radix__local_flush_tlb_lpid(unsigned int lpid)
+{
+	_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid);
+
+/*
+ * Flush process scoped translations from LPID (=LPIDR).
+ * Important difference, the guest normally manages its own translations,
+ * but some cases e.g., vCPU CPU migration require KVM to flush.
+ */
+void radix__local_flush_tlb_lpid_guest(unsigned int lpid)
+{
+	_tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest);
+
+
 static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long end, int psize);
 
@@ -551,7 +846,7 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 * See the comment for radix in arch_exit_mmap().
	 */
 	if (tlb->fullmm) {
-		radix__flush_all_mm(mm);
+		__flush_all_mm(mm, true);
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_mm(mm);
@@ -584,24 +879,33 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
-	if (mm_is_thread_local(mm)) {
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	} else {
+	smp_mb(); /* see radix__flush_tlb_mm */
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			if (end != TLB_FLUSH_ALL) {
+				exit_flush_lazy_tlbs(mm);
+				goto is_local;
+			}
+		}
 		local = false;
 		full = (end == TLB_FLUSH_ALL ||
 				nr_pages > tlb_single_page_flush_ceiling);
+	} else {
+is_local:
+		local = true;
+		full = (end == TLB_FLUSH_ALL ||
+				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
 	if (full) {
-		if (!local && mm_needs_flush_escalation(mm))
-			also_pwc = true;
-
-		if (local)
+		if (local) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
-		else
-			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL: RIC_FLUSH_TLB);
+		} else {
+			if (mm_needs_flush_escalation(mm))
+				also_pwc = true;
+
+			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+		}
 	} else {
 		if (local)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
@@ -642,11 +946,17 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
-	if (mm_is_thread_local(mm)) {
-		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
-	} else {
+	smp_mb(); /* see radix__flush_tlb_mm */
+	if (!mm_is_thread_local(mm)) {
+		if (unlikely(mm_is_singlethreaded(mm))) {
+			exit_flush_lazy_tlbs(mm);
+			goto local;
+		}
 		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		goto local;
+	} else {
+local:
+		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	}
 	preempt_enable();
 }