diff options
author | Michael Ellerman <mpe@ellerman.id.au> | 2019-07-01 06:04:39 +0200 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2019-07-01 06:04:39 +0200 |
commit | 8b8dc695143642c6a8bee2242f2f7af4232298ab (patch) | |
tree | c36e89fcf8594e923f14016dc33f12736fd0f5af | |
parent | KVM: PPC: Book3S HV: Clear pending decrementer exceptions on nested guest entry (diff) | |
parent | Merge tag 'powerpc-5.2-6' into fixes (diff) | |
download | linux-8b8dc695143642c6a8bee2242f2f7af4232298ab.tar.xz linux-8b8dc695143642c6a8bee2242f2f7af4232298ab.zip |
Merge branch 'fixes' into next
Merge our fixes branch into next, this brings in a number of commits
that fix bugs we don't want to hit in next, in particular the fix for
CVE-2019-12817.
21 files changed, 230 insertions, 24 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ac6eb9816b64..5faceeefd9f9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -884,6 +884,23 @@ static inline int pmd_present(pmd_t pmd) return false; } +static inline int pmd_is_serializing(pmd_t pmd) +{ + /* + * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear + * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate). + * + * This condition may also occur when flushing a pmd while flushing + * it (see ptep_modify_prot_start), so callers must ensure this + * case is fine as well. + */ + if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) == + cpu_to_be64(_PAGE_INVALID)) + return true; + + return false; +} + static inline int pmd_bad(pmd_t pmd) { if (radix_enabled()) @@ -1100,6 +1117,19 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_access_permitted pmd_access_permitted static inline bool pmd_access_permitted(pmd_t pmd, bool write) { + /* + * pmdp_invalidate sets this combination (which is not caught by + * !pte_present() check in pte_access_permitted), to prevent + * lock-free lookups, as part of the serialize_against_pte_lookup() + * synchronisation. + * + * This also catches the case where the PTE's hardware PRESENT bit is + * cleared while TLB is flushed, which is suboptimal but should not + * be frequent. + */ + if (pmd_is_serializing(pmd)) + return false; + return pte_access_permitted(pmd_pte(pmd), write); } diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h index 3ffad030393c..461b0f193864 100644 --- a/arch/powerpc/include/asm/btext.h +++ b/arch/powerpc/include/asm/btext.h @@ -13,7 +13,11 @@ extern void btext_update_display(unsigned long phys, int width, int height, int depth, int pitch); extern void btext_setup_display(int width, int height, int depth, int pitch, unsigned long address); +#ifdef CONFIG_PPC32 extern void btext_prepare_BAT(void); +#else +static inline void btext_prepare_BAT(void) { } +#endif extern void btext_map(void); extern void btext_unmap(void); diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 4a585cba1787..c68476818753 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,6 +94,9 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } +void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, + unsigned long start_address) __noreturn; + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index dbc8c0679480..3d013e4696e9 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -323,6 +323,13 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include <asm/slice.h> +/* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. + */ +#ifdef CONFIG_PPC32 +#define ARCH_ZONE_DMA_BITS 30 +#else #define ARCH_ZONE_DMA_BITS 31 +#endif #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d4fd2ad5b7d..6c51aa845bce 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -315,7 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 755fab9641d6..c82947a3892a 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -757,6 +757,7 @@ __secondary_start: stw r0,0(r3) /* load up the MMU */ + bl load_segment_registers bl load_up_mmu /* ptr to phys current thread */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bfeb469e8106..2ae635df9026 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -83,7 +83,7 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -.macro SYSCALL_ENTRY trapno intno +.macro SYSCALL_ENTRY trapno intno srr1 mfspr r10, SPRN_SPRG_THREAD #ifdef CONFIG_KVM_BOOKE_HV BEGIN_FTR_SECTION @@ -94,7 +94,7 @@ BEGIN_FTR_SECTION mfspr r11, SPRN_SRR1 mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ bf 3, 1975f - b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 + b kvmppc_handler_\intno\()_\srr1 1975: mr r12, r13 lwz r13, THREAD_NORMSAVE(2)(r10) @@ -145,9 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) tophys(r11,r11) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,3 - add r11,r11,r9 + lwz r10, TASK_CPU(r2) + slwi r10, r10, 3 + add r11, r11, r10 #endif lwz r12,0(r11) mtspr SPRN_DBCR0,r12 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 6621f230cc37..2b39f42c3676 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -413,7 +413,7 @@ interrupt_base: /* System Call Interrupt */ START_EXCEPTION(SystemCall) - SYSCALL_ENTRY 0xc00 SYSCALL + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1 /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index affe5dcce7f4..2b160d68db49 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -30,7 +30,6 @@ typedef void (*relocate_new_kernel_t)( */ void default_machine_kexec(struct kimage *image) { - extern const unsigned char relocate_new_kernel[]; extern const unsigned int relocate_new_kernel_size; unsigned long page_list; unsigned long reboot_code_buffer, reboot_code_buffer_phys; @@ -58,6 +57,9 @@ void default_machine_kexec(struct kimage *image) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); + if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) + relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); + /* now call it */ rnk = (relocate_new_kernel_t) reboot_code_buffer; (*rnk)(page_list, reboot_code_buffer_phys, image->start); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 17f1ae7fae2c..a3fb90bb5a39 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2349,6 +2349,7 @@ static void __init prom_check_displays(void) prom_printf("W=%d H=%d LB=%d addr=0x%x\n", width, height, pitch, addr); btext_setup_display(width, height, 8, pitch, addr); + btext_prepare_BAT(); } #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ } diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 4cac45cb5de5..acf63ad8f4ce 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -27,7 +27,7 @@ fi WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start -logo_linux_clut224 +logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 6035d24f1d1d..a46286f73eec 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -833,6 +833,7 @@ static void flush_guest_tlb(struct kvm *kvm) } } asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f9b2620fbecd..bc18366cd1ba 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2507,17 +2507,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) LOAD_REG_ADDR(r11, dawr_force_enable) lbz r11, 0(r11) cmpdi r11, 0 + bne 3f li r3, H_HARDWARE - beqlr + blr +3: /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT clrrdi r4, r4, 3 std r4, VCPU_DAWR(r3) std r5, VCPU_DAWRX(r3) + /* + * If came in through the real mode hcall handler then it is necessary + * to write the registers since the return path won't. Otherwise it is + * sufficient to store then in the vcpu struct as they will be loaded + * next time the vcpu is run. + */ + mfmsr r6 + andi. r6, r6, MSR_DR /* in real mode? */ + bne 4f mtspr SPRN_DAWR, r4 mtspr SPRN_DAWRX, r5 - li r3, 0 +4: li r3, 0 blr _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index cb2b08635508..6d8f06b04022 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -55,20 +55,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. + */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL); - if (!mm->context.hash_context) { - ida_free(&mmu_context_ida, index); + if (!mm->context.hash_context) return -ENOMEM; - } /* * The old code would re-promote on fork, we don't do that when using @@ -96,13 +128,20 @@ static int hash__init_new_context(struct mm_struct *mm) mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); if (!mm->context.hash_context->spt) { - ida_free(&mmu_context_ida, index); kfree(mm->context.hash_context); return -ENOMEM; } } #endif + } + index = realloc_context_ids(&mm->context); + if (index < 0) { +#ifdef CONFIG_PPC_SUBPAGE_PROT + kfree(mm->context.hash_context->spt); +#endif + kfree(mm->context.hash_context); + return index; } pkey_mm_init(mm); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index ad3dd977c22d..953850a602f7 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -116,6 +116,9 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, /* * This ensures that generic code that rely on IRQ disabling * to prevent a parallel THP split work as expected. + * + * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires + * a special case check in pmd_access_permitted. */ serialize_against_pte_lookup(vma->vm_mm); return __pmd(old_pmd); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e885fe2aafcc..40bd4153ab09 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -253,7 +253,8 @@ void __init paging_init(void) (long int)((top_of_ram - total_ram) >> 20)); #ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffffffUL >> PAGE_SHIFT); + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index db4a6253df92..533fc6fa6726 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -372,13 +372,25 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, pdshift = PMD_SHIFT; pmdp = pmd_offset(&pud, ea); pmd = READ_ONCE(*pmdp); + /* - * A hugepage collapse is captured by pmd_none, because - * it mark the pmd none and do a hpte invalidate. + * A hugepage collapse is captured by this condition, see + * pmdp_collapse_flush. */ if (pmd_none(pmd)) return NULL; +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * A hugepage split is captured by this condition, see + * pmdp_invalidate. + * + * Huge page modification can be caught here too. + */ + if (pmd_is_serializing(pmd)) + return NULL; +#endif + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { if (is_thp) *is_thp = true; diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index f834a19ed772..c02d8c503b29 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -7,6 +7,7 @@ config PPC_PMAC select PPC_INDIRECT_PCI if PPC32 select PPC_MPC106 if PPC32 select PPC_NATIVE + select ZONE_DMA if PPC32 default y config PPC_PMAC64 diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index ba919308fe30..d503b8764a8e 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -3,4 +3,5 @@ subpage_prot tempfile prot_sao segv_errors -wild_bctr
\ No newline at end of file +wild_bctr +large_vm_fork_separation
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 43d68420e363..f1fbc15800c4 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,8 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ + large_vm_fork_separation TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 +$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c new file mode 100644 index 000000000000..2363a7f3ab0d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that allocating memory beyond the memory limit and then forking is +// handled correctly, ie. the child is able to access the mappings beyond the +// memory limit and the child's writes are not visible to the parent. + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + + +static int test(void) +{ + int p2c[2], c2p[2], rc, status, c, *p; + unsigned long page_size; + pid_t pid; + + page_size = sysconf(_SC_PAGESIZE); + SKIP_IF(page_size != 65536); + + // Create a mapping at 512TB to allocate an extended_id + p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n"); + return 1; + } + + printf("parent writing %p = 1\n", p); + *p = 1; + + FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1); + + pid = fork(); + if (pid == 0) { + FAIL_IF(read(p2c[0], &c, 1) != 1); + + pid = getpid(); + printf("child writing %p = %d\n", p, pid); + *p = pid; + + FAIL_IF(write(c2p[1], &c, 1) != 1); + FAIL_IF(read(p2c[0], &c, 1) != 1); + exit(0); + } + + c = 0; + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(read(c2p[0], &c, 1) != 1); + + // Prevent compiler optimisation + barrier(); + + rc = 0; + printf("parent reading %p = %d\n", p, *p); + if (*p != 1) { + printf("Error: BUG! parent saw child's write! *p = %d\n", *p); + rc = 1; + } + + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(waitpid(pid, &status, 0) == -1); + FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status)); + + if (rc == 0) + printf("success: test completed OK\n"); + + return rc; +} + +int main(void) +{ + return test_harness(test, "large_vm_fork_separation"); +} |