author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-11-09 10:37:10 +0100 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-11-09 10:37:10 +0100 |
commit | 9b085d6e889076928d307f05cabf76e35db1e63b (patch) | |
tree | 89c83eec98ed9bf07d07d8a7b0dea51564ecd61a /arch | |
parent | dt-bindings: serial: 8250_omap: Add compatible for UART controller on AM64 SoC (diff) | |
parent | Linux 5.10-rc3 (diff) | |
Merge 5.10-rc3 into tty-next
We need the TTY/vt/serial fixes in here as well.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'arch')
52 files changed, 462 insertions(+), 294 deletions(-)
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 17fd1ed700cc..9152782444b5 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -67,7 +67,22 @@ sr r5, [ARC_REG_LPB_CTRL] 1: #endif /* CONFIG_ARC_LPB_DISABLE */ -#endif + + /* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK + mov r6, 0x60000000 + lr r5, [ARC_REG_ICCM_BUILD] + breq r5, 0, 1f + sr r6, [ARC_REG_AUX_ICCM] +1: + lr r5, [ARC_REG_DCCM_BUILD] + breq r5, 0, 2f + sr r6, [ARC_REG_AUX_DCCM] +2: +#endif /* CONFIG_ARC_SOC_HSDK */ + +#endif /* CONFIG_ISA_ARCV2 */ + ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations DSP_EARLY_INIT diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index feba91c9d969..b23986f98450 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -112,7 +112,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; @@ -132,6 +132,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b63fc095b99..b3ea1fa11f87 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(".data") = 0; #define ARC_CCM_UNUSED_ADDR 0x60000000 -static void __init hsdk_init_per_cpu(unsigned int cpu) -{ - /* - * By default ICCM is mapped to 0x7z while this area is used for - * kernel virtual mappings, so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].iccm.sz) - write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); - - /* - * By default DCCM is mapped to 0x8z while this area is used by kernel, - * so move it to currently unused area. 
- */ - if (cpuinfo_arc700[cpu].dccm.sz) - write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); -} #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) @@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = { MACHINE_START(SIMULATION, "hsdk") .dt_compat = hsdk_compat, .init_early = hsdk_init_early, - .init_per_cpu = hsdk_init_per_cpu, MACHINE_END diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5..c23dbf8bebee 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -354,8 +354,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1d466addb078..1515f6f153a0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1002,7 +1002,7 @@ config NUMA config NODES_SHIFT int "Maximum NUMA Nodes (as a power of 2)" range 1 10 - default "2" + default "4" depends on NEED_MULTIPLE_NODES help Specify the maximum number of NUMA Nodes available on the target diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161d..ec7720dbe2c8 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 0b298f48f5bf..657c921fd784 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a2..8699ce30f587 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include <linux/percpu.h> #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index af9987c154ca..66adee8b5fc8 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -43,7 +43,7 @@ static void *image_load(struct kimage *image, u64 flags, value; bool be_image, be_kernel; struct kexec_buf kbuf; - unsigned long text_offset; + unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; int ret; @@ -88,11 +88,37 @@ static void *image_load(struct kimage *image, /* 
Adjust kernel segment with TEXT_OFFSET */ kbuf.memsz += text_offset; - ret = kexec_add_buffer(&kbuf); - if (ret) + kernel_segment_number = image->nr_segments; + + /* + * The location of the kernel segment may make it impossible to satisfy + * the other segment requirements, so we try repeatedly to find a + * location that will work. + */ + while ((ret = kexec_add_buffer(&kbuf)) == 0) { + /* Try to load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, + kernel_segment->memsz, initrd, + initrd_len, cmdline); + if (!ret) + break; + + /* + * We couldn't find space for the other segments; erase the + * kernel segment and try the next available hole. + */ + image->nr_segments -= 1; + kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + } + + if (ret) { + pr_err("Could not find any suitable kernel location!"); return ERR_PTR(ret); + } - kernel_segment = &image->segment[image->nr_segments - 1]; + kernel_segment = &image->segment[kernel_segment_number]; kernel_segment->mem += text_offset; kernel_segment->memsz -= text_offset; image->start = kernel_segment->mem; @@ -101,12 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - /* Load additional data */ - ret = load_other_segments(image, - kernel_segment->mem, kernel_segment->memsz, - initrd, initrd_len, cmdline); - - return ERR_PTR(ret); + return 0; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 5b0e67b93cdc..03210f644790 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -240,6 +240,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) return ret; } +/* + * Tries to add the initrd and DTB to the image. If it is not possible to find + * valid locations, this function will undo changes to the image and return non + * zero. 
+ */ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -248,7 +253,8 @@ int load_other_segments(struct kimage *image, { struct kexec_buf kbuf; void *headers, *dtb = NULL; - unsigned long headers_sz, initrd_load_addr = 0, dtb_len; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len, + orig_segments = image->nr_segments; int ret = 0; kbuf.image = image; @@ -334,6 +340,7 @@ int load_other_segments(struct kimage *image, return 0; out_err: + image->nr_segments = orig_segments; vfree(dtb); return ret; } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index deba738142ed..f11a1a1f7026 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ - void *addrs[1]; - u32 insns[1]; - - addrs[0] = addr; - insns[0] = opcode; - - return aarch64_insn_patch_text(addrs, insns, 1); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + kprobe_opcode_t *addr = p->ainsn.api.insn; + void *addrs[] = {addr, addr + 1}; + u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; + /* prepare insn slot */ - patch_text(p->ainsn.api.insn, p->opcode); + aarch64_insn_patch_text(addrs, insns, 2); - flush_icache_range((uintptr_t) (p->ainsn.api.insn), - (uintptr_t) (p->ainsn.api.insn) + - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); /* * Needs restoring of return address after stepping xol. @@ -128,13 +119,18 @@ void *alloc_insn_page(void) /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - patch_text(p->addr, BRK64_OPCODE_KPROBES); + void *addr = p->addr; + u32 insn = BRK64_OPCODE_KPROBES; + + aarch64_insn_patch_text(&addr, &insn, 1); } /* disarm kprobe: remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, p->opcode); + void *addr = p->addr; + + aarch64_insn_patch_text(&addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -163,20 +159,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated. */ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { kcb->saved_irqflag = regs->pstate & DAIF_MASK; - regs->pstate |= PSR_I_BIT; - /* Unmask PSTATE.D for enabling software step exceptions. */ - regs->pstate &= ~PSR_D_BIT; + regs->pstate |= DAIF_MASK; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -219,10 +210,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ - - /* IRQs and single stepping do not mix well. 
*/ kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); instruction_pointer_set(regs, slot); } else { /* insn simulation */ @@ -273,12 +261,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) } /* call post handler */ kcb->kprobe_status = KPROBE_HIT_SSDONE; - if (cur->post_handler) { - /* post_handler can hit breakpoint and single step - * again, so we enable D-flag for recursive exception. - */ + if (cur->post_handler) cur->post_handler(cur, regs, 0); - } reset_current_kprobe(); } @@ -302,8 +286,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -365,10 +347,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) * pre-handler and it returned non-zero, it will * modify the execution path and no need to single * stepping. Let's just reset current kprobe and exit. - * - * pre_handler can hit a breakpoint and can step thru - * before return, keep PSTATE D-flag enabled until - * pre_handler return back. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); @@ -399,7 +377,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) } static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; @@ -409,16 +387,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) if (retval == DBG_HOOK_HANDLED) { kprobes_restore_local_irqflag(kcb, regs); - kernel_disable_single_step(); - post_kprobe_handler(kcb, regs); } return retval; } -static struct step_hook kprobes_step_hook = { - .fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { + .imm = KPROBES_BRK_SS_IMM, + .fn = kprobe_breakpoint_ss_handler, }; static int __kprobes @@ -486,7 +463,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { register_kernel_break_hook(&kprobes_break_hook); - register_kernel_step_hook(&kprobes_step_hook); + register_kernel_break_hook(&kprobes_break_ss_hook); return 0; } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 85ed2390fb99..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), "Bug: fault blocked by AP register !"); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 1d9ac0f9c794..0bd1b144eb76 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -33,19 +33,18 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. 
lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MI_APG_INIT 0x40000000 - -/* - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MI_APG_KUEP 0x60000000 + * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say + * "all User" rules, that will lead to NA for all. + * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED + * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) + * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) + * 2 => User => 11 (all accesses performed according as user iaw page definition) + * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT + * => 10 (all accesses performed according to swaped page definition) for KUEP + * 4-15 => Not Used + */ +#define MI_APG_INIT 0xdc000000 +#define MI_APG_KUEP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -106,25 +105,9 @@ #define MD_Ks 0x80000000 /* Should not be set */ #define MD_Kp 0x40000000 /* Should always be set */ -/* - * All pages' PP data bits are set to either 000 or 011 or 001, which means - * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user and NA for ALL. - * Then we use the APG to say whether accesses are according to Page rules or - * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MD_APG_INIT 0x40000000 - -/* - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MD_APG_KUAP 0x60000000 +/* See explanation above at the definition of MI_APG_INIT */ +#define MD_APG_INIT 0xdc000000 +#define MD_APG_KUAP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 66f403a7da44..1581204467e1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -39,9 +39,9 @@ * into the TLB. 
*/ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */ #define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */ -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_SPECIAL 0x0080 /* SW entry */ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ @@ -59,11 +59,12 @@ #define _PMD_PRESENT 0x0001 #define _PMD_PRESENT_MASK _PMD_PRESENT -#define _PMD_BAD 0x0fd0 +#define _PMD_BAD 0x0f90 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 -#define _PMD_USER 0x0020 /* APG 1 */ +#define _PMD_ACCESSED 0x0020 /* APG 1 */ +#define _PMD_USER 0x0040 /* APG 2 */ #define _PTE_NONE_MASK 0 diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8728590f514a..3beeb030cd78 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -6,6 +6,7 @@ struct device; struct device_node; +struct drmem_lmb; #ifdef CONFIG_NUMA @@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu) */ return (nid < 0) ? 0 : nid; } + +int of_drconf_to_nid_single(struct drmem_lmb *lmb); + #else static inline int early_cpu_to_node(int cpu) { return 0; } @@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) return 0; } -#endif /* CONFIG_NUMA */ +static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) +{ + return first_online_node; +} -struct drmem_lmb; -int of_drconf_to_nid_single(struct drmem_lmb *lmb); +#endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ef5bbb705c08..501c9a79038c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -178,7 +178,7 @@ do { \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -191,7 +191,7 @@ do { \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 6b50bf15d8c1..bf3270426d82 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) { struct pci_io_addr_range *piar; struct rb_node *n; + unsigned long flags; - spin_lock(&pci_io_addr_cache_root.piar_lock); + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) (piar->flags & IORESOURCE_IO) ? 
"i/o" : "mem", &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); } - spin_unlock(&pci_io_addr_cache_root.piar_lock); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return 0; } diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 44c9018aed1b..a1ae00689e0f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r9, _PAGE_PRESENT -#endif andc. r9, r9, r11 /* Check permission */ bne 5f @@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r9, _PAGE_PRESENT | _PAGE_EXEC -#endif andc. r9, r9, r11 /* Check permission */ bne 5f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f359d3fba74..ee0bfebc375f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -202,9 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -224,25 +222,13 @@ InstructionTLBMiss: 3: mtcr r11 #endif -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 -#else - lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ - mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 -#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MI_TWC, r11 -#endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be @@ -256,9 +242,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -268,9 +252,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -297,30 +279,16 @@ DataStoreTLBMiss: mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ - /* Insert the Guarded flag into the TWC from the Linux PTE. + /* Insert Guarded and Accessed flags into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 - /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. 
- * We also need to know if the insn is a load/store, so: - * Clear _PAGE_PRESENT and load that which will - * trap into DTLB Error with store bit set accordinly. - */ - /* PRESENT=0x1, ACCESSED=0x20 - * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); - * r10 = (r10 & ~PRESENT) | r11; - */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -711,7 +679,7 @@ initial_mmu: li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ - li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ + li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID li r11, MI_BOOTINIT /* Create RPN for address 0 */ 1: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ @@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) li r0, 4 @@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb) LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb) #endif #ifdef CONFIG_PIN_TLB_IMMR LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) - LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED) mfspr r8, SPRN_IMMR rlwinm r8, r8, 0, 0xfff80000 ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..2aa16d5368e1 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -457,11 +457,7 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -523,11 +519,7 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -603,11 +595,7 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3c6b9822f978..8c2857cbd960 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu) /* 
Activate a secondary processor. */ void start_secondary(void *unused) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu = raw_smp_processor_id(); mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); + rcu_cpu_starting(cpu); preempt_disable(); cpu_callin_map[cpu] = 1; diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index c47e6b35c551..824b2c9da75b 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -476,7 +476,7 @@ do { \ do { \ long __kr_err; \ \ - __put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 99e12faa5498..765b62434f30 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Linaro Limited * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 11e2a4fe66e0..7e849797c9c3 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -35,6 +35,10 @@ ENTRY(_start) .word 0 #endif .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ .dword 0x200000 @@ -42,6 +46,7 @@ ENTRY(_start) /* Image load offset(4MB) from start of RAM */ .dword 0x400000 #endif +#endif /* Effective size of kernel image */ .dword _end - _start .dword __HEAD_FLAGS diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 11ebee9e4c1d..3a19def868ec 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vdso.lds *.tmp +vdso-syms.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 7d6a94d45ec9..cb8f9e4cfcbf 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -43,19 +43,14 @@ $(obj)/vdso.o: $(obj)/vdso.so SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + -Wl,--build-id -Wl,--hash-style=both # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then # refer to these symbols in the kernel code rather than hand-coded addresses. - -SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id=sha1 -Wl,--hash-style=both -$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE - $(call if_changed,vdsold) - -LDFLAGS_vdso-syms.o := -r --just-symbols -$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE - $(call if_changed,ld) +$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE + $(call if_changed,so2s) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -73,6 +68,11 @@ quiet_cmd_vdsold = VDSOLD $@ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp +# Extracts symbol offsets from the VDSO, converting them into an assembly file +# that contains the same symbols at the same offsets. 
+quiet_cmd_so2s = SO2S $@ + cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ + # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh new file mode 100755 index 000000000000..e64cb6d9440e --- /dev/null +++ b/arch/riscv/kernel/vdso/so2s.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com> + +sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ +| grep '^\.' diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 1359e21c0c62..3c8b9e433c67 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -86,6 +86,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a pmd_t *pmd, *pmd_k; pte_t *pte_k; int index; + unsigned long pfn; /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) @@ -100,7 +101,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * of a task switch. */ index = pgd_index(addr); - pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; + pfn = csr_read(CSR_SATP) & SATP_PPN; + pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ea933b789a88..8e577f14f120 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -154,9 +154,8 @@ disable: void __init setup_bootmem(void) { - phys_addr_t mem_size = 0; - phys_addr_t total_mem = 0; - phys_addr_t mem_start, start, end = 0; + phys_addr_t mem_start = 0; + phys_addr_t start, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); u64 i; @@ -164,21 +163,18 @@ void __init setup_bootmem(void) /* Find the memory region containing the kernel */ for_each_mem_range(i, &start, &end) { phys_addr_t size = end - start; - if (!total_mem) + if (!mem_start) mem_start = start; if (start <= vmlinux_start && vmlinux_end <= end) BUG_ON(size == 0); - total_mem = total_mem + size; } /* - * Remove memblock from the end of usable area to the - * end of region + * The maximal physical memory size is -PAGE_OFFSET. + * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed + * as it is unusable by kernel. 
*/ - mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET); - if (mem_start + mem_size < end) - memblock_remove(mem_start + mem_size, - end - mem_start - mem_size); + memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); @@ -297,6 +293,7 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; #define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) #endif pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { @@ -494,6 +491,18 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) load_pa + (va - PAGE_OFFSET), map_size, PAGE_KERNEL_EXEC); +#ifndef __PAGETABLE_PMD_FOLDED + /* Setup early PMD for DTB */ + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); + /* Create two consecutive PMD mappings for FDT early scan */ + pa = dtb_pa & ~(PMD_SIZE - 1); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + pa, PMD_SIZE, PAGE_KERNEL); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, @@ -501,6 +510,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); +#endif dtb_early_pa = dtb_pa; /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0784bf3caf43..a4d3c578fbd8 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -93,9 +93,10 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -378,7 +379,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y @@ -386,7 +386,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -689,6 +689,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -709,7 +710,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -753,6 +753,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m @@ -829,6 +830,7 @@ CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAULT_INJECTION_USERCOPY=y CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y diff --git 
a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 905bc8c4cfaf..17d5df2c1eff 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -87,9 +87,10 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -371,7 +372,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y @@ -379,7 +379,7 @@ CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -680,6 +680,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -701,7 +702,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -745,6 +745,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8f67c55625f9..a302630341ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -17,11 +17,11 @@ CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y -# CONFIG_SECCOMP is not set # CONFIG_PFAULT is not set # CONFIG_S390_HYPFS_FS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set +# CONFIG_SECCOMP is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6b8d8c69b1a1..b5dbae78969b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -692,16 +692,6 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } -static inline unsigned long pud_pfn(pud_t pud) -{ - unsigned long origin_mask; - - origin_mask = _REGION_ENTRY_ORIGIN; - if (pud_large(pud)) - origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; - return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_leaf pmd_large static inline int pmd_large(pmd_t pmd) { @@ -747,16 +737,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - unsigned long origin_mask; - - origin_mask = _SEGMENT_ENTRY_ORIGIN; - if (pmd_large(pmd)) - origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; - return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { @@ -1238,11 +1218,39 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) -#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define 
pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +static inline unsigned long pmd_deref(pmd_t pmd) +{ + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return pmd_val(pmd) & origin_mask; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return pmd_deref(pmd) >> PAGE_SHIFT; +} + +static inline unsigned long pud_deref(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return pud_val(pud) & origin_mask; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + return pud_deref(pud) >> PAGE_SHIFT; +} + /* * The pgd_offset function *always* adds the index for the top-level * region/segment table. This is done to get a sequence like the diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/s390/include/asm/vdso/vdso.h +++ /dev/null diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ece58f2217cb..2012c1cf0853 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -61,14 +61,6 @@ int main(void) BLANK(); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); - /* constants used by the vdso */ - DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ebfe86d097f0..390d97daa2b3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -855,13 +855,14 @@ void __init smp_detect_cpus(void) static void smp_init_secondary(void) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d33f21545dfd..9a6bae503fe6 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -101,6 +101,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (ret) break; + /* the PCI function will be scanned once function 0 appears */ + if (!zdev->zbus->bus) + break; + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); if (!pdev) break; diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a5e5db6ada3c..39b2eded7bc2 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode) add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* Load the new page-table. 
*/ + sev_verify_cbit(top_level_pgt); write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b11..aa561795efd1 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit) SYM_FUNC_END(get_sev_encryption_bit) .code64 + +#include "../../kernel/sev_verify_cbit.S" + SYM_FUNC_START(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp @@ -81,6 +84,19 @@ SYM_FUNC_START(set_sev_encryption_mask) bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ + /* + * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in + * get_sev_encryption_bit() because this function is 32-bit code and + * shared between 64-bit and 32-bit boot path. + */ + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + + /* Store MSR value in sev_status */ + shlq $32, %rdx + orq %rdx, %rax + movq %rax, sev_status(%rip) + .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */ @@ -96,5 +112,7 @@ SYM_FUNC_END(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) +SYM_DATA(sev_check_data, .quad 0) #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 6d31f1b4c4d1..d9a631c5973c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -159,4 +159,6 @@ void boot_page_fault(void); void boot_stage1_vc(void); void boot_stage2_vc(void); +unsigned long sev_verify_cbit(unsigned long cr3); + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 40e0e322161d..284e73661a18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -273,11 +273,15 @@ void __init hv_apic_init(void) pr_info("Hyper-V: Using enlightened APIC (%s mode)", x2apic_enabled() ? "x2apic" : "xapic"); /* - * With x2apic, architectural x2apic MSRs are equivalent to the - * respective synthetic MSRs, so there's no need to override - * the apic accessors. The only exception is - * hv_apic_eoi_write, because it benefits from lazy EOI when - * available, but it works for both xapic and x2apic modes. + * When in x2apic mode, don't use the Hyper-V specific APIC + * accessors since the field layout in the ICR register is + * different in x2apic mode. Furthermore, the architectural + * x2apic MSRs function just as well as the Hyper-V + * synthetic APIC MSRs, so there's no benefit in having + * separate Hyper-V accessors for x2apic mode. The only + * exception is hv_apic_eoi_write, because it benefits from + * lazy EOI when available, but the same accessor works for + * both xapic and x2apic because the field layout is the same. 
*/ apic_set_eoi_write(hv_apic_eoi_write); if (!x2apic_enabled()) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 714233cee0b5..3115caa7d7d0 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -290,6 +290,9 @@ static void __init uv_stringify(int len, char *to, char *from) { /* Relies on 'to' being NULL chars so result will be NULL terminated */ strncpy(to, from, len-1); + + /* Trim trailing spaces */ + (void)strim(to); } /* Find UV arch type entry in UVsystab */ @@ -366,7 +369,7 @@ static int __init early_get_arch_type(void) return ret; } -static int __init uv_set_system_type(char *_oem_id) +static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) { /* Save OEM_ID passed from ACPI MADT */ uv_stringify(sizeof(oem_id), oem_id, _oem_id); @@ -386,13 +389,23 @@ static int __init uv_set_system_type(char *_oem_id) /* (Not hubless), not a UV */ return 0; + /* Is UV hubless system */ + uv_hubless_system = 0x01; + + /* UV5 Hubless */ + if (strncmp(uv_archtype, "NSGI5", 5) == 0) + uv_hubless_system |= 0x20; + /* UV4 Hubless: CH */ - if (strncmp(uv_archtype, "NSGI4", 5) == 0) - uv_hubless_system = 0x11; + else if (strncmp(uv_archtype, "NSGI4", 5) == 0) + uv_hubless_system |= 0x10; /* UV3 Hubless: UV300/MC990X w/o hub */ else - uv_hubless_system = 0x9; + uv_hubless_system |= 0x8; + + /* Copy APIC type */ + uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id); pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n", oem_id, oem_table_id, uv_system_type, uv_hubless_system); @@ -456,7 +469,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id) uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; /* If not UV, return. */ - if (likely(uv_set_system_type(_oem_id) == 0)) + if (uv_set_system_type(_oem_id, _oem_table_id) == 0) return 0; /* Save and Decode OEM Table ID */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3f0db463f96..581fb7223ad0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { @@ -1261,16 +1269,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; + /* - * Indirect branch speculation is always disabled in strict - * mode. It can neither be enabled if it was force-disabled - * by a previous prctl call. + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. 
In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + if (!is_spec_ib_user_controlled() || task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1283,10 +1301,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1351,20 +1369,17 @@ static int ib_prctl_get(struct task_struct *task) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) - return PR_SPEC_DISABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || - spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - } else + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else return PR_SPEC_NOT_AFFECTED; } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7eb2a1c87969..3c417734790f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) /* Setup early boot stage 4-/5-level pagetables. */ addq phys_base(%rip), %rax + + /* + * For SEV guests: Verify that the C-bit is correct. A malicious + * hypervisor could lie about the C-bit position to perform a ROP + * attack on the guest by writing to the unencrypted stack and wait for + * the next RET instruction. + * %rsi carries pointer to realmode data and is callee-clobbered. Save + * and restore it. 
+ */ + pushq %rsi + movq %rax, %rdi + call sev_verify_cbit + popq %rsi + + /* Switch to new page-table */ movq %rax, %cr3 /* Ensure I am executing from virtual addresses */ @@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" +#include "sev_verify_cbit.S" #ifdef CONFIG_HOTPLUG_CPU /* diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 5f83ccaab877..7d04b356d44d 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) goto fail; regs->dx = val >> 32; + /* + * This is a VC handler and the #VC is only raised when SEV-ES is + * active, which means SEV must be active too. Do sanity checks on the + * CPUID results to make sure the hypervisor does not trick the kernel + * into the no-sev path. This could map sensitive data unencrypted and + * make it accessible to the hypervisor. + * + * In particular, check for: + * - Hypervisor CPUID bit + * - Availability of CPUID leaf 0x8000001f + * - SEV CPUID bit. + * + * The hypervisor might still report the wrong C-bit position, but this + * can't be checked here. + */ + + if ((fn == 1 && !(regs->cx & BIT(31)))) + /* Hypervisor bit */ + goto fail; + else if (fn == 0x80000000 && (regs->ax < 0x8000001f)) + /* SEV leaf check */ + goto fail; + else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) + /* SEV bit */ + goto fail; + /* Skip over the CPUID two-byte opcode */ regs->ip += 2; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 4a96726fbaf8..0bd1a0fc587e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -374,8 +374,8 @@ fault: return ES_EXCEPTION; } -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned long vaddr, phys_addr_t *paddr) +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned long vaddr, phys_addr_t *paddr) { unsigned long va = (unsigned long)vaddr; unsigned int level; @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, if (user_mode(ctxt->regs)) ctxt->fi.error_code |= X86_PF_USER; - return false; + return ES_EXCEPTION; } + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) + /* Emulated MMIO to/from encrypted memory not supported */ + return ES_UNSUPPORTED; + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; pa |= va & ~page_level_mask(level); *paddr = pa; - return true; + return ES_OK; } /* Include code shared with pre-decompression boot stage */ @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, { u64 exit_code, exit_info_1, exit_info_2; unsigned long ghcb_pa = __pa(ghcb); + enum es_result res; phys_addr_t paddr; void __user *ref; @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, exit_code = read ? 
SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; - if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) { - if (!read) + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); + if (res != ES_OK) { + if (res == ES_EXCEPTION && !read) ctxt->fi.error_code |= X86_PF_WRITE; - return ES_EXCEPTION; + return res; } exit_info_1 = paddr; diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S new file mode 100644 index 000000000000..ee04941a6546 --- /dev/null +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * sev_verify_cbit.S - Code for verification of the C-bit position reported + * by the Hypervisor when running with SEV enabled. + * + * Copyright (c) 2020 Joerg Roedel (jroedel@suse.de) + * + * sev_verify_cbit() is called before switching to a new long-mode page-table + * at boot. + * + * Verify that the C-bit position is correct by writing a random value to + * an encrypted memory location while on the current page-table. Then it + * switches to the new page-table to verify the memory content is still the + * same. After that it switches back to the current page-table and when the + * check succeeded it returns. If the check failed the code invalidates the + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to + * make sure no interrupt or exception can get the CPU out of the hlt loop. + * + * New page-table pointer is expected in %rdi (first parameter) + * + */ +SYM_FUNC_START(sev_verify_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* First check if a C-bit was detected */ + movq sme_me_mask(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */ + movq sev_status(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* Save CR4 in %rsi */ + movq %cr4, %rsi + + /* Disable Global Pages */ + movq %rsi, %rdx + andq $(~X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* + * Verified that running under SEV - now get a random value using + * RDRAND. This instruction is mandatory when running as an SEV guest. + * + * Don't bail out of the loop if RDRAND returns errors. It is better to + * prevent forward progress than to work with a non-random value here. + */ +1: rdrand %rdx + jnc 1b + + /* Store value to memory and keep it in %rdx */ + movq %rdx, sev_check_data(%rip) + + /* Backup current %cr3 value to restore it later */ + movq %cr3, %rcx + + /* Switch to new %cr3 - This might unmap the stack */ + movq %rdi, %cr3 + + /* + * Compare value in %rdx with memory location. If C-bit is incorrect + * this would read the encrypted data and make the check fail. + */ + cmpq %rdx, sev_check_data(%rip) + + /* Restore old %cr3 */ + movq %rcx, %cr3 + + /* Restore previous CR4 */ + movq %rsi, %cr4 + + /* Check CMPQ result */ + je 3f + + /* + * The check failed, prevent any forward progress to prevent ROP + * attacks, invalidate the stack and go into a hlt loop. + */ + xorq %rsp, %rsp + subq $0x1000, %rsp +2: hlt + jmp 2b +3: +#endif + /* Return page-table pointer */ + movq %rdi, %rax + ret +SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 037faac46b0c..1e299ac73c86 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. 
* @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 7ff00ea64e4f..41902fe8b859 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,9 +24,7 @@ * Output: * rax: dest */ -.weak memmove - -SYM_FUNC_START_ALIAS(memmove) +SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9ff15ee404a4..0bfd26e4ca9e 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -6,8 +6,6 @@ #include <asm/alternative-asm.h> #include <asm/export.h> -.weak memset - /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is @@ -19,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index efbb3de472df..bc0833713be9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -39,6 +39,7 @@ */ u64 sme_me_mask __section(".data") = 0; u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index c6fc83efee0c..8731b7ad9308 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -89,8 +89,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) |