diff options
Diffstat (limited to 'arch')
54 files changed, 466 insertions, 196 deletions
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 3c7037c6ba9b..b618017205a3 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -202,7 +202,7 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ({ \ set_thread_flag(TIF_32BIT); \ }) -#ifdef CONFIG_GENERIC_COMPAT_VDSO +#ifdef CONFIG_COMPAT_VDSO #define COMPAT_ARCH_DLINFO \ do { \ /* \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 87a4b2ddc1a1..3f5461f7b560 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -301,7 +301,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) /* * Huge pte definitions. */ -#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) #define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) /* diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fd5b1a4efc70..844e2964b0f5 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -193,6 +193,16 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) regs->pmr_save = GIC_PRIO_IRQON; } +static inline void set_ssbs_bit(struct pt_regs *regs) +{ + regs->pstate |= PSR_SSBS_BIT; +} + +static inline void set_compat_ssbs_bit(struct pt_regs *regs) +{ + regs->pstate |= PSR_AA32_SSBS_BIT; +} + static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { @@ -200,7 +210,7 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate = PSR_MODE_EL0t; if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - regs->pstate |= PSR_SSBS_BIT; + set_ssbs_bit(regs); regs->sp = sp; } @@ -219,7 +229,7 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, #endif if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - regs->pstate |= PSR_AA32_SSBS_BIT; + set_compat_ssbs_bit(regs); regs->compat_sp = sp; } diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index df45af931459..4d9b1f48dc39 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -8,19 +8,12 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/types.h> #include <asm/memory.h> #include <asm/ptrace.h> #include <asm/sdei.h> -struct stackframe { - unsigned long fp; - unsigned long pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - int graph; -#endif -}; - enum stack_type { STACK_TYPE_UNKNOWN, STACK_TYPE_TASK, @@ -28,6 +21,7 @@ enum stack_type { STACK_TYPE_OVERFLOW, STACK_TYPE_SDEI_NORMAL, STACK_TYPE_SDEI_CRITICAL, + __NR_STACK_TYPES }; struct stack_info { @@ -36,6 +30,37 @@ struct stack_info { enum stack_type type; }; +/* + * A snapshot of a frame record or fp/lr register values, along with some + * accounting information necessary for robust unwinding. + * + * @fp: The fp value in the frame record (or the real fp) + * @pc: The fp value in the frame record (or the real lr) + * + * @stacks_done: Stacks which have been entirely unwound, for which it is no + * longer valid to unwind to. + * + * @prev_fp: The fp that pointed to this frame record, or a synthetic value + * of 0. This is used to ensure that within a stack, each + * subsequent frame record is at an increasing address. + * @prev_type: The type of stack this frame record was on, or a synthetic + * value of STACK_TYPE_UNKNOWN. This is used to detect a + * transition from one stack to another. + * + * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a + * replacement lr value in the ftrace graph stack. + */ +struct stackframe { + unsigned long fp; + unsigned long pc; + DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES); + unsigned long prev_fp; + enum stack_type prev_type; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + int graph; +#endif +}; + extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); @@ -64,8 +89,9 @@ static inline bool on_irq_stack(unsigned long sp, return true; } -static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp, - struct stack_info *info) +static inline bool on_task_stack(const struct task_struct *tsk, + unsigned long sp, + struct stack_info *info) { unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; @@ -112,10 +138,13 @@ static inline bool on_overflow_stack(unsigned long sp, * We can only safely access per-cpu stacks from current in a non-preemptible * context. */ -static inline bool on_accessible_stack(struct task_struct *tsk, - unsigned long sp, - struct stack_info *info) +static inline bool on_accessible_stack(const struct task_struct *tsk, + unsigned long sp, + struct stack_info *info) { + if (info) + info->type = STACK_TYPE_UNKNOWN; + if (on_task_stack(tsk, sp, info)) return true; if (tsk != current || preemptible()) @@ -130,4 +159,27 @@ static inline bool on_accessible_stack(struct task_struct *tsk, return false; } +static inline void start_backtrace(struct stackframe *frame, + unsigned long fp, unsigned long pc) +{ + frame->fp = fp; + frame->pc = pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame->graph = 0; +#endif + + /* + * Prime the first unwind. + * + * In unwind_frame() we'll check that the FP points to a valid stack, + * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be + * treated as a transition to whichever stack that happens to be. The + * prev_fp value won't be used, but we set it to 0 such that it is + * definitely not an accessible stack address. + */ + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; +} + #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9cdc4592da3e..320a30dbe35e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -586,10 +586,8 @@ el1_sync: b.eq el1_ia cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el1_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_sp_pc + b.eq el1_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 @@ -611,9 +609,11 @@ el1_da: bl do_mem_abort kernel_exit 1 -el1_sp_pc: +el1_pc: /* - * Stack or PC alignment exception handling + * PC alignment exception handling. We don't handle SP alignment faults, + * since we will have hit a recursive exception when trying to push the + * initial pt_regs. */ mrs x0, far_el1 inherit_daif pstate=x23, tmp=x2 @@ -732,9 +732,9 @@ el0_sync: ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp_pc + b.eq el0_sp cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_sp_pc + b.eq el0_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 @@ -758,7 +758,7 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_sp_pc + b.eq el0_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap @@ -858,11 +858,15 @@ el0_fpsimd_exc: mov x1, sp bl do_fpsimd_exc b ret_to_user +el0_sp: + ldr x26, [sp, #S_SP] + b el0_sp_pc +el0_pc: + mrs x26, far_el1 el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x26, far_el1 gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index eec4776ae5f0..37d3912cfe06 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -406,6 +406,18 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) +static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, + unsigned int vq) +{ + unsigned int i; + __uint128_t *p; + + for (i = 0; i < SVE_NUM_ZREGS; ++i) { + p = (__uint128_t *)ZREG(sst, vq, i); + *p = arm64_cpu_to_le128(fst->vregs[i]); + } +} + /* * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to * task->thread.sve_state. @@ -423,17 +435,12 @@ static void fpsimd_to_sve(struct task_struct *task) unsigned int vq; void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - unsigned int i; - __uint128_t *p; if (!system_supports_sve()) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) { - p = (__uint128_t *)ZREG(sst, vq, i); - *p = arm64_cpu_to_le128(fst->vregs[i]); - } + __fpsimd_to_sve(sst, fst, vq); } /* @@ -459,7 +466,7 @@ static void sve_to_fpsimd(struct task_struct *task) return; vq = sve_vq_from_vl(task->thread.sve_vl); - for (i = 0; i < 32; ++i) { + for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); } @@ -550,8 +557,6 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) unsigned int vq; void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - unsigned int i; - __uint128_t *p; if (!test_tsk_thread_flag(task, TIF_SVE)) return; @@ -559,11 +564,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) vq = sve_vq_from_vl(task->thread.sve_vl); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); - - for (i = 0; i < 32; ++i) { - p = (__uint128_t *)ZREG(sst, vq, i); - *p = arm64_cpu_to_le128(fst->vregs[i]); - } + __fpsimd_to_sve(sst, fst, vq); } int sve_set_vector_length(struct task_struct *task, diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 9d63514b9836..b0e03e052dd1 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -154,12 +154,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, return; } - frame.fp = regs->regs[29]; - frame.pc = regs->pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif - + start_backtrace(&frame, regs->regs[29], regs->pc); walk_stackframe(current, &frame, callchain_trace, entry); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6a869d9f304f..f674f28df663 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, childregs->pstate |= PSR_UAO_BIT; if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - childregs->pstate |= PSR_SSBS_BIT; + set_ssbs_bit(childregs); if (system_uses_irq_prio_masking()) childregs->pmr_save = GIC_PRIO_IRQON; @@ -443,6 +443,32 @@ void uao_thread_switch(struct task_struct *next) } /* + * Force SSBS state on context-switch, since it may be lost after migrating + * from a CPU which treats the bit as RES0 in a heterogeneous system. + */ +static void ssbs_thread_switch(struct task_struct *next) +{ + struct pt_regs *regs = task_pt_regs(next); + + /* + * Nothing to do for kernel threads, but 'regs' may be junk + * (e.g. idle task) so check the flags and bail early. + */ + if (unlikely(next->flags & PF_KTHREAD)) + return; + + /* If the mitigation is enabled, then we leave SSBS clear. */ + if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || + test_tsk_thread_flag(next, TIF_SSBD)) + return; + + if (compat_user_mode(regs)) + set_compat_ssbs_bit(regs); + else if (user_mode(regs)) + set_ssbs_bit(regs); +} + +/* * We store our current task in sp_el0, which is clobbered by userspace. Keep a * shadow copy so that we can restore this upon entry from userspace. * @@ -471,6 +497,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ptrauth_thread_switch(next); + ssbs_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -498,11 +525,8 @@ unsigned long get_wchan(struct task_struct *p) if (!stack_page) return 0; - frame.fp = thread_saved_fp(p); - frame.pc = thread_saved_pc(p); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif + start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p)); + do { if (unwind_frame(p, &frame)) goto out; diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index b21cba90f82d..c4ae647d2306 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -38,12 +38,9 @@ void *return_address(unsigned int level) data.level = level + 2; data.addr = NULL; - frame.fp = (unsigned long)__builtin_frame_address(0); - frame.pc = (unsigned long)return_address; /* dummy */ -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif - + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)return_address); walk_stackframe(current, &frame, save_return_addr, &data); if (!data.level) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 62d395151abe..2b160ae594eb 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -29,9 +29,18 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ + +/* + * Unwind from one frame record (A) to the next frame record (B). + * + * We terminate early if the location of B indicates a malformed chain of frame + * records (e.g. a cycle), determined based on the location and fp value of A + * and the location (but not the fp value) of B. + */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long fp = frame->fp; + struct stack_info info; if (fp & 0xf) return -EINVAL; @@ -39,11 +48,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp, NULL)) + if (!on_accessible_stack(tsk, fp, &info)) + return -EINVAL; + + if (test_bit(info.type, frame->stacks_done)) return -EINVAL; + /* + * As stacks grow downward, any valid record on the same stack must be + * at a strictly higher address than the prior record. + * + * Stacks can nest in several valid orders, e.g. + * + * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL + * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW + * + * ... but the nesting itself is strict. Once we transition from one + * stack to another, it's never valid to unwind back to that first + * stack. + */ + if (info.type == frame->prev_type) { + if (fp <= frame->prev_fp) + return -EINVAL; + } else { + set_bit(frame->prev_type, frame->stacks_done); + } + + /* + * Record this frame record's values and location. The prev_fp and + * prev_type are only meaningful to the next unwind_frame() invocation. + */ frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + frame->prev_fp = fp; + frame->prev_type = info.type; #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && @@ -122,12 +160,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) data.skip = trace->skip; data.no_sched_functions = 0; - frame.fp = regs->regs[29]; - frame.pc = regs->pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif - + start_backtrace(&frame, regs->regs[29], regs->pc); walk_stackframe(current, &frame, save_trace, &data); } EXPORT_SYMBOL_GPL(save_stack_trace_regs); @@ -146,17 +179,15 @@ static noinline void __save_stack_trace(struct task_struct *tsk, data.no_sched_functions = nosched; if (tsk != current) { - frame.fp = thread_saved_fp(tsk); - frame.pc = thread_saved_pc(tsk); + start_backtrace(&frame, thread_saved_fp(tsk), + thread_saved_pc(tsk)); } else { /* We don't want this function nor the caller */ data.skip += 2; - frame.fp = (unsigned long)__builtin_frame_address(0); - frame.pc = (unsigned long)__save_stack_trace; + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)__save_stack_trace); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif walk_stackframe(tsk, &frame, save_trace, &data); diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 9f25aedeac9d..0b2946414dc9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -38,11 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs) if (!in_lock_functions(regs->pc)) return regs->pc; - frame.fp = regs->regs[29]; - frame.pc = regs->pc; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif + start_backtrace(&frame, regs->regs[29], regs->pc); + do { int ret = unwind_frame(NULL, &frame); if (ret < 0) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8c03456dade6..d3313797cca9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -100,18 +100,17 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) return; if (tsk == current) { - frame.fp = (unsigned long)__builtin_frame_address(0); - frame.pc = (unsigned long)dump_backtrace; + start_backtrace(&frame, + (unsigned long)__builtin_frame_address(0), + (unsigned long)dump_backtrace); } else { /* * task blocked in __switch_to */ - frame.fp = thread_saved_fp(tsk); - frame.pc = thread_saved_pc(tsk); + start_backtrace(&frame, + thread_saved_fp(tsk), + thread_saved_pc(tsk)); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = 0; -#endif printk("Call trace:\n"); do { diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 4ab863045188..dd2514bb1511 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -32,10 +32,10 @@ UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n -ifeq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -else -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) endif # Clang versions less than 8 do not support -mcmodel=tiny @@ -57,8 +57,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,ld) - $(call if_changed,vdso_check) + $(call if_changed,vdsold_and_vdso_check) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -74,8 +73,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Actual build commands -quiet_cmd_vdsocc = VDSOCC $@ - cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< +quiet_cmd_vdsold_and_vdso_check = LD $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check) # Install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 60a4c6239712..1fba0776ed40 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -144,8 +144,7 @@ $(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) - $(call if_changed,vdso_check) + $(call if_changed,vdsold_and_vdso_check) # Compilation rules for the vDSO sources $(c-obj-vdso): %.o: %.c FORCE @@ -156,14 +155,17 @@ $(asm-obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold_and_vdso_check = LD32 $@ + cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) + +quiet_cmd_vdsold = LD32 $@ cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ -quiet_cmd_vdsocc = VDSOC $@ +quiet_cmd_vdsocc = CC32 $@ cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< -quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@ +quiet_cmd_vdsocc_gettimeofday = CC32 $@ cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = AS32 $@ cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< quiet_cmd_vdsomunge = MUNGE $@ diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index e09cf2deeafe..904034da4974 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -50,6 +50,10 @@ struct kprobe_ctlblk { int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs); int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs); +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + return 0; +} #endif /* CONFIG_KPROBES */ #endif /* _PARISC_KPROBES_H */ diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index ba67893a1d72..df46b0e5a915 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -63,7 +63,7 @@ ENTRY_CFI(flush_tlb_all_local) /* Flush Instruction Tlb */ - LDREG ITLB_SID_BASE(%r1), %r20 +88: LDREG ITLB_SID_BASE(%r1), %r20 LDREG ITLB_SID_STRIDE(%r1), %r21 LDREG ITLB_SID_COUNT(%r1), %r22 LDREG ITLB_OFF_BASE(%r1), %arg0 @@ -103,6 +103,7 @@ fitonemiddle: /* Loop if LOOP = 1 */ add %r21, %r20, %r20 /* increment space */ fitdone: + ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP) /* Flush Data Tlb */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d8dcd8820369..77f6ebf97113 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -121,6 +121,7 @@ config PPC select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 463c63a9fcf1..11112023e327 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -302,9 +302,14 @@ #define H_SCM_UNBIND_MEM 0x3F0 #define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4 #define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8 -#define H_SCM_MEM_QUERY 0x3FC -#define H_SCM_BLOCK_CLEAR 0x400 -#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR +#define H_SCM_UNBIND_ALL 0x3FC +#define H_SCM_HEALTH 0x400 +#define H_SCM_PERFORMANCE_STATS 0x418 +#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS + +/* Scope args for H_SCM_UNBIND_ALL */ +#define H_UNBIND_SCOPE_ALL (0x1) +#define H_UNBIND_SCOPE_DRC (0x2) /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index dc9a1ca70edf..c6bbe9778d3c 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -27,11 +27,10 @@ static inline void ppc_set_pmu_inuse(int inuse) #ifdef CONFIG_PPC_PSERIES get_lppaca()->pmcregs_in_use = inuse; #endif - } else { + } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - get_paca()->pmcregs_in_use = inuse; + get_paca()->pmcregs_in_use = inuse; #endif - } #endif } diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index 01555c6ae0f5..be48c2215fa2 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. * - * See Documentation/virtual/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.txt */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 56dfa7a2a6f2..ea0c69236789 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -49,7 +49,8 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o \ + dma-common.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c new file mode 100644 index 000000000000..dc7ef6b17b69 --- /dev/null +++ b/arch/powerpc/kernel/dma-common.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Contains common dma routines for all powerpc platforms. + * + * Copyright (C) 2019 Shawn Anastasio. + */ + +#include <linux/mm.h> +#include <linux/dma-noncoherent.h> + +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + if (!dev_is_dma_coherent(dev)) + return pgprot_noncached(prot); + return prot; +} diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eee5bef736c8..6ba3cc2ef8ab 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1531,7 +1531,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * * Call convention: * - * syscall register convention is in Documentation/powerpc/syscall64-abi.txt + * syscall register convention is in Documentation/powerpc/syscall64-abi.rst * * For hypercalls, the register convention is as follows: * r0 volatile diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index f50b708d6d77..98600b276f76 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1198,6 +1198,9 @@ SYSCALL_DEFINE0(rt_sigreturn) goto bad; if (MSR_TM_ACTIVE(msr_hi<<32)) { + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto bad; /* We only recheckpoint on return if we're * transaction. */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2f80e270c7b0..117515564ec7 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -771,6 +771,11 @@ SYSCALL_DEFINE0(rt_sigreturn) if (MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; + + /* Trying to start TM on non TM system */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto badframe; + if (__get_user(uc_transact, &uc->uc_link)) goto badframe; if (restore_tm_sigcontexts(current, &uc->uc_mcontext, diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ec1804f822af..cde3f5a4b3e4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3569,9 +3569,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); if (kvmhv_on_pseries()) { + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_load_hv_regs_and_go() below otherwise. + */ + unsigned long host_psscr; /* call our hypervisor to load up HV regs and go */ struct hv_guest_state hvregs; + host_psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); kvmhv_save_hv_regs(vcpu, &hvregs); hvregs.lpcr = lpcr; vcpu->arch.regs.msr = vcpu->arch.shregs.msr; @@ -3590,6 +3599,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.shregs.msr = vcpu->arch.regs.msr; vcpu->arch.shregs.dar = mfspr(SPRN_DAR); vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, host_psscr); /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && @@ -3654,6 +3665,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.vpa.dirty = 1; save_pmu = lp->pmcregs_in_use; } + /* Must save pmu if this guest is capable of running nested guests */ + save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6ca0d7376a9f..e3ba67095895 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1986,10 +1986,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->single_escalation = xive_native_has_single_escalation(); - if (ret) { - kfree(xive); + if (ret) return ret; - } return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5596c8ec221a..a998823f68a3 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1090,9 +1090,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) xive->ops = &kvmppc_xive_native_ops; if (ret) - kfree(xive); + return ret; - return ret; + return 0; } /* diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9a5963e07a82..b8ad14bb1170 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1899,11 +1899,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, * * For guests on platforms before POWER9, we clamp the it limit to 1G * to avoid some funky things such as RTAS bugs etc... + * + * On POWER9 we limit to 1TB in case the host erroneously told us that + * the RMA was >1TB. Effective address bits 0:23 are treated as zero + * (meaning the access is aliased to zero i.e. addr = addr % 1TB) + * for virtual real mode addressing and so it doesn't make sense to + * have an area larger than 1TB as it can't be addressed. */ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { ppc64_rma_size = first_memblock_size; if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); + else + ppc64_rma_size = min_t(u64, ppc64_rma_size, + 1UL << SID_SHIFT_1T); /* Finally limit subsequent allocations */ memblock_set_current_limit(ppc64_rma_size); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9259337d7374..9191a66b3bc5 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -239,7 +239,7 @@ void __init paging_init(void) #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); + 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c8ec670ee924..2c07908359b2 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/libnvdimm.h> #include <linux/platform_device.h> +#include <linux/delay.h> #include <asm/plpar_wrappers.h> @@ -43,8 +44,9 @@ struct papr_scm_priv { static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; uint64_t saved = 0; + uint64_t token; + int64_t rc; /* * When the hypervisor cannot map all the requested memory in a single @@ -64,6 +66,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p) } while (rc == H_BUSY); if (rc) { + /* H_OVERLAP needs a separate error path */ + if (rc == H_OVERLAP) + return -EBUSY; + dev_err(&p->pdev->dev, "bind err: %lld\n", rc); return -ENXIO; } @@ -78,22 +84,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p) static int drc_pmem_unbind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; - uint64_t rc, token; + uint64_t token = 0; + int64_t rc; - token = 0; + dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index); - /* NB: unbind has the same retry requirements mentioned above */ + /* NB: unbind has the same retry requirements as drc_pmem_bind() */ do { - rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, - p->bound_addr, p->blocks, token); + + /* Unbind of all SCM resources associated with drcIndex */ + rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, + p->drc_index, token); token = ret[0]; - cond_resched(); + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); if (rc) dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + else + dev_dbg(&p->pdev->dev, "unbind drc %x complete\n", + p->drc_index); - return !!rc; + return rc == H_SUCCESS ? 0 : -ENXIO; } static int papr_scm_meta_get(struct papr_scm_priv *p, @@ -389,6 +409,14 @@ static int papr_scm_probe(struct platform_device *pdev) /* request the hypervisor to bind this region to somewhere in memory */ rc = drc_pmem_bind(p); + + /* If phyp says drc memory still bound then force unbound and retry */ + if (rc == -EBUSY) { + dev_warn(&pdev->dev, "Retrying bind after unbinding\n"); + drc_pmem_unbind(p); + rc = drc_pmem_bind(p); + } + if (rc) goto err; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 082c7e1c20f0..1cdb39575eae 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -479,7 +479,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask, * Now go through the entire mask until we find a valid * target. */ - for (;;) { + do { /* * We re-check online as the fallback case passes us * an untested affinity mask @@ -487,12 +487,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask, if (cpu_online(cpu) && xive_try_pick_target(cpu)) return cpu; cpu = cpumask_next(cpu, mask); - if (cpu == first) - break; /* Wrap around */ if (cpu >= nr_cpu_ids) cpu = cpumask_first(mask); - } + } while (cpu != first); + return -1; } diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 40983491b95f..9bf63f0ab253 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -217,5 +217,20 @@ #size-cells = <0>; status = "disabled"; }; + eth0: ethernet@10090000 { + compatible = "sifive,fu540-c000-gem"; + interrupt-parent = <&plic0>; + interrupts = <53>; + reg = <0x0 0x10090000 0x0 0x2000 + 0x0 0x100a0000 0x0 0x1000>; + local-mac-address = [00 00 00 00 00 00]; + clock-names = "pclk", "hclk"; + clocks = <&prci PRCI_CLK_GEMGXLPLL>, + <&prci PRCI_CLK_GEMGXLPLL>; + #address-cells = <1>; + #size-cells = <0>; + status = "disabled"; + }; + }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 0b55c53c08c7..93d68cbd64fe 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -76,3 +76,12 @@ disable-wp; }; }; + +ð0 { + status = "okay"; + phy-mode = "gmii"; + phy-handle = <&phy0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; +}; diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 1efaeddf1e4b..16970f246860 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mm-arch-hooks.h +generic-y += msi.h generic-y += percpu.h generic-y += preempt.h generic-y += sections.h diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 0e2eeeb1fd27..13ce76cc5aff 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,6 +18,7 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ #include <asm-generic/unistd.h> diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 7cba96e7587b..4cf0bddb7d92 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -36,7 +36,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += ctype.o text_dma.o +obj-y += version.o ctype.o text_dma.o obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ad57c2205a71..082905d97309 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -12,6 +12,7 @@ void print_missing_facilities(void); unsigned long get_random_base(unsigned long safe_addr); extern int kaslr_enabled; +extern const char kernel_version[]; unsigned long read_ipl_report(unsigned long safe_offset); diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 028aab03a9e7..2087bed6e60f 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -361,6 +361,7 @@ ENTRY(startup_kdump) .quad 0 # INITRD_SIZE .quad 0 # OLDMEM_BASE .quad 0 # OLDMEM_SIZE + .quad kernel_version # points to kernel version string .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c new file mode 100644 index 000000000000..d32e58bdda6a --- /dev/null +++ b/arch/s390/boot/version.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <generated/utsrelease.h> +#include <generated/compile.h> +#include "boot.h" + +const char kernel_version[] = UTS_RELEASE + " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION; diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 42f2375c203e..e1fcc03159ef 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -118,7 +118,7 @@ do { \ return PTR_ERR(rc); \ } while(0) -static int hpyfs_vm_create_guest(struct dentry *systems_dir, +static int hypfs_vm_create_guest(struct dentry *systems_dir, struct diag2fc_data *data) { char guest_name[NAME_LEN + 1] = {}; @@ -219,7 +219,7 @@ int hypfs_vm_create_files(struct dentry *root) } for (i = 0; i < count; i++) { - rc = hpyfs_vm_create_guest(dir, &(data[i])); + rc = hypfs_vm_create_guest(dir, &(data[i])); if (rc) goto failed; } diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 9900d655014c..b8833ac983fa 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -35,6 +35,7 @@ #include <linux/typecheck.h> #include <linux/compiler.h> +#include <linux/types.h> #include <asm/atomic_ops.h> #include <asm/barrier.h> @@ -55,7 +56,7 @@ __bitops_byte(unsigned long nr, volatile unsigned long *ptr) return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3); } -static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -76,7 +77,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_or(mask, (long *)addr); } -static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -97,7 +98,8 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_and(mask, (long *)addr); } -static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long mask; @@ -118,8 +120,8 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) __atomic64_xor(mask, (long *)addr); } -static inline int -test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch_test_and_set_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long old, mask; @@ -129,8 +131,8 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) return (old & mask) != 0; } -static inline int -test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch_test_and_clear_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long old, mask; @@ -140,8 +142,8 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) return (old & ~mask) != 0; } -static inline int -test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch_test_and_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned long *addr = __bitops_word(nr, ptr); unsigned long old, mask; @@ -151,30 +153,31 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) return (old & mask) != 0; } -static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch___set_bit(unsigned long nr, volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); *addr |= 1 << (nr & 7); } -static inline void -__clear_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch___clear_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); *addr &= ~(1 << (nr & 7)); } -static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) +static inline void arch___change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); *addr ^= 1 << (nr & 7); } -static inline int -__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch___test_and_set_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; @@ -184,8 +187,8 @@ __test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) return (ch >> (nr & 7)) & 1; } -static inline int -__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch___test_and_clear_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; @@ -195,8 +198,8 @@ __test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) return (ch >> (nr & 7)) & 1; } -static inline int -__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) +static inline bool arch___test_and_change_bit(unsigned long nr, + volatile unsigned long *ptr) { unsigned char *addr = __bitops_byte(nr, ptr); unsigned char ch; @@ -206,7 +209,8 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) return (ch >> (nr & 7)) & 1; } -static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) +static inline bool arch_test_bit(unsigned long nr, + const volatile unsigned long *ptr) { const volatile unsigned char *addr; @@ -215,28 +219,30 @@ static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) return (*addr >> (nr & 7)) & 1; } -static inline int test_and_set_bit_lock(unsigned long nr, - volatile unsigned long *ptr) +static inline bool arch_test_and_set_bit_lock(unsigned long nr, + volatile unsigned long *ptr) { - if (test_bit(nr, ptr)) + if (arch_test_bit(nr, ptr)) return 1; - return test_and_set_bit(nr, ptr); + return arch_test_and_set_bit(nr, ptr); } -static inline void clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) +static inline void arch_clear_bit_unlock(unsigned long nr, + volatile unsigned long *ptr) { smp_mb__before_atomic(); - clear_bit(nr, ptr); + arch_clear_bit(nr, ptr); } -static inline void __clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) +static inline void arch___clear_bit_unlock(unsigned long nr, + volatile unsigned long *ptr) { smp_mb(); - __clear_bit(nr, ptr); + arch___clear_bit(nr, ptr); } +#include <asm-generic/bitops-instrumented.h> + /* * Functions which use MSB0 bit numbering. * The bits are numbered: @@ -261,7 +267,8 @@ static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr) +static inline bool test_and_clear_bit_inv(unsigned long nr, + volatile unsigned long *ptr) { return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } @@ -276,8 +283,8 @@ static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -static inline int test_bit_inv(unsigned long nr, - const volatile unsigned long *ptr) +static inline bool test_bit_inv(unsigned long nr, + const volatile unsigned long *ptr) { return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a4d38092530a..823578c6b9e2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,8 @@ static inline int devmem_is_allowed(unsigned long pfn) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define ARCH_ZONE_DMA_BITS 31 + #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 82deb8fc8319..c5cfff7b1f91 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -54,6 +54,7 @@ #define INITRD_SIZE_OFFSET 0x10410 #define OLDMEM_BASE_OFFSET 0x10418 #define OLDMEM_SIZE_OFFSET 0x10420 +#define KERNEL_VERSION_OFFSET 0x10428 #define COMMAND_LINE_OFFSET 0x10480 #ifndef __ASSEMBLY__ @@ -74,7 +75,8 @@ struct parmarea { unsigned long initrd_size; /* 0x10410 */ unsigned long oldmem_base; /* 0x10418 */ unsigned long oldmem_size; /* 0x10420 */ - char pad1[0x10480 - 0x10428]; /* 0x10428 - 0x10480 */ + unsigned long kernel_version; /* 0x10428 */ + char pad1[0x10480 - 0x10430]; /* 0x10430 - 0x10480 */ char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ }; diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index b6755685c7b8..9e9f75ef046a 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -34,5 +34,6 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_S390_UNISTD_H_ */ diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 494c34c50716..8c5755f41dde 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -20,6 +20,7 @@ #include <linux/ioctl.h> #include <linux/compiler.h> +#include <linux/types.h> /* Name of the zcrypt device driver. */ #define ZCRYPT_NAME "zcrypt" @@ -160,17 +161,17 @@ struct ica_xcRB { * @payload_len: Payload length */ struct ep11_cprb { - uint16_t cprb_len; + __u16 cprb_len; unsigned char cprb_ver_id; unsigned char pad_000[2]; unsigned char flags; unsigned char func_id[2]; - uint32_t source_id; - uint32_t target_id; - uint32_t ret_code; - uint32_t reserved1; - uint32_t reserved2; - uint32_t payload_len; + __u32 source_id; + __u32 target_id; + __u32 ret_code; + __u32 reserved1; + __u32 reserved2; + __u32 payload_len; } __attribute__((packed)); /** @@ -179,8 +180,8 @@ struct ep11_cprb { * @dom_id: Usage domain id */ struct ep11_target_dev { - uint16_t ap_id; - uint16_t dom_id; + __u16 ap_id; + __u16 dom_id; }; /** @@ -195,14 +196,14 @@ struct ep11_target_dev { * @resp: Addr to response block */ struct ep11_urb { - uint16_t targets_num; - uint64_t targets; - uint64_t weight; - uint64_t req_no; - uint64_t req_len; - uint64_t req; - uint64_t resp_len; - uint64_t resp; + __u16 targets_num; + __u64 targets; + __u64 weight; + __u64 req_no; + __u64 req_len; + __u64 req; + __u64 resp_len; + __u64 resp; } __attribute__((packed)); /** diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index a90d3e945445..3054e9c035a3 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -437,4 +437,4 @@ 432 common fsmount sys_fsmount sys_fsmount 433 common fspick sys_fspick sys_fspick 434 common pidfd_open sys_pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 common clone3 sys_clone3 sys_clone3 diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 99e06213a22b..54fcdf66ae96 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -17,8 +17,6 @@ #ifdef CONFIG_PGSTE -static int page_table_allocate_pgste_min = 0; -static int page_table_allocate_pgste_max = 1; int page_table_allocate_pgste = 0; EXPORT_SYMBOL(page_table_allocate_pgste); @@ -29,8 +27,8 @@ static struct ctl_table page_table_sysctl[] = { .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, .proc_handler = proc_dointvec_minmax, - .extra1 = &page_table_allocate_pgste_min, - .extra2 = &page_table_allocate_pgste_max, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { } }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8282b8d41209..7b0a4ee77313 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -607,15 +607,16 @@ struct kvm_vcpu_arch { /* * QEMU userspace and the guest each have their own FPU state. - * In vcpu_run, we switch between the user, maintained in the - * task_struct struct, and guest FPU contexts. While running a VCPU, - * the VCPU thread will have the guest FPU context. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The * "guest_fpu" state here contains the guest FPU context, with the * host PRKU bits. */ + struct fpu *user_fpu; struct fpu *guest_fpu; u64 xcr0; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8f72526e2f68..24843cf49579 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See - * Documentation/virtual/kvm/locking.txt to get more detail. + * Documentation/virt/kvm/locking.txt to get more detail. */ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 19f69df96758..7eafc6907861 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2143,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } + svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!svm->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_svm; + } + svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!svm->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_svm; + goto free_user_fpu; } err = kvm_vcpu_init(&svm->vcpu, kvm, id); @@ -2211,6 +2219,8 @@ uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); free_partial_svm: kmem_cache_free(kvm_vcpu_cache, svm); out: @@ -2241,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, svm); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0f1378789bd0..ced9fba32598 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -220,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu) if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; + kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); + vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); @@ -232,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); + vmx->nested.cached_vmcs12 = NULL; kfree(vmx->nested.cached_shadow_vmcs12); + vmx->nested.cached_shadow_vmcs12 = NULL; /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a279447eb75b..074385c86c09 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6598,6 +6598,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); kmem_cache_free(kvm_vcpu_cache, vmx); } @@ -6613,12 +6614,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); + vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); + if (!vmx->vcpu.arch.user_fpu) { + printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n"); + err = -ENOMEM; + goto free_partial_vcpu; + } + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; - goto free_partial_vcpu; + goto free_user_fpu; } vmx->vpid = allocate_vpid(); @@ -6721,6 +6730,8 @@ uninit_vcpu: free_vcpu: free_vpid(vmx->vpid); kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu); +free_user_fpu: + kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu); free_partial_vcpu: kmem_cache_free(kvm_vcpu_cache, vmx); return ERR_PTR(err); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58305cf81182..c6d951cbd76c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3306,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -7202,7 +7206,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id) rcu_read_unlock(); - if (target) + if (target && READ_ONCE(target->ready)) kvm_vcpu_yield_to(target); } @@ -7242,6 +7246,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) break; case KVM_HC_KICK_CPU: kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); + kvm_sched_yield(vcpu->kvm, a1); ret = 0; break; #ifdef CONFIG_X86_64 @@ -7990,9 +7995,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); + /* The preempt notifier should have taken care of the FPU already. */ + WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8270,7 +8274,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(¤t->thread.fpu); + copy_fpregs_to_fpstate(vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8287,7 +8291,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) fpregs_lock(); copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(¤t->thread.fpu.state); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); fpregs_unlock(); |