diff options
39 files changed, 1400 insertions, 232 deletions
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 96b7a477a8db..8226d0b71fd3 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -static struct of_device_id armv6_pmu_of_device_ids[] = { +static const struct of_device_id armv6_pmu_of_device_ids[] = { {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 80559977a83e..b52db8bb1270 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI @@ -21,6 +22,7 @@ config ARM64 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -246,6 +248,9 @@ config PGTABLE_LEVELS config ARCH_SUPPORTS_UPROBES def_bool y +config ARCH_PROC_KCORE_TEXT + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -984,7 +989,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE + depends on RANDOMIZE_BASE default y help Randomizes the location of the module region without considering the diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f839ecd919f9..9b41f1e3b1a0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -52,17 +52,19 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian +CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian +CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL UTS_MACHINE := aarch64 endif -CHECKFLAGS += -D__aarch64__ +CHECKFLAGS += -D__aarch64__ -m64 ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large @@ -70,6 +72,9 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +ifeq ($(CONFIG_DYNAMIC_FTRACE),y) +KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o +endif endif # Default value diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 5392dbeffa45..f72779aad276 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev); /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { - if (!dev) - return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 5d1700425efe..ac3fb7441510 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; ({ \ clear_bit(TIF_32BIT, ¤t->mm->context.flags); \ clear_thread_flag(TIF_32BIT); \ + current->personality &= ~READ_IMPLIES_EXEC; \ }) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ @@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread +/* + * Unlike the native SET_PERSONALITY macro, the compat version inherits + * READ_IMPLIES_EXEC across a fork() since this is the behaviour on + * arch/arm/. + */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ set_bit(TIF_32BIT, ¤t->mm->context.flags); \ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index d57693f5d4ec..19bd97671bb8 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -30,6 +30,9 @@ struct mod_plt_sec { struct mod_arch_specific { struct mod_plt_sec core; struct mod_plt_sec init; + + /* for CONFIG_DYNAMIC_FTRACE */ + void *ftrace_trampoline; }; #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c213fdbd056c..6eae342ced6b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) -#define pud_present(pud) (pud_val(pud)) +#define pud_present(pud) pte_present(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9428b93fefb2..64c9e78f9882 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -104,6 +104,9 @@ struct thread_struct { #define task_user_tls(t) (&(t)->thread.tp_value) #endif +/* Sync TPIDR_EL0 back to thread_struct for current */ +void tls_preserve_current_state(void); + #define INIT_THREAD { } static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 801a16dbbdf6..5b6eafccc5d8 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -30,5 +30,6 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 95aa4423247d..07aa8e3c5630 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(struct mm_struct *mm, unsigned long addr); +extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index ee469be1ae1d..f0a76b9fcd6e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -34,6 +34,26 @@ struct sigcontext { }; /* + * Allocation of __reserved[]: + * (Note: records do not necessarily occur in the order shown here.) + * + * size description + * + * 0x210 fpsimd_context + * 0x10 esr_context + * 0x20 extra_context (optional) + * 0x10 terminator (null _aarch64_ctx) + * + * 0xdb0 (reserved for future allocation) + * + * New records that can exceed this space need to be opt-in for userspace, so + * that an expanded signal frame is not generated unexpectedly. The mechanism + * for opting in will depend on the extension that generates each new record. + * The above table documents the maximum set and sizes of records than can be + * generated when userspace does not opt in for any such extension. + */ + +/* * Header to be used at the beginning of structures extending the user * context. Such structures must be placed after the rt_sigframe on the stack * and be 16-byte aligned. The last structure must be a dummy one with the @@ -61,4 +81,39 @@ struct esr_context { __u64 esr; }; +/* + * extra_context: describes extra space in the signal frame for + * additional structures that don't fit in sigcontext.__reserved[]. + * + * Note: + * + * 1) fpsimd_context, esr_context and extra_context must be placed in + * sigcontext.__reserved[] if present. They cannot be placed in the + * extra space. Any other record can be placed either in the extra + * space or in sigcontext.__reserved[], unless otherwise specified in + * this file. + * + * 2) There must not be more than one extra_context. + * + * 3) If extra_context is present, it must be followed immediately in + * sigcontext.__reserved[] by the terminating null _aarch64_ctx. + * + * 4) The extra space to which datap points must start at the first + * 16-byte aligned address immediately after the terminating null + * _aarch64_ctx that follows the extra_context structure in + * __reserved[]. The extra space may overrun the end of __reserved[], + * as indicated by a sufficiently large value for the size field. + * + * 5) The extra space must itself be terminated with a null + * _aarch64_ctx. + */ +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1dcb69d3d0e5..f2b4e816b6de 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif + +# will be included by each individual module but not by the core kernel itself +extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 817ce3365e20..9f9e0064c8c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); +static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +{ + /* file-wide pr_fmt adds "CPU features: " prefix */ + pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + return 0; +} + +static struct notifier_block cpu_hwcaps_notifier = { + .notifier_call = dump_cpu_hwcaps +}; + +static int __init register_cpu_hwcaps_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &cpu_hwcaps_notifier); + return 0; +} +__initcall(register_cpu_hwcaps_dumper); + DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); @@ -639,8 +658,10 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); + if (taint) { + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } u64 read_sanitised_ftr_reg(u32 id) diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S new file mode 100644 index 000000000000..00c4025be4ff --- /dev/null +++ b/arch/arm64/kernel/ftrace-mod.S @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .section ".text.ftrace_trampoline", "ax" + .align 3 +0: .quad 0 +__ftrace_trampoline: + ldr x16, 0b + br x16 +ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 40ad08ac569a..c13b1fca0e5b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -10,10 +10,12 @@ */ #include <linux/ftrace.h> +#include <linux/module.h> #include <linux/swab.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/debug-monitors.h> #include <asm/ftrace.h> #include <asm/insn.h> @@ -70,6 +72,58 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS + unsigned long *trampoline; + struct module *mod; + + /* + * On kernels that support module PLTs, the offset between the + * branch instruction and its target may legally exceed the + * range of an ordinary relative 'bl' opcode. In this case, we + * need to branch via a trampoline in the module. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + trampoline = (unsigned long *)mod->arch.ftrace_trampoline; + if (trampoline[0] != addr) { + if (trampoline[0] != 0) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + + /* point the trampoline to our ftrace entry point */ + module_disable_ro(mod); + trampoline[0] = addr; + module_enable_ro(mod, true); + + /* update trampoline before patching in the branch */ + smp_wmb(); + } + addr = (unsigned long)&trampoline[1]; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ + } old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -84,12 +138,55 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - u32 old, new; + bool validate = true; + u32 old = 0, new; + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS + u32 replaced; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + } + + /* + * The instruction we are about to patch may be a branch and + * link instruction that was redirected via a PLT entry. In + * this case, the normal validation will fail, but we can at + * least check that we are dealing with a branch and link + * instruction that points into the right module. + */ + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (!aarch64_insn_is_bl(replaced) || + !within_module(pc + aarch64_get_branch_offset(replaced), + mod)) + return -EINVAL; + + validate = false; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ + } else { + old = aarch64_insn_gen_branch_imm(pc, addr, + AARCH64_INSN_BRANCH_LINK); + } - old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, true); + return ftrace_modify_code(pc, old, new, validate); } void arch_ftrace_update_code(int command) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f035ff6fb223..8c3a7264fb0f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { apply_alternatives((void *)s->sh_addr, s->sh_size); - return 0; } +#ifdef CONFIG_ARM64_MODULE_PLTS + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) + me->arch.ftrace_trampoline = (void *)s->sh_addr; +#endif } return 0; diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index c7e3e6387a49..a7f6c01c13b9 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) if (!acpi_disabled) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); } return 0; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 83a1b1ad189f..b5798ba21189 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { .is_visible = armv8pmu_event_attr_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-9"); +PMU_FORMAT_ATTR(event, "config:0-15"); static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c5c45942fb6e..d849d9804011 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) pr_err("current sp %lx does not match saved sp %lx\n", orig_sp, stack_addr); pr_err("Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); + __show_regs(saved_regs); pr_err("Current registers\n"); - show_regs(regs); + __show_regs(regs); BUG(); } unpause_graph_tracing(); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ae2a835898d7..659ae8094ed5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); + dump_backtrace(regs, NULL); } static void tls_thread_flush(void) @@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, return 0; } +void tls_preserve_current_state(void) +{ + *task_user_tls(current) = read_sysreg(tpidr_el0); +} + static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - tpidr = read_sysreg(tpidr_el0); - *task_user_tls(current) = tpidr; + tls_preserve_current_state(); tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c142459a88f3..35846f1550db 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, { struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + + if (target == current) + fpsimd_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); } @@ -648,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { unsigned long *tls = &target->thread.tp_value; + + if (target == current) + tls_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); } @@ -894,21 +902,27 @@ static int compat_vfp_get(struct task_struct *target, { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret; + int ret, vregs_end_pos; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (target == current) + fpsimd_preserve_current_state(); + /* * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - VFP_STATE_SIZE - sizeof(compat_ulong_t)); + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, + 0, vregs_end_pos); if (count && !ret) { fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - ret = put_user(fpscr, (compat_ulong_t *)ubuf); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, + vregs_end_pos, VFP_STATE_SIZE); } return ret; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2c822ef94f34..d4b740538ad5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } name = of_flat_dt_get_machine_name(); + if (!name) + return; + pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c7b6de62f9d3..089c3747995d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,10 +19,14 @@ #include <linux/compat.h> #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/signal.h> #include <linux/personality.h> #include <linux/freezer.h> +#include <linux/stddef.h> #include <linux/uaccess.h> +#include <linux/sizes.h> +#include <linux/string.h> #include <linux/tracehook.h> #include <linux/ratelimit.h> @@ -41,10 +45,133 @@ struct rt_sigframe { struct siginfo info; struct ucontext uc; +}; + +struct frame_record { u64 fp; u64 lr; }; +struct rt_sigframe_user_layout { + struct rt_sigframe __user *sigframe; + struct frame_record __user *next_frame; + + unsigned long size; /* size of allocated sigframe data */ + unsigned long limit; /* largest allowed size */ + + unsigned long fpsimd_offset; + unsigned long esr_offset; + unsigned long extra_offset; + unsigned long end_offset; +}; + +#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + +static void init_user_layout(struct rt_sigframe_user_layout *user) +{ + const size_t reserved_size = + sizeof(user->sigframe->uc.uc_mcontext.__reserved); + + memset(user, 0, sizeof(*user)); + user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); + + user->limit = user->size + reserved_size; + + user->limit -= TERMINATOR_SIZE; + user->limit -= EXTRA_CONTEXT_SIZE; + /* Reserve space for extension and terminator ^ */ +} + +static size_t sigframe_size(struct rt_sigframe_user_layout const *user) +{ + return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); +} + +/* + * Sanity limit on the approximate maximum size of signal frame we'll + * try to generate. Stack alignment padding and the frame record are + * not taken into account. This limit is not a guarantee and is + * NOT ABI. + */ +#define SIGFRAME_MAXSZ SZ_64K + +static int __sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size, bool extend) +{ + size_t padded_size = round_up(size, 16); + + if (padded_size > user->limit - user->size && + !user->extra_offset && + extend) { + int ret; + + user->limit += EXTRA_CONTEXT_SIZE; + ret = __sigframe_alloc(user, &user->extra_offset, + sizeof(struct extra_context), false); + if (ret) { + user->limit -= EXTRA_CONTEXT_SIZE; + return ret; + } + + /* Reserve space for the __reserved[] terminator */ + user->size += TERMINATOR_SIZE; + + /* + * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for + * the terminator: + */ + user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE; + } + + /* Still not enough space? Bad luck! */ + if (padded_size > user->limit - user->size) + return -ENOMEM; + + *offset = user->size; + user->size += padded_size; + + return 0; +} + +/* + * Allocate space for an optional record of <size> bytes in the user + * signal frame. The offset from the signal frame base address to the + * allocated block is assigned to *offset. + */ +static int sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size) +{ + return __sigframe_alloc(user, offset, size, true); +} + +/* Allocate the null terminator record and prevent further allocations */ +static int sigframe_alloc_end(struct rt_sigframe_user_layout *user) +{ + int ret; + + /* Un-reserve the space reserved for the terminator: */ + user->limit += TERMINATOR_SIZE; + + ret = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + if (ret) + return ret; + + /* Prevent further allocation: */ + user->limit = user->size; + return 0; +} + +static void __user *apply_user_offset( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char __user *base = (char __user *)user->sigframe; + + return base + offset; +} + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -92,12 +219,159 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) return err ? -EFAULT : 0; } +struct user_ctxs { + struct fpsimd_context __user *fpsimd; +}; + +static int parse_user_sigframe(struct user_ctxs *user, + struct rt_sigframe __user *sf) +{ + struct sigcontext __user *const sc = &sf->uc.uc_mcontext; + struct _aarch64_ctx __user *head; + char __user *base = (char __user *)&sc->__reserved; + size_t offset = 0; + size_t limit = sizeof(sc->__reserved); + bool have_extra_context = false; + char const __user *const sfp = (char const __user *)sf; + + user->fpsimd = NULL; + + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + while (1) { + int err = 0; + u32 magic, size; + char const __user *userp; + struct extra_context const __user *extra; + u64 extra_datap; + u32 extra_size; + struct _aarch64_ctx const __user *end; + u32 end_magic, end_size; + + if (limit - offset < sizeof(*head)) + goto invalid; + + if (!IS_ALIGNED(offset, 16)) + goto invalid; + + head = (struct _aarch64_ctx __user *)(base + offset); + __get_user_error(magic, &head->magic, err); + __get_user_error(size, &head->size, err); + if (err) + return err; + + if (limit - offset < size) + goto invalid; + + switch (magic) { + case 0: + if (size) + goto invalid; + + goto done; + + case FPSIMD_MAGIC: + if (user->fpsimd) + goto invalid; + + if (size < sizeof(*user->fpsimd)) + goto invalid; + + user->fpsimd = (struct fpsimd_context __user *)head; + break; + + case ESR_MAGIC: + /* ignore */ + break; + + case EXTRA_MAGIC: + if (have_extra_context) + goto invalid; + + if (size < sizeof(*extra)) + goto invalid; + + userp = (char const __user *)head; + + extra = (struct extra_context const __user *)userp; + userp += size; + + __get_user_error(extra_datap, &extra->datap, err); + __get_user_error(extra_size, &extra->size, err); + if (err) + return err; + + /* Check for the dummy terminator in __reserved[]: */ + + if (limit - offset - size < TERMINATOR_SIZE) + goto invalid; + + end = (struct _aarch64_ctx const __user *)userp; + userp += TERMINATOR_SIZE; + + __get_user_error(end_magic, &end->magic, err); + __get_user_error(end_size, &end->size, err); + if (err) + return err; + + if (end_magic || end_size) + goto invalid; + + /* Prevent looping/repeated parsing of extra_context */ + have_extra_context = true; + + base = (__force void __user *)extra_datap; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + if (!IS_ALIGNED(extra_size, 16)) + goto invalid; + + if (base != userp) + goto invalid; + + /* Reject "unreasonably large" frames: */ + if (extra_size > sfp + SIGFRAME_MAXSZ - userp) + goto invalid; + + /* + * Ignore trailing terminator in __reserved[] + * and start parsing extra data: + */ + offset = 0; + limit = extra_size; + continue; + + default: + goto invalid; + } + + if (size < sizeof(*head)) + goto invalid; + + if (limit - offset < size) + goto invalid; + + offset += size; + } + +done: + if (!user->fpsimd) + goto invalid; + + return 0; + +invalid: + return -EINVAL; +} + static int restore_sigframe(struct pt_regs *regs, struct rt_sigframe __user *sf) { sigset_t set; int i, err; - void *aux = sf->uc.uc_mcontext.__reserved; + struct user_ctxs user; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -116,12 +390,11 @@ static int restore_sigframe(struct pt_regs *regs, regs->syscallno = ~0UL; err |= !valid_user_regs(®s->user_regs, current); + if (err == 0) + err = parse_user_sigframe(&user, sf); - if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); - err |= restore_fpsimd_context(fpsimd_ctx); - } + if (err == 0) + err = restore_fpsimd_context(user.fpsimd); return err; } @@ -162,16 +435,37 @@ badframe: return 0; } -static int setup_sigframe(struct rt_sigframe __user *sf, +/* Determine the layout of optional records in the signal frame */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +{ + int err; + + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + + /* fault information, if valid */ + if (current->thread.fault_code) { + err = sigframe_alloc(user, &user->esr_offset, + sizeof(struct esr_context)); + if (err) + return err; + } + + return sigframe_alloc_end(user); +} + + +static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - void *aux = sf->uc.uc_mcontext.__reserved; - struct _aarch64_ctx *end; + struct rt_sigframe __user *sf = user->sigframe; /* set up the stack frame for unwinding */ - __put_user_error(regs->regs[29], &sf->fp, err); - __put_user_error(regs->regs[30], &sf->lr, err); + __put_user_error(regs->regs[29], &user->next_frame->fp, err); + __put_user_error(regs->regs[30], &user->next_frame->lr, err); for (i = 0; i < 31; i++) __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], @@ -185,58 +479,103 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); + struct fpsimd_context __user *fpsimd_ctx = + apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); - aux += sizeof(*fpsimd_ctx); } /* fault information, if valid */ - if (current->thread.fault_code) { - struct esr_context *esr_ctx = - container_of(aux, struct esr_context, head); + if (err == 0 && user->esr_offset) { + struct esr_context __user *esr_ctx = + apply_user_offset(user, user->esr_offset); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); - aux += sizeof(*esr_ctx); + } + + if (err == 0 && user->extra_offset) { + char __user *sfp = (char __user *)user->sigframe; + char __user *userp = + apply_user_offset(user, user->extra_offset); + + struct extra_context __user *extra; + struct _aarch64_ctx __user *end; + u64 extra_datap; + u32 extra_size; + + extra = (struct extra_context __user *)userp; + userp += EXTRA_CONTEXT_SIZE; + + end = (struct _aarch64_ctx __user *)userp; + userp += TERMINATOR_SIZE; + + /* + * extra_datap is just written to the signal frame. + * The value gets cast back to a void __user * + * during sigreturn. + */ + extra_datap = (__force u64)userp; + extra_size = sfp + round_up(user->size, 16) - userp; + + __put_user_error(EXTRA_MAGIC, &extra->head.magic, err); + __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err); + __put_user_error(extra_datap, &extra->datap, err); + __put_user_error(extra_size, &extra->size, err); + + /* Add the terminator */ + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); } /* set the "end" magic */ - end = aux; - __put_user_error(0, &end->magic, err); - __put_user_error(0, &end->size, err); + if (err == 0) { + struct _aarch64_ctx __user *end = + apply_user_offset(user, user->end_offset); + + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } return err; } -static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig, - struct pt_regs *regs) +static int get_sigframe(struct rt_sigframe_user_layout *user, + struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; - struct rt_sigframe __user *frame; + int err; + + init_user_layout(user); + err = setup_sigframe_layout(user); + if (err) + return err; sp = sp_top = sigsp(regs->sp, ksig); - sp = (sp - sizeof(struct rt_sigframe)) & ~15; - frame = (struct rt_sigframe __user *)sp; + sp = round_down(sp - sizeof(struct frame_record), 16); + user->next_frame = (struct frame_record __user *)sp; + + sp = round_down(sp, 16) - sigframe_size(user); + user->sigframe = (struct rt_sigframe __user *)sp; /* * Check that we can actually write to the signal frame. */ - if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) - frame = NULL; + if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp)) + return -EFAULT; - return frame; + return 0; } static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, - void __user *frame, int usig) + struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; regs->regs[0] = usig; - regs->sp = (unsigned long)frame; - regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); + regs->sp = (unsigned long)user->sigframe; + regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; if (ka->sa.sa_flags & SA_RESTORER) @@ -250,20 +589,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; int err = 0; - frame = get_sigframe(ksig, regs); - if (!frame) + if (get_sigframe(&user, ksig, regs)) return 1; + frame = user.sigframe; + __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(frame, regs, set); + err |= setup_sigframe(&user, regs, set); if (err == 0) { - setup_return(regs, &ksig->ka, frame, usig); + setup_return(regs, &ksig->ka, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, &ksig->info); regs->regs[1] = (unsigned long)&frame->info; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index feac80c22f61..09d37d66b630 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) put_task_stack(tsk); } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0805b44f986a..3ebfb1d00b53 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; unsigned long irq_stack_ptr; @@ -728,8 +728,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) break; case BUG_TRAP_TYPE_WARN: - /* Ideally, report_bug() should backtrace for us... but no. */ - dump_backtrace(regs, NULL); break; default: diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..ae35f1855e06 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -37,7 +37,7 @@ #include <asm/vdso.h> #include <asm/vdso_datapage.h> -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; /* @@ -125,14 +125,14 @@ static int __init vdso_init(void) struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -145,7 +145,7 @@ static int __init vdso_init(void) /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(&vdso_start); + pfn = sym_to_pfn(vdso_start); for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3216e098c058..3e340b625436 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return NULL; - } - if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; @@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size, bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return; - } freed = dma_release_from_contiguous(dev, phys_to_page(paddr), diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index a8c9f2db20ba..c7861c9864e6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -31,6 +31,7 @@ #include <linux/highmem.h> #include <linux/perf_event.h> #include <linux/preempt.h> +#include <linux/hugetlb.h> #include <asm/bug.h> #include <asm/cpufeature.h> @@ -82,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) #endif /* - * Dump out the page tables associated with 'addr' in mm 'mm'. + * Dump out the page tables associated with 'addr' in the currently active mm. */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(unsigned long addr) { + struct mm_struct *mm; pgd_t *pgd; - if (!mm) + if (addr < TASK_SIZE) { + /* TTBR0 */ + mm = current->active_mm; + if (mm == &init_mm) { + pr_alert("[%016lx] user address but active_mm is swapper\n", + addr); + return; + } + } else if (addr >= VA_START) { + /* TTBR1 */ mm = &init_mm; + } else { + pr_alert("[%016lx] address between user and kernel address ranges\n", + addr); + return; + } - pr_alert("pgd = %p\n", mm->pgd); + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", + mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, + VA_BITS, mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); do { pud_t *pud; @@ -198,8 +216,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, /* * The kernel tried to access some page that wasn't present. */ -static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, struct pt_regs *regs) +static void __do_kernel_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const char *msg; @@ -229,7 +247,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); - show_pte(mm, addr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); do_exit(SIGKILL); @@ -241,18 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, */ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int esr, unsigned int sig, int code, - struct pt_regs *regs) + struct pt_regs *regs, int fault) { struct siginfo si; const struct fault_info *inf; + unsigned int lsb = 0; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); - pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", + pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(tsk->mm, addr); - show_regs(regs); + print_vma_addr(KERN_CONT ", in ", regs->pc); + pr_cont("\n"); + __show_regs(regs); } tsk->thread.fault_address = addr; @@ -261,13 +281,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, si.si_errno = 0; si.si_code = code; si.si_addr = (void __user *)addr; + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + si.si_addr_lsb = lsb; + force_sig_info(sig, &si, tsk); } static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->active_mm; const struct fault_info *inf; /* @@ -276,9 +306,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { inf = esr_to_fault_info(esr); - __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0); } else - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); } #define VM_FAULT_BADMAP 0x010000 @@ -331,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { struct task_struct *tsk; struct mm_struct *mm; - int fault, sig, code; + int fault, sig, code, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -370,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, die("Accessing user space memory outside uaccess.h routines", regs, esr); } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -393,24 +425,42 @@ retry: } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + major |= fault & VM_FAULT_MAJOR; - /* - * If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because it - * would already be released in __lock_page_or_retry in mm/filemap.c. - */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return 0; + if (fault & VM_FAULT_RETRY) { + /* + * If we need to retry but a fatal signal is pending, + * handle the signal first. We do not need to release + * the mmap_sem because it would already be released + * in __lock_page_or_retry in mm/filemap.c. + */ + if (fatal_signal_pending(current)) + return 0; + + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of + * starvation. + */ + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + up_read(&mm->mmap_sem); /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. + * Handle the "normal" (no error) case first. */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | + VM_FAULT_BADACCESS)))) { + /* + * Major/minor page fault accounting is only done + * once. If we go through a retry, it is extremely + * likely that the page will be found in page cache at + * that point. + */ + if (major) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); @@ -419,25 +469,9 @@ retry: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } - if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. - */ - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; - mm_flags |= FAULT_FLAG_TRIED; - goto retry; - } - } - - up_read(&mm->mmap_sem); - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) return 0; + } /* * If we are in kernel mode at this point, we have no context to @@ -463,6 +497,9 @@ retry: */ sig = SIGBUS; code = BUS_ADRERR; + } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { + sig = SIGBUS; + code = BUS_MCEERR_AR; } else { /* * Something tried to access memory that isn't in our memory @@ -473,11 +510,11 @@ retry: SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, esr, sig, code, regs); + __do_user_fault(tsk, addr, esr, sig, code, regs, fault); return 0; no_context: - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); return 0; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7514a000e361..69b8200b1cfd 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; - pte_t *pte = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; + pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) + if (pud_none(*pud)) return NULL; - - if (pud_huge(*pud)) + /* swap or huge page */ + if (!pud_present(*pud) || pud_huge(*pud)) return (pte_t *)pud; + /* table; check the next level */ + pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) return NULL; - - if (pte_cont(pmd_pte(*pmd))) { - pmd = pmd_offset( - pud, (addr & CONT_PMD_MASK)); - return (pte_t *)pmd; - } - if (pmd_huge(*pmd)) + if (!pmd_present(*pmd) || pmd_huge(*pmd)) return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, addr); - if (pte_present(*pte) && pte_cont(*pte)) { - pte = pte_offset_kernel( - pmd, (addr & CONT_PTE_MASK)); - return pte; - } + return NULL; } diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7b0d55756eb1..adc208c2ae9c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -18,6 +18,7 @@ #include <linux/elf.h> #include <linux/fs.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/export.h> @@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ int valid_phys_addr_range(phys_addr_t addr, size_t size) { - if (addr < PHYS_OFFSET) - return 0; - if (addr + size > __pa(high_memory - 1) + 1) - return 0; - - return 1; + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able traverse + * such boundaries. + */ + return memblock_is_region_memory(addr, size) && + memblock_is_map_memory(addr); } /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..23c2d89a362e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -31,6 +31,7 @@ #include <linux/fs.h> #include <linux/io.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/barrier.h> #include <asm/cputype.h> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c5fecf97ee2f..8c77598dd6aa 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, return NULL; } -static acpi_status -iort_match_type_callback(struct acpi_iort_node *node, void *context) -{ - return AE_OK; -} - -bool iort_node_match(u8 type) -{ - struct acpi_iort_node *node; - - node = iort_scan_node(type, iort_match_type_callback, NULL); - - return node != NULL; -} - static acpi_status iort_match_node_callback(struct acpi_iort_node *node, void *context) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 31adbebf812e..8102ee7b3247 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -17,6 +17,8 @@ config DEVMEM config DEVKMEM bool "/dev/kmem virtual device support" + # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write + depends on !ARM64 help Say Y here if you want to support the /dev/kmem device. The /dev/kmem device is rarely used, but can be used for certain diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 478f8ace2664..4a57b8f21488 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -39,7 +39,6 @@ config ARM_GIC_V3_ITS bool depends on PCI depends on PCI_MSI - select ACPI_IORT if ACPI config ARM_NVIC bool diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index aa587edaf9ea..e5197ffb7422 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -3,9 +3,10 @@ # menu "Performance monitor support" + depends on PERF_EVENTS config ARM_PMU - depends on PERF_EVENTS && (ARM || ARM64) + depends on ARM || ARM64 bool "ARM PMU framework" default y help @@ -18,7 +19,7 @@ config ARM_PMU_ACPI config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" - depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI + depends on ARCH_QCOM && ARM64 && ACPI help Provides support for the L2 cache performance monitor unit (PMU) in Qualcomm Technologies processors. @@ -27,7 +28,7 @@ config QCOM_L2_PMU config QCOM_L3_PMU bool "Qualcomm Technologies L3-cache PMU" - depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI + depends on ARCH_QCOM && ARM64 && ACPI select QCOM_IRQ_COMBINER help Provides support for the L3 cache performance monitor unit (PMU) @@ -36,7 +37,7 @@ config QCOM_L3_PMU monitoring L3 cache events. config XGENE_PMU - depends on PERF_EVENTS && ARCH_XGENE + depends on ARCH_XGENE bool "APM X-Gene SoC PMU" default n help diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 35b5289bc5da..e841282d690c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -37,6 +37,8 @@ #define CSW_CSWCR 0x0000 #define CSW_CSWCR_DUALMCB_MASK BIT(0) +#define CSW_CSWCR_MCB0_ROUTING(x) (((x) & 0x0C) >> 2) +#define CSW_CSWCR_MCB1_ROUTING(x) (((x) & 0x30) >> 4) #define MCBADDRMR 0x0000 #define MCBADDRMR_DUALMCU_MODE_MASK BIT(2) @@ -50,8 +52,17 @@ #define PCPPMU_INT_L3C BIT(2) #define PCPPMU_INT_IOB BIT(3) +#define PCPPMU_V3_INTMASK 0x00FF33FF +#define PCPPMU_V3_INTENMASK 0xFFFFFFFF +#define PCPPMU_V3_INTCLRMASK 0xFF00CC00 +#define PCPPMU_V3_INT_MCU 0x000000FF +#define PCPPMU_V3_INT_MCB 0x00000300 +#define PCPPMU_V3_INT_L3C 0x00FF0000 +#define PCPPMU_V3_INT_IOB 0x00003000 + #define PMU_MAX_COUNTERS 4 -#define PMU_CNT_MAX_PERIOD 0x100000000ULL +#define PMU_CNT_MAX_PERIOD 0xFFFFFFFFULL +#define PMU_V3_CNT_MAX_PERIOD 0xFFFFFFFFFFFFFFFFULL #define PMU_OVERFLOW_MASK 0xF #define PMU_PMCR_E BIT(0) #define PMU_PMCR_P BIT(1) @@ -73,6 +84,10 @@ #define PMU_PMOVSR 0xC80 #define PMU_PMCR 0xE04 +/* PMU registers for V3 */ +#define PMU_PMOVSCLR 0xC80 +#define PMU_PMOVSSET 0xCC0 + #define to_pmu_dev(p) container_of(p, struct xgene_pmu_dev, pmu) #define GET_CNTR(ev) (ev->hw.idx) #define GET_EVENTID(ev) (ev->hw.config & 0xFFULL) @@ -96,14 +111,33 @@ struct xgene_pmu_dev { struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS]; }; +struct xgene_pmu_ops { + void (*mask_int)(struct xgene_pmu *pmu); + void (*unmask_int)(struct xgene_pmu *pmu); + u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx); + void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val); + void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val); + void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val); + void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val); + void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*reset_counters)(struct xgene_pmu_dev *pmu_dev); + void (*start_counters)(struct xgene_pmu_dev *pmu_dev); + void (*stop_counters)(struct xgene_pmu_dev *pmu_dev); +}; + struct xgene_pmu { struct device *dev; int version; void __iomem *pcppmu_csr; u32 mcb_active_mask; u32 mc_active_mask; + u32 l3c_active_mask; cpumask_t cpu; raw_spinlock_t lock; + const struct xgene_pmu_ops *ops; struct list_head l3cpmus; struct list_head iobpmus; struct list_head mcbpmus; @@ -125,11 +159,13 @@ struct xgene_pmu_data { enum xgene_pmu_version { PCP_PMU_V1 = 1, PCP_PMU_V2, + PCP_PMU_V3, }; enum xgene_pmu_dev_type { PMU_TYPE_L3C = 0, PMU_TYPE_IOB, + PMU_TYPE_IOB_SLOW, PMU_TYPE_MCB, PMU_TYPE_MC, }; @@ -195,6 +231,56 @@ static const struct attribute_group mc_pmu_format_attr_group = { .attrs = mc_pmu_format_attrs, }; +static struct attribute *l3c_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"), + NULL, +}; + +static struct attribute *iob_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"), + NULL, +}; + +static struct attribute *iob_slow_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"), + NULL, +}; + +static struct attribute *mcb_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"), + NULL, +}; + +static struct attribute *mc_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"), + NULL, +}; + +static const struct attribute_group l3c_pmu_v3_format_attr_group = { + .name = "format", + .attrs = l3c_pmu_v3_format_attrs, +}; + +static const struct attribute_group iob_pmu_v3_format_attr_group = { + .name = "format", + .attrs = iob_pmu_v3_format_attrs, +}; + +static const struct attribute_group iob_slow_pmu_v3_format_attr_group = { + .name = "format", + .attrs = iob_slow_pmu_v3_format_attrs, +}; + +static const struct attribute_group mcb_pmu_v3_format_attr_group = { + .name = "format", + .attrs = mcb_pmu_v3_format_attrs, +}; + +static const struct attribute_group mc_pmu_v3_format_attr_group = { + .name = "format", + .attrs = mc_pmu_v3_format_attrs, +}; + /* * sysfs event attributes */ @@ -311,6 +397,219 @@ static const struct attribute_group mc_pmu_events_attr_group = { .attrs = mc_pmu_events_attrs, }; +static struct attribute *l3c_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(read-hit, 0x01), + XGENE_PMU_EVENT_ATTR(read-miss, 0x02), + XGENE_PMU_EVENT_ATTR(index-flush-eviction, 0x03), + XGENE_PMU_EVENT_ATTR(write-caused-replacement, 0x04), + XGENE_PMU_EVENT_ATTR(write-not-caused-replacement, 0x05), + XGENE_PMU_EVENT_ATTR(clean-eviction, 0x06), + XGENE_PMU_EVENT_ATTR(dirty-eviction, 0x07), + XGENE_PMU_EVENT_ATTR(read, 0x08), + XGENE_PMU_EVENT_ATTR(write, 0x09), + XGENE_PMU_EVENT_ATTR(request, 0x0a), + XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall, 0x0b), + XGENE_PMU_EVENT_ATTR(tq-full, 0x0c), + XGENE_PMU_EVENT_ATTR(ackq-full, 0x0d), + XGENE_PMU_EVENT_ATTR(wdb-full, 0x0e), + XGENE_PMU_EVENT_ATTR(odb-full, 0x10), + XGENE_PMU_EVENT_ATTR(wbq-full, 0x11), + XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall, 0x12), + XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall, 0x13), + XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall, 0x14), + XGENE_PMU_EVENT_ATTR(total-insertion, 0x15), + XGENE_PMU_EVENT_ATTR(sip-insertions-r-set, 0x16), + XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear, 0x17), + XGENE_PMU_EVENT_ATTR(dip-insertions-r-set, 0x18), + XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear, 0x19), + XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set, 0x1a), + XGENE_PMU_EVENT_ATTR(egression, 0x1b), + XGENE_PMU_EVENT_ATTR(replacement, 0x1c), + XGENE_PMU_EVENT_ATTR(old-replacement, 0x1d), + XGENE_PMU_EVENT_ATTR(young-replacement, 0x1e), + XGENE_PMU_EVENT_ATTR(r-set-replacement, 0x1f), + XGENE_PMU_EVENT_ATTR(r-clear-replacement, 0x20), + XGENE_PMU_EVENT_ATTR(old-r-replacement, 0x21), + XGENE_PMU_EVENT_ATTR(old-nr-replacement, 0x22), + XGENE_PMU_EVENT_ATTR(young-r-replacement, 0x23), + XGENE_PMU_EVENT_ATTR(young-nr-replacement, 0x24), + XGENE_PMU_EVENT_ATTR(bloomfilter-clearing, 0x25), + XGENE_PMU_EVENT_ATTR(generation-flip, 0x26), + XGENE_PMU_EVENT_ATTR(vcc-droop-detected, 0x27), + NULL, +}; + +static struct attribute *iob_fast_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all, 0x01), + XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd, 0x02), + XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr, 0x03), + XGENE_PMU_EVENT_ATTR(pa-all-cp-req, 0x04), + XGENE_PMU_EVENT_ATTR(pa-cp-blk-req, 0x05), + XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req, 0x06), + XGENE_PMU_EVENT_ATTR(pa-cp-rd-req, 0x07), + XGENE_PMU_EVENT_ATTR(pa-cp-wr-req, 0x08), + XGENE_PMU_EVENT_ATTR(ba-all-req, 0x09), + XGENE_PMU_EVENT_ATTR(ba-rd-req, 0x0a), + XGENE_PMU_EVENT_ATTR(ba-wr-req, 0x0b), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued, 0x10), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued, 0x11), + XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12), + XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13), + XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable, 0x14), + XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15), + XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req, 0x16), + XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req, 0x17), + XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data, 0x18), + XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS, 0x1b), + XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence, 0x1c), + XGENE_PMU_EVENT_ATTR(pa-barrier-cycles, 0x1d), + XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops, 0x20), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop, 0x21), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit, 0x22), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop, 0x23), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit, 0x24), + XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop, 0x25), + XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit, 0x26), + XGENE_PMU_EVENT_ATTR(pa-req-buffer-full, 0x28), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full, 0x29), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full, 0x2b), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full, 0x2d), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure, 0x2f), + NULL, +}; + +static struct attribute *iob_slow_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req, 0x01), + XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req, 0x02), + XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req, 0x03), + XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req, 0x04), + XGENE_PMU_EVENT_ATTR(ba-all-axi-req, 0x07), + XGENE_PMU_EVENT_ATTR(ba-axi-rd-req, 0x08), + XGENE_PMU_EVENT_ATTR(ba-axi-wr-req, 0x09), + XGENE_PMU_EVENT_ATTR(ba-free-list-empty, 0x10), + NULL, +}; + +static struct attribute *mcb_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(req-receive, 0x01), + XGENE_PMU_EVENT_ATTR(rd-req-recv, 0x02), + XGENE_PMU_EVENT_ATTR(rd-req-recv-2, 0x03), + XGENE_PMU_EVENT_ATTR(wr-req-recv, 0x04), + XGENE_PMU_EVENT_ATTR(wr-req-recv-2, 0x05), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu, 0x06), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2, 0x07), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu, 0x08), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2, 0x09), + XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b), + XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req, 0x0d), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2, 0x0e), + XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu, 0x0f), + XGENE_PMU_EVENT_ATTR(gack-recv, 0x10), + XGENE_PMU_EVENT_ATTR(rd-gack-recv, 0x11), + XGENE_PMU_EVENT_ATTR(wr-gack-recv, 0x12), + XGENE_PMU_EVENT_ATTR(cancel-rd-gack, 0x13), + XGENE_PMU_EVENT_ATTR(cancel-wr-gack, 0x14), + XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall, 0x15), + XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked, 0x16), + XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall, 0x17), + XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked, 0x18), + XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked, 0x19), + XGENE_PMU_EVENT_ATTR(mcu-req-table-full, 0x1a), + XGENE_PMU_EVENT_ATTR(mcu-stat-table-full, 0x1b), + XGENE_PMU_EVENT_ATTR(mcu-wr-table-full, 0x1c), + XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp, 0x1d), + XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp, 0x1e), + XGENE_PMU_EVENT_ATTR(mcu-retryack-resp, 0x1f), + XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp, 0x20), + XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload, 0x21), + XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass, 0x22), + XGENE_PMU_EVENT_ATTR(volt-droop-detect, 0x23), + NULL, +}; + +static struct attribute *mc_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(act-sent, 0x01), + XGENE_PMU_EVENT_ATTR(pre-sent, 0x02), + XGENE_PMU_EVENT_ATTR(rd-sent, 0x03), + XGENE_PMU_EVENT_ATTR(rda-sent, 0x04), + XGENE_PMU_EVENT_ATTR(wr-sent, 0x05), + XGENE_PMU_EVENT_ATTR(wra-sent, 0x06), + XGENE_PMU_EVENT_ATTR(pd-entry-vld, 0x07), + XGENE_PMU_EVENT_ATTR(sref-entry-vld, 0x08), + XGENE_PMU_EVENT_ATTR(prea-sent, 0x09), + XGENE_PMU_EVENT_ATTR(ref-sent, 0x0a), + XGENE_PMU_EVENT_ATTR(rd-rda-sent, 0x0b), + XGENE_PMU_EVENT_ATTR(wr-wra-sent, 0x0c), + XGENE_PMU_EVENT_ATTR(raw-hazard, 0x0d), + XGENE_PMU_EVENT_ATTR(war-hazard, 0x0e), + XGENE_PMU_EVENT_ATTR(waw-hazard, 0x0f), + XGENE_PMU_EVENT_ATTR(rar-hazard, 0x10), + XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard, 0x11), + XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld, 0x12), + XGENE_PMU_EVENT_ATTR(lprd-req-vld, 0x13), + XGENE_PMU_EVENT_ATTR(hprd-req-vld, 0x14), + XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld, 0x15), + XGENE_PMU_EVENT_ATTR(wr-req-vld, 0x16), + XGENE_PMU_EVENT_ATTR(partial-wr-req-vld, 0x17), + XGENE_PMU_EVENT_ATTR(rd-retry, 0x18), + XGENE_PMU_EVENT_ATTR(wr-retry, 0x19), + XGENE_PMU_EVENT_ATTR(retry-gnt, 0x1a), + XGENE_PMU_EVENT_ATTR(rank-change, 0x1b), + XGENE_PMU_EVENT_ATTR(dir-change, 0x1c), + XGENE_PMU_EVENT_ATTR(rank-dir-change, 0x1d), + XGENE_PMU_EVENT_ATTR(rank-active, 0x1e), + XGENE_PMU_EVENT_ATTR(rank-idle, 0x1f), + XGENE_PMU_EVENT_ATTR(rank-pd, 0x20), + XGENE_PMU_EVENT_ATTR(rank-sref, 0x21), + XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh, 0x22), + XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh, 0x23), + XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh, 0x24), + XGENE_PMU_EVENT_ATTR(phy-updt-complt, 0x25), + XGENE_PMU_EVENT_ATTR(tz-fail, 0x26), + XGENE_PMU_EVENT_ATTR(dram-errc, 0x27), + XGENE_PMU_EVENT_ATTR(dram-errd, 0x28), + XGENE_PMU_EVENT_ATTR(rd-enq, 0x29), + XGENE_PMU_EVENT_ATTR(wr-enq, 0x2a), + XGENE_PMU_EVENT_ATTR(tmac-limit-reached, 0x2b), + XGENE_PMU_EVENT_ATTR(tmaw-tracker-full, 0x2c), + NULL, +}; + +static const struct attribute_group l3c_pmu_v3_events_attr_group = { + .name = "events", + .attrs = l3c_pmu_v3_events_attrs, +}; + +static const struct attribute_group iob_fast_pmu_v3_events_attr_group = { + .name = "events", + .attrs = iob_fast_pmu_v3_events_attrs, +}; + +static const struct attribute_group iob_slow_pmu_v3_events_attr_group = { + .name = "events", + .attrs = iob_slow_pmu_v3_events_attrs, +}; + +static const struct attribute_group mcb_pmu_v3_events_attr_group = { + .name = "events", + .attrs = mcb_pmu_v3_events_attrs, +}; + +static const struct attribute_group mc_pmu_v3_events_attr_group = { + .name = "events", + .attrs = mc_pmu_v3_events_attrs, +}; + /* * sysfs cpumask attributes */ @@ -334,7 +633,7 @@ static const struct attribute_group pmu_cpumask_attr_group = { }; /* - * Per PMU device attribute groups + * Per PMU device attribute groups of PMU v1 and v2 */ static const struct attribute_group *l3c_pmu_attr_groups[] = { &l3c_pmu_format_attr_group, @@ -364,6 +663,44 @@ static const struct attribute_group *mc_pmu_attr_groups[] = { NULL }; +/* + * Per PMU device attribute groups of PMU v3 + */ +static const struct attribute_group *l3c_pmu_v3_attr_groups[] = { + &l3c_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &l3c_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = { + &iob_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &iob_fast_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = { + &iob_slow_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &iob_slow_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *mcb_pmu_v3_attr_groups[] = { + &mcb_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &mcb_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *mc_pmu_v3_attr_groups[] = { + &mc_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &mc_pmu_v3_events_attr_group, + NULL +}; + static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev) { int cntr; @@ -387,23 +724,67 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu) writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); } +static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu) +{ + writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); +} + static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu) { writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); } -static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx) +static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu) +{ + writel(PCPPMU_V3_INTCLRMASK, + xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); +} + +static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev, + int idx) { return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } +static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev, + int idx) +{ + u32 lo, hi; + + /* + * v3 has 64-bit counter registers composed by 2 32-bit registers + * This can be a problem if the counter increases and carries + * out of bit [31] between 2 reads. The extra reads would help + * to prevent this issue. + */ + do { + hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1); + lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx); + } while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1)); + + return (((u64)hi << 32) | lo); +} + static inline void -xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val) +xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val) { writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } static inline void +xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val) +{ + u32 cnt_lo, cnt_hi; + + cnt_hi = upper_32_bits(val); + cnt_lo = lower_32_bits(val); + + /* v3 has 64-bit counter registers composed by 2 32-bit registers */ + xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo); + xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi); +} + +static inline void xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val) { writel(val, pmu_dev->inf->csr + PMU_PMEVTYPER0 + (4 * idx)); @@ -416,12 +797,18 @@ xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) } static inline void +xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { } + +static inline void xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { writel(val, pmu_dev->inf->csr + PMU_PMAMR1); } static inline void +xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { } + +static inline void xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx) { u32 val; @@ -491,20 +878,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev) static void xgene_perf_pmu_enable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; int enabled = bitmap_weight(pmu_dev->cntr_assign_mask, pmu_dev->max_counters); if (!enabled) return; - xgene_pmu_start_counters(pmu_dev); + xgene_pmu->ops->start_counters(pmu_dev); } static void xgene_perf_pmu_disable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_stop_counters(pmu_dev); + xgene_pmu->ops->stop_counters(pmu_dev); } static int xgene_perf_event_init(struct perf_event *event) @@ -572,49 +961,56 @@ static int xgene_perf_event_init(struct perf_event *event) static void xgene_perf_enable_event(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event)); - xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event))); + xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event), + GET_EVENTID(event)); + xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event))); if (pmu_dev->inf->type == PMU_TYPE_IOB) - xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event))); + xgene_pmu->ops->write_agent1msk(pmu_dev, + ~((u32)GET_AGENT1ID(event))); - xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event)); - xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event)); } static void xgene_perf_disable_event(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event)); - xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event)); } static void xgene_perf_event_set_period(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; /* - * The X-Gene PMU counters have a period of 2^32. To account for the - * possiblity of extreme interrupt latency we program for a period of - * half that. Hopefully we can handle the interrupt before another 2^31 + * For 32 bit counter, it has a period of 2^32. To account for the + * possibility of extreme interrupt latency we program for a period of + * half that. Hopefully, we can handle the interrupt before another 2^31 * events occur and the counter overtakes its previous value. + * For 64 bit counter, we don't expect it overflow. */ u64 val = 1ULL << 31; local64_set(&hw->prev_count, val); - xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val); + xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val); } static void xgene_perf_event_update(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hw->prev_count); - new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event)); + new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event)); if (local64_cmpxchg(&hw->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -633,6 +1029,7 @@ static void xgene_perf_read(struct perf_event *event) static void xgene_perf_start(struct perf_event *event, int flags) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED))) @@ -646,8 +1043,8 @@ static void xgene_perf_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { u64 prev_raw_count = local64_read(&hw->prev_count); - xgene_pmu_write_counter(pmu_dev, GET_CNTR(event), - (u32) prev_raw_count); + xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event), + prev_raw_count); } xgene_perf_enable_event(event); @@ -713,7 +1110,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) { struct xgene_pmu *xgene_pmu; - pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1; + if (pmu_dev->parent->version == PCP_PMU_V3) + pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD; + else + pmu_dev->max_period = PMU_CNT_MAX_PERIOD; /* First version PMU supports only single event counter */ xgene_pmu = pmu_dev->parent; if (xgene_pmu->version == PCP_PMU_V1) @@ -736,8 +1136,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) }; /* Hardware counter init */ - xgene_pmu_stop_counters(pmu_dev); - xgene_pmu_reset_counters(pmu_dev); + xgene_pmu->ops->stop_counters(pmu_dev); + xgene_pmu->ops->reset_counters(pmu_dev); return perf_pmu_register(&pmu_dev->pmu, name, -1); } @@ -758,20 +1158,38 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) switch (pmu->inf->type) { case PMU_TYPE_L3C: - pmu->attr_groups = l3c_pmu_attr_groups; + if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask)) + goto dev_err; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = l3c_pmu_v3_attr_groups; + else + pmu->attr_groups = l3c_pmu_attr_groups; break; case PMU_TYPE_IOB: - pmu->attr_groups = iob_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = iob_fast_pmu_v3_attr_groups; + else + pmu->attr_groups = iob_pmu_attr_groups; + break; + case PMU_TYPE_IOB_SLOW: + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = iob_slow_pmu_v3_attr_groups; break; case PMU_TYPE_MCB: if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask)) goto dev_err; - pmu->attr_groups = mcb_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = mcb_pmu_v3_attr_groups; + else + pmu->attr_groups = mcb_pmu_attr_groups; break; case PMU_TYPE_MC: if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask)) goto dev_err; - pmu->attr_groups = mc_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = mc_pmu_v3_attr_groups; + else + pmu->attr_groups = mc_pmu_attr_groups; break; default: return -EINVAL; @@ -795,18 +1213,27 @@ dev_err: static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) { struct xgene_pmu *xgene_pmu = pmu_dev->parent; + void __iomem *csr = pmu_dev->inf->csr; u32 pmovsr; int idx; - pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK; + xgene_pmu->ops->stop_counters(pmu_dev); + + if (xgene_pmu->version == PCP_PMU_V3) + pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK; + else + pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK; + if (!pmovsr) - return; + goto out; /* Clear interrupt flag */ if (xgene_pmu->version == PCP_PMU_V1) - writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR); + writel(0x0, csr + PMU_PMOVSR); + else if (xgene_pmu->version == PCP_PMU_V2) + writel(pmovsr, csr + PMU_PMOVSR); else - writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR); + writel(pmovsr, csr + PMU_PMOVSCLR); for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) { struct perf_event *event = pmu_dev->pmu_counter_event[idx]; @@ -818,10 +1245,14 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) xgene_perf_event_update(event); xgene_perf_event_set_period(event); } + +out: + xgene_pmu->ops->start_counters(pmu_dev); } static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) { + u32 intr_mcu, intr_mcb, intr_l3c, intr_iob; struct xgene_pmu_dev_ctx *ctx; struct xgene_pmu *xgene_pmu = dev_id; unsigned long flags; @@ -831,22 +1262,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) /* Get Interrupt PMU source */ val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG); - if (val & PCPPMU_INT_MCU) { + if (xgene_pmu->version == PCP_PMU_V3) { + intr_mcu = PCPPMU_V3_INT_MCU; + intr_mcb = PCPPMU_V3_INT_MCB; + intr_l3c = PCPPMU_V3_INT_L3C; + intr_iob = PCPPMU_V3_INT_IOB; + } else { + intr_mcu = PCPPMU_INT_MCU; + intr_mcb = PCPPMU_INT_MCB; + intr_l3c = PCPPMU_INT_L3C; + intr_iob = PCPPMU_INT_IOB; + } + if (val & intr_mcu) { list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_MCB) { + if (val & intr_mcb) { list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_L3C) { + if (val & intr_l3c) { list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_IOB) { + if (val & intr_iob) { list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } @@ -857,8 +1299,8 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { void __iomem *csw_csr, *mcba_csr, *mcbb_csr; struct resource *res; @@ -885,6 +1327,8 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return PTR_ERR(mcbb_csr); } + xgene_pmu->l3c_active_mask = 0x1; + reg = readl(csw_csr + CSW_CSWCR); if (reg & CSW_CSWCR_DUALMCB_MASK) { /* Dual MCB active */ @@ -905,8 +1349,56 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return 0; } -static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) +{ + void __iomem *csw_csr; + struct resource *res; + unsigned int reg; + u32 mcb0routing; + u32 mcb1routing; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + csw_csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(csw_csr)) { + dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); + return PTR_ERR(csw_csr); + } + + reg = readl(csw_csr + CSW_CSWCR); + mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg); + mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg); + if (reg & CSW_CSWCR_DUALMCB_MASK) { + /* Dual MCB active */ + xgene_pmu->mcb_active_mask = 0x3; + /* Probe all active L3C(s), maximum is 8 */ + xgene_pmu->l3c_active_mask = 0xFF; + /* Probe all active MC(s), maximum is 8 */ + if ((mcb0routing == 0x2) && (mcb1routing == 0x2)) + xgene_pmu->mc_active_mask = 0xFF; + else if ((mcb0routing == 0x1) && (mcb1routing == 0x1)) + xgene_pmu->mc_active_mask = 0x33; + else + xgene_pmu->mc_active_mask = 0x11; + } else { + /* Single MCB active */ + xgene_pmu->mcb_active_mask = 0x1; + /* Probe all active L3C(s), maximum is 4 */ + xgene_pmu->l3c_active_mask = 0x0F; + /* Probe all active MC(s), maximum is 4 */ + if (mcb0routing == 0x2) + xgene_pmu->mc_active_mask = 0x0F; + else if (mcb0routing == 0x1) + xgene_pmu->mc_active_mask = 0x03; + else + xgene_pmu->mc_active_mask = 0x01; + } + + return 0; +} + +static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { struct regmap *csw_map, *mcba_map, *mcbb_map; struct device_node *np = pdev->dev.of_node; @@ -930,6 +1422,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return PTR_ERR(mcbb_map); } + xgene_pmu->l3c_active_mask = 0x1; if (regmap_read(csw_map, CSW_CSWCR, ®)) return -EINVAL; @@ -954,12 +1447,18 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return 0; } -static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { - if (has_acpi_companion(&pdev->dev)) - return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); - return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); + if (has_acpi_companion(&pdev->dev)) { + if (xgene_pmu->version == PCP_PMU_V3) + return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu, + pdev); + else + return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, + pdev); + } + return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev); } static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) @@ -969,6 +1468,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id); case PMU_TYPE_IOB: return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id); + case PMU_TYPE_IOB_SLOW: + return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id); case PMU_TYPE_MCB: return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id); case PMU_TYPE_MC: @@ -1047,9 +1548,40 @@ err: return NULL; } +static const struct acpi_device_id xgene_pmu_acpi_type_match[] = { + {"APMC0D5D", PMU_TYPE_L3C}, + {"APMC0D5E", PMU_TYPE_IOB}, + {"APMC0D5F", PMU_TYPE_MCB}, + {"APMC0D60", PMU_TYPE_MC}, + {"APMC0D84", PMU_TYPE_L3C}, + {"APMC0D85", PMU_TYPE_IOB}, + {"APMC0D86", PMU_TYPE_IOB_SLOW}, + {"APMC0D87", PMU_TYPE_MCB}, + {"APMC0D88", PMU_TYPE_MC}, + {}, +}; + +static const struct acpi_device_id *xgene_pmu_acpi_match_type( + const struct acpi_device_id *ids, + struct acpi_device *adev) +{ + const struct acpi_device_id *match_id = NULL; + const struct acpi_device_id *id; + + for (id = ids; id->id[0] || id->cls; id++) { + if (!acpi_match_device_ids(adev, id)) + match_id = id; + else if (match_id) + break; + } + + return match_id; +} + static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, void *data, void **return_value) { + const struct acpi_device_id *acpi_id; struct xgene_pmu *xgene_pmu = data; struct xgene_pmu_dev_ctx *ctx; struct acpi_device *adev; @@ -1059,17 +1591,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, if (acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; - if (!strcmp(acpi_device_hid(adev), "APMC0D5D")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C); - else if (!strcmp(acpi_device_hid(adev), "APMC0D5E")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB); - else if (!strcmp(acpi_device_hid(adev), "APMC0D5F")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB); - else if (!strcmp(acpi_device_hid(adev), "APMC0D60")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC); - else - ctx = NULL; + acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev); + if (!acpi_id) + return AE_OK; + ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data); if (!ctx) return AE_OK; @@ -1086,6 +1612,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, case PMU_TYPE_IOB: list_add(&ctx->next, &xgene_pmu->iobpmus); break; + case PMU_TYPE_IOB_SLOW: + list_add(&ctx->next, &xgene_pmu->iobpmus); + break; case PMU_TYPE_MCB: list_add(&ctx->next, &xgene_pmu->mcbpmus); break; @@ -1207,6 +1736,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu, case PMU_TYPE_IOB: list_add(&ctx->next, &xgene_pmu->iobpmus); break; + case PMU_TYPE_IOB_SLOW: + list_add(&ctx->next, &xgene_pmu->iobpmus); + break; case PMU_TYPE_MCB: list_add(&ctx->next, &xgene_pmu->mcbpmus); break; @@ -1235,6 +1767,40 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = { .id = PCP_PMU_V2, }; +static const struct xgene_pmu_ops xgene_pmu_ops = { + .mask_int = xgene_pmu_mask_int, + .unmask_int = xgene_pmu_unmask_int, + .read_counter = xgene_pmu_read_counter32, + .write_counter = xgene_pmu_write_counter32, + .write_evttype = xgene_pmu_write_evttype, + .write_agentmsk = xgene_pmu_write_agentmsk, + .write_agent1msk = xgene_pmu_write_agent1msk, + .enable_counter = xgene_pmu_enable_counter, + .disable_counter = xgene_pmu_disable_counter, + .enable_counter_int = xgene_pmu_enable_counter_int, + .disable_counter_int = xgene_pmu_disable_counter_int, + .reset_counters = xgene_pmu_reset_counters, + .start_counters = xgene_pmu_start_counters, + .stop_counters = xgene_pmu_stop_counters, +}; + +static const struct xgene_pmu_ops xgene_pmu_v3_ops = { + .mask_int = xgene_pmu_v3_mask_int, + .unmask_int = xgene_pmu_v3_unmask_int, + .read_counter = xgene_pmu_read_counter64, + .write_counter = xgene_pmu_write_counter64, + .write_evttype = xgene_pmu_write_evttype, + .write_agentmsk = xgene_pmu_v3_write_agentmsk, + .write_agent1msk = xgene_pmu_v3_write_agent1msk, + .enable_counter = xgene_pmu_enable_counter, + .disable_counter = xgene_pmu_disable_counter, + .enable_counter_int = xgene_pmu_enable_counter_int, + .disable_counter_int = xgene_pmu_disable_counter_int, + .reset_counters = xgene_pmu_reset_counters, + .start_counters = xgene_pmu_start_counters, + .stop_counters = xgene_pmu_stop_counters, +}; + static const struct of_device_id xgene_pmu_of_match[] = { { .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data }, { .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data }, @@ -1245,6 +1811,7 @@ MODULE_DEVICE_TABLE(of, xgene_pmu_of_match); static const struct acpi_device_id xgene_pmu_acpi_match[] = { {"APMC0D5B", PCP_PMU_V1}, {"APMC0D5C", PCP_PMU_V2}, + {"APMC0D83", PCP_PMU_V3}, {}, }; MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match); @@ -1284,6 +1851,11 @@ static int xgene_pmu_probe(struct platform_device *pdev) if (version < 0) return -ENODEV; + if (version == PCP_PMU_V3) + xgene_pmu->ops = &xgene_pmu_v3_ops; + else + xgene_pmu->ops = &xgene_pmu_ops; + INIT_LIST_HEAD(&xgene_pmu->l3cpmus); INIT_LIST_HEAD(&xgene_pmu->iobpmus); INIT_LIST_HEAD(&xgene_pmu->mcbpmus); @@ -1317,7 +1889,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&xgene_pmu->lock); /* Check for active MCBs and MCUs */ - rc = xgene_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); + rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev); if (rc) { dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n"); xgene_pmu->mcb_active_mask = 0x1; @@ -1342,7 +1914,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) } /* Enable interrupt */ - xgene_pmu_unmask_int(xgene_pmu); + xgene_pmu->ops->unmask_int(xgene_pmu); return 0; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4ee55274f155..45629f4b5402 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (&m->list == &kclist_head) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if (is_vmalloc_or_module_addr((void *)start)) { + } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ if (copy_to_user(buffer, buf, tsz)) diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 3ff9acea8616..8379d406ad2e 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); void acpi_configure_pmsi_domain(struct device *dev); @@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); #else static inline void acpi_iort_init(void) { } -static inline bool iort_node_match(u8 type) { return false; } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) { return req_id; } static inline struct irq_domain *iort_get_device_domain(struct device *dev, |