author    Linus Torvalds <torvalds@linux-foundation.org>  2016-07-27 20:16:05 +0200
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-07-27 20:16:05 +0200
commit    e831101a73fbc8339ef1d1909dad3ef64f089e70 (patch)
tree      c764ca5cb72cdf24ff26357dd12e16f9c7235627 /arch/arm64/kernel
parent    Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile (diff)
parent    arm64: arm: Fix-up the removal of the arm64 regs_query_register_name() prototype (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:

 - Kexec support for arm64
 - Kprobes support
 - Expose MIDR_EL1 and REVIDR_EL1 CPU identification registers to sysfs
 - Trapping of user space cache maintenance operations and emulation in
   the kernel (CPU errata workaround)
 - Clean-up of the early page tables creation (kernel linear mapping,
   EFI run-time maps) to avoid splitting larger blocks (e.g. pmds) into
   smaller ones (e.g. ptes)
 - VDSO support for CLOCK_MONOTONIC_RAW in clock_gettime()
 - ARCH_HAS_KCOV enabled for arm64
 - Optimise IP checksum helpers
 - SWIOTLB optimisation to only allocate/initialise the buffer if the
   available RAM is beyond the 32-bit mask
 - Properly handle the "nosmp" command line argument
 - Fix for the initialisation of the CPU debug state during early boot
 - vdso-offsets.h build dependency workaround
 - Build fix when RANDOMIZE_BASE is enabled with MODULES off

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (64 commits)
  arm64: arm: Fix-up the removal of the arm64 regs_query_register_name() prototype
  arm64: Only select ARM64_MODULE_PLTS if MODULES=y
  arm64: mm: run pgtable_page_ctor() on non-swapper translation table pages
  arm64: mm: make create_mapping_late() non-allocating
  arm64: Honor nosmp kernel command line option
  arm64: Fix incorrect per-cpu usage for boot CPU
  arm64: kprobes: Add KASAN instrumentation around stack accesses
  arm64: kprobes: Cleanup jprobe_return
  arm64: kprobes: Fix overflow when saving stack
  arm64: kprobes: WARN if attempting to step with PSTATE.D=1
  arm64: debug: remove unused local_dbg_{enable, disable} macros
  arm64: debug: remove redundant spsr manipulation
  arm64: debug: unmask PSTATE.D earlier
  arm64: localise Image objcopy flags
  arm64: ptrace: remove extra define for CPSR's E bit
  kprobes: Add arm64 case in kprobe example module
  arm64: Add kernel return probes support (kretprobes)
  arm64: Add trampoline code for kretprobes
  arm64: kprobes instruction simulation support
  arm64: Treat all entry code as non-kprobe-able
  ...
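Of the items above, the CLOCK_MONOTONIC_RAW support can be exercised straight from userspace through the ordinary clock_gettime() path. A minimal sketch (standard POSIX API only, nothing arm64-specific assumed):

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		/* With the new vDSO code this should not enter the kernel. */
		if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts)) {
			perror("clock_gettime");
			return 1;
		}
		printf("raw monotonic: %lld.%09ld\n",
		       (long long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}

Running this under strace and seeing no clock_gettime syscall is one way to confirm the vDSO fast path is taken.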
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/Makefile | 11
-rw-r--r--  arch/arm64/kernel/arm64ksyms.c | 6
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c | 44
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 17
-rw-r--r--  arch/arm64/kernel/cpu-reset.S | 54
-rw-r--r--  arch/arm64/kernel/cpu-reset.h | 34
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 7
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 4
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 120
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 47
-rw-r--r--  arch/arm64/kernel/efi.c | 50
-rw-r--r--  arch/arm64/kernel/entry.S | 17
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 8
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 10
-rw-r--r--  arch/arm64/kernel/insn.c | 133
-rw-r--r--  arch/arm64/kernel/kgdb.c | 4
-rw-r--r--  arch/arm64/kernel/machine_kexec.c | 212
-rw-r--r--  arch/arm64/kernel/probes/Makefile | 3
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.c | 174
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.h | 35
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 686
-rw-r--r--  arch/arm64/kernel/probes/kprobes_trampoline.S | 81
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.c | 217
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.h | 28
-rw-r--r--  arch/arm64/kernel/ptrace.c | 101
-rw-r--r--  arch/arm64/kernel/relocate_kernel.S | 130
-rw-r--r--  arch/arm64/kernel/setup.c | 2
-rw-r--r--  arch/arm64/kernel/smp.c | 10
-rw-r--r--  arch/arm64/kernel/traps.c | 152
-rw-r--r--  arch/arm64/kernel/vdso.c | 8
-rw-r--r--  arch/arm64/kernel/vdso/Makefile | 7
-rw-r--r--  arch/arm64/kernel/vdso/gettimeofday.S | 331
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 9
33 files changed, 2529 insertions, 223 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index a5125c6d1f87..14f7b651c787 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
- sys_compat.o entry32.o \
- ../../arm/kernel/opcodes.o
+ sys_compat.o entry32.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
@@ -47,12 +46,10 @@ arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
+arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
+ cpu-reset.o
-obj-y += $(arm64-obj-y) vdso/
+obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
head-y := head.o
extra-y += $(head-y) vmlinux.lds
-
-# vDSO - this must be built first to generate the symbol offsets
-$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
-$(obj)/vdso/vdso-offsets.h: $(obj)/vdso
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 678f30b05a45..78f368039c79 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/arm-smccc.h>
+#include <linux/kprobes.h>
#include <asm/checksum.h>
@@ -34,8 +35,8 @@ EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
/* user mem (segment) */
-EXPORT_SYMBOL(__copy_from_user);
-EXPORT_SYMBOL(__copy_to_user);
+EXPORT_SYMBOL(__arch_copy_from_user);
+EXPORT_SYMBOL(__arch_copy_to_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__copy_in_user);
@@ -68,6 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit);
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
+NOKPROBE_SYMBOL(_mcount);
#endif
/* arm-smccc */
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c37202c0c838..5f72475e2e3b 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -316,28 +316,6 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
*/
#define TYPE_SWPB (1 << 22)
-/*
- * Set up process info to signal segmentation fault - called on access error.
- */
-static void set_segfault(struct pt_regs *regs, unsigned long addr)
-{
- siginfo_t info;
-
- down_read(&current->mm->mmap_sem);
- if (find_vma(current->mm, addr) == NULL)
- info.si_code = SEGV_MAPERR;
- else
- info.si_code = SEGV_ACCERR;
- up_read(&current->mm->mmap_sem);
-
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_addr = (void *) instruction_pointer(regs);
-
- pr_debug("SWP{B} emulation: access caused memory abort!\n");
- arm64_notify_die("Illegal memory access", regs, &info, 0);
-}
-
static int emulate_swpX(unsigned int address, unsigned int *data,
unsigned int type)
{
@@ -366,6 +344,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
+{
+ u32 cc_bits = opcode >> 28;
+
+ if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+ if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
+ return ARM_OPCODE_CONDTEST_PASS;
+ else
+ return ARM_OPCODE_CONDTEST_FAIL;
+ }
+ return ARM_OPCODE_CONDTEST_UNCOND;
+}
+
/*
* swp_handler logs the id of calling process, dissects the instruction, sanity
* checks the memory location, calls emulate_swpX for the actual operation and
@@ -380,7 +373,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
type = instr & TYPE_SWPB;
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
@@ -430,7 +423,8 @@ ret:
return 0;
fault:
- set_segfault(regs, address);
+ pr_debug("SWP{B} emulation: access caused memory abort!\n");
+ arm64_notify_segfault(regs, address);
return 0;
}
@@ -461,7 +455,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
{
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 2f4ba774488a..05070b72fc28 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -51,6 +51,17 @@ int main(void)
DEFINE(S_X5, offsetof(struct pt_regs, regs[5]));
DEFINE(S_X6, offsetof(struct pt_regs, regs[6]));
DEFINE(S_X7, offsetof(struct pt_regs, regs[7]));
+ DEFINE(S_X8, offsetof(struct pt_regs, regs[8]));
+ DEFINE(S_X10, offsetof(struct pt_regs, regs[10]));
+ DEFINE(S_X12, offsetof(struct pt_regs, regs[12]));
+ DEFINE(S_X14, offsetof(struct pt_regs, regs[14]));
+ DEFINE(S_X16, offsetof(struct pt_regs, regs[16]));
+ DEFINE(S_X18, offsetof(struct pt_regs, regs[18]));
+ DEFINE(S_X20, offsetof(struct pt_regs, regs[20]));
+ DEFINE(S_X22, offsetof(struct pt_regs, regs[22]));
+ DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
+ DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
+ DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
#ifdef CONFIG_COMPAT
@@ -78,6 +89,7 @@ int main(void)
BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
@@ -85,6 +97,8 @@ int main(void)
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
+ DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
+ DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
@@ -92,7 +106,8 @@ int main(void)
DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult));
+ DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
+ DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
new file mode 100644
index 000000000000..65f42d257414
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -0,0 +1,54 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection .idmap.text, "ax"
+
+/*
+ * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
+ * cpu_soft_restart.
+ *
+ * @el2_switch: Flag to indicate a switch to EL2 is needed.
+ * @entry: Location to jump to for soft reset.
+ * arg0: First argument passed to @entry.
+ * arg1: Second argument passed to @entry.
+ * arg2: Third argument passed to @entry.
+ *
+ * Put the CPU into the same state as it would be if it had been reset, and
+ * branch to what would be the reset vector. It must be executed with the
+ * flat identity mapping.
+ */
+ENTRY(__cpu_soft_restart)
+ /* Clear sctlr_el1 flags. */
+ mrs x12, sctlr_el1
+ ldr x13, =SCTLR_ELx_FLAGS
+ bic x12, x12, x13
+ msr sctlr_el1, x12
+ isb
+
+ cbz x0, 1f // el2_switch?
+ mov x0, #HVC_SOFT_RESTART
+ hvc #0 // no return
+
+1: mov x18, x1 // entry
+ mov x0, x2 // arg0
+ mov x1, x3 // arg1
+ mov x2, x4 // arg2
+ br x18
+ENDPROC(__cpu_soft_restart)
+
+.popsection
diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h
new file mode 100644
index 000000000000..d4e9ecb264f0
--- /dev/null
+++ b/arch/arm64/kernel/cpu-reset.h
@@ -0,0 +1,34 @@
+/*
+ * CPU reset routines
+ *
+ * Copyright (C) 2015 Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_CPU_RESET_H
+#define _ARM64_CPU_RESET_H
+
+#include <asm/virt.h>
+
+void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry,
+ unsigned long arg0, unsigned long arg1, unsigned long arg2);
+
+static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
+ unsigned long entry, unsigned long arg0, unsigned long arg1,
+ unsigned long arg2)
+{
+ typeof(__cpu_soft_restart) *restart;
+
+ el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
+ is_hyp_mode_available();
+ restart = (void *)virt_to_phys(__cpu_soft_restart);
+
+ cpu_install_idmap();
+ restart(el2_switch, entry, arg0, arg1, arg2);
+ unreachable();
+}
+
+#endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index af716b65110d..82b0fc2e637b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -46,6 +46,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.desc = "ARM errata 826319, 827319, 824069",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02),
+ .enable = cpu_enable_cache_maint_trap,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_819472
@@ -54,6 +55,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.desc = "ARM errata 819472",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01),
+ .enable = cpu_enable_cache_maint_trap,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_832075
@@ -133,3 +135,8 @@ void check_local_cpu_errata(void)
{
update_cpu_capabilities(arm64_errata, "enabling workaround for");
}
+
+void __init enable_errata_workarounds(void)
+{
+ enable_cpu_capabilities(arm64_errata);
+}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 811773d1c1d0..916d27ad79c1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -913,8 +913,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
* Run through the enabled capabilities and enable() it on all active
* CPUs
*/
-static void __init
-enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++)
if (caps->enable && cpus_have_cap(caps->capability))
@@ -1036,6 +1035,7 @@ void __init setup_cpu_features(void)
/* Set the CPU feature capabilies */
setup_feature_capabilities();
+ enable_errata_workarounds();
setup_elf_hwcaps(arm64_elf_hwcaps);
if (system_supports_32bit_el0())
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index c173d329397f..ed1b84fe6925 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -183,6 +183,123 @@ const struct seq_operations cpuinfo_op = {
.show = c_show
};
+
+static struct kobj_type cpuregs_kobj_type = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/*
+ * The ARM ARM uses the phrase "32-bit register" to describe a register
+ * whose upper 32 bits are RES0 (per C5.1.1, ARM DDI 0487A.i), however
+ * no statement is made as to whether the upper 32 bits will or will not
+ * be made use of in future, and between ARM DDI 0487A.c and ARM DDI
+ * 0487A.d CLIDR_EL1 was expanded from 32-bit to 64-bit.
+ *
+ * Thus, while both MIDR_EL1 and REVIDR_EL1 are described as 32-bit
+ * registers, we expose them both as 64 bit values to cater for possible
+ * future expansion without an ABI break.
+ */
+#define kobj_to_cpuinfo(kobj) container_of(kobj, struct cpuinfo_arm64, kobj)
+#define CPUREGS_ATTR_RO(_name, _field) \
+ static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+ { \
+ struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \
+ \
+ if (info->reg_midr) \
+ return sprintf(buf, "0x%016x\n", info->reg_##_field); \
+ else \
+ return 0; \
+ } \
+ static struct kobj_attribute cpuregs_attr_##_name = __ATTR_RO(_name)
+
+CPUREGS_ATTR_RO(midr_el1, midr);
+CPUREGS_ATTR_RO(revidr_el1, revidr);
+
+static struct attribute *cpuregs_id_attrs[] = {
+ &cpuregs_attr_midr_el1.attr,
+ &cpuregs_attr_revidr_el1.attr,
+ NULL
+};
+
+static struct attribute_group cpuregs_attr_group = {
+ .attrs = cpuregs_id_attrs,
+ .name = "identification"
+};
+
+static int cpuid_add_regs(int cpu)
+{
+ int rc;
+ struct device *dev;
+ struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+
+ dev = get_cpu_device(cpu);
+ if (!dev) {
+ rc = -ENODEV;
+ goto out;
+ }
+ rc = kobject_add(&info->kobj, &dev->kobj, "regs");
+ if (rc)
+ goto out;
+ rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group);
+ if (rc)
+ kobject_del(&info->kobj);
+out:
+ return rc;
+}
+
+static int cpuid_remove_regs(int cpu)
+{
+ struct device *dev;
+ struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+
+ dev = get_cpu_device(cpu);
+ if (!dev)
+ return -ENODEV;
+ if (info->kobj.parent) {
+ sysfs_remove_group(&info->kobj, &cpuregs_attr_group);
+ kobject_del(&info->kobj);
+ }
+
+ return 0;
+}
+
+static int cpuid_callback(struct notifier_block *nb,
+ unsigned long action, void *hcpu)
+{
+ int rc = 0;
+ unsigned long cpu = (unsigned long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ rc = cpuid_add_regs(cpu);
+ break;
+ case CPU_DEAD:
+ rc = cpuid_remove_regs(cpu);
+ break;
+ }
+
+ return notifier_from_errno(rc);
+}
+
+static int __init cpuinfo_regs_init(void)
+{
+ int cpu;
+
+ cpu_notifier_register_begin();
+
+ for_each_possible_cpu(cpu) {
+ struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+
+ kobject_init(&info->kobj, &cpuregs_kobj_type);
+ if (cpu_online(cpu))
+ cpuid_add_regs(cpu);
+ }
+ __hotcpu_notifier(cpuid_callback, 0);
+
+ cpu_notifier_register_done();
+ return 0;
+}
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
@@ -212,6 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_ctr = read_cpuid_cachetype();
info->reg_dczid = read_cpuid(DCZID_EL0);
info->reg_midr = read_cpuid_id();
+ info->reg_revidr = read_cpuid(REVIDR_EL1);
info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
@@ -264,3 +382,5 @@ void __init cpuinfo_store_boot_cpu(void)
boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
}
+
+device_initcall(cpuinfo_regs_init);
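The cpuinfo.c hunk above also defines the sysfs layout for the new identification registers: each online CPU gets a "regs/identification" group under its device node containing midr_el1 and revidr_el1. A minimal sketch of a userspace reader follows; the path is inferred from the kobject and attribute-group names in this diff, so treat it as an assumption rather than documented ABI:

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		/* Assumed path: cpu device kobject + "regs" + group "identification" */
		FILE *f = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1", "r");

		if (!f) {
			perror("midr_el1");
			return 1;
		}
		if (fgets(buf, sizeof(buf), f))
			printf("cpu0 MIDR_EL1: %s", buf);
		fclose(f);
		return 0;
	}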
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 4fbf3c54275c..91fff48d0f57 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -23,6 +23,7 @@
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/ptrace.h>
+#include <linux/kprobes.h>
#include <linux/stat.h>
#include <linux/uaccess.h>
@@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr)
asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
local_dbg_restore(flags);
}
+NOKPROBE_SYMBOL(mdscr_write);
static u32 mdscr_read(void)
{
@@ -55,6 +57,7 @@ static u32 mdscr_read(void)
asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
return mdscr;
}
+NOKPROBE_SYMBOL(mdscr_read);
/*
* Allow root to disable self-hosted debug from userspace.
@@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el)
mdscr_write(mdscr);
}
}
+NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
@@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el)
mdscr_write(mdscr);
}
}
+NOKPROBE_SYMBOL(disable_debug_monitors);
/*
* OS lock clearing.
@@ -151,7 +156,6 @@ static int debug_monitors_init(void)
/* Clear the OS lock. */
on_each_cpu(clear_os_lock, NULL, 1);
isb();
- local_dbg_enable();
/* Register hotplug handler. */
__register_cpu_notifier(&os_lock_nb);
@@ -166,22 +170,15 @@ postcore_initcall(debug_monitors_init);
*/
static void set_regs_spsr_ss(struct pt_regs *regs)
{
- unsigned long spsr;
-
- spsr = regs->pstate;
- spsr &= ~DBG_SPSR_SS;
- spsr |= DBG_SPSR_SS;
- regs->pstate = spsr;
+ regs->pstate |= DBG_SPSR_SS;
}
+NOKPROBE_SYMBOL(set_regs_spsr_ss);
static void clear_regs_spsr_ss(struct pt_regs *regs)
{
- unsigned long spsr;
-
- spsr = regs->pstate;
- spsr &= ~DBG_SPSR_SS;
- regs->pstate = spsr;
+ regs->pstate &= ~DBG_SPSR_SS;
}
+NOKPROBE_SYMBOL(clear_regs_spsr_ss);
/* EL1 Single Step Handler hooks */
static LIST_HEAD(step_hook);
@@ -225,6 +222,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
return retval;
}
+NOKPROBE_SYMBOL(call_step_hook);
static void send_user_sigtrap(int si_code)
{
@@ -266,6 +264,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
*/
user_rewind_single_step(current);
} else {
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return 0;
+#endif
if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
return 0;
@@ -279,6 +281,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
return 0;
}
+NOKPROBE_SYMBOL(single_step_handler);
/*
* Breakpoint handler is re-entrant as another breakpoint can
@@ -316,19 +319,28 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
}
+NOKPROBE_SYMBOL(call_break_hook);
static int brk_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
if (user_mode(regs)) {
send_user_sigtrap(TRAP_BRKPT);
- } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
- pr_warning("Unexpected kernel BRK exception at EL1\n");
+ }
+#ifdef CONFIG_KPROBES
+ else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
+ if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED)
+ return -EFAULT;
+ }
+#endif
+ else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+ pr_warn("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
}
return 0;
}
+NOKPROBE_SYMBOL(brk_handler);
int aarch32_break_handler(struct pt_regs *regs)
{
@@ -365,6 +377,7 @@ int aarch32_break_handler(struct pt_regs *regs)
send_user_sigtrap(TRAP_BRKPT);
return 0;
}
+NOKPROBE_SYMBOL(aarch32_break_handler);
static int __init debug_traps_init(void)
{
@@ -386,6 +399,7 @@ void user_rewind_single_step(struct task_struct *task)
if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP))
set_regs_spsr_ss(task_pt_regs(task));
}
+NOKPROBE_SYMBOL(user_rewind_single_step);
void user_fastforward_single_step(struct task_struct *task)
{
@@ -401,6 +415,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
mdscr_write(mdscr_read() | DBG_MDSCR_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
+NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
@@ -408,12 +423,14 @@ void kernel_disable_single_step(void)
mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
+NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
return mdscr_read() & DBG_MDSCR_SS;
}
+NOKPROBE_SYMBOL(kernel_active_single_step);
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
@@ -421,8 +438,10 @@ void user_enable_single_step(struct task_struct *task)
set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
set_regs_spsr_ss(task_pt_regs(task));
}
+NOKPROBE_SYMBOL(user_enable_single_step);
void user_disable_single_step(struct task_struct *task)
{
clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
}
+NOKPROBE_SYMBOL(user_disable_single_step);
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 78f52488f9ff..ba9bee389fd5 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -62,13 +62,61 @@ struct screen_info screen_info __section(.data);
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
+ bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_DATA);
+
+ if (!PAGE_ALIGNED(md->phys_addr) ||
+ !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
+ /*
+ * If the end address of this region is not aligned to page
+ * size, the mapping is rounded up, and may end up sharing a
+ * page frame with the next UEFI memory region. If we create
+ * a block entry now, we may need to split it again when mapping
+ * the next region, and support for that is going to be removed
+ * from the MMU routines. So avoid block mappings altogether in
+ * that case.
+ */
+ allow_block_mappings = false;
+ }
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
- __pgprot(prot_val | PTE_NG));
+ __pgprot(prot_val | PTE_NG), allow_block_mappings);
+ return 0;
+}
+
+static int __init set_permissions(pte_t *ptep, pgtable_t token,
+ unsigned long addr, void *data)
+{
+ efi_memory_desc_t *md = data;
+ pte_t pte = *ptep;
+
+ if (md->attribute & EFI_MEMORY_RO)
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+ if (md->attribute & EFI_MEMORY_XP)
+ pte = set_pte_bit(pte, __pgprot(PTE_PXN));
+ set_pte(ptep, pte);
return 0;
}
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+ efi_memory_desc_t *md)
+{
+ BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
+ md->type != EFI_RUNTIME_SERVICES_DATA);
+
+ /*
+ * Calling apply_to_page_range() is only safe on regions that are
+ * guaranteed to be mapped down to pages. Since we are only called
+ * for regions that have been mapped using efi_create_mapping() above
+ * (and this is checked by the generic Memory Attributes table parsing
+ * routines), there is no need to check that again here.
+ */
+ return apply_to_page_range(mm, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+ set_permissions, md);
+}
+
static int __init arm64_dmi_init(void)
{
/*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6c3b7345a6c4..96e4a2b64cc1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -258,6 +258,7 @@ tsk .req x28 // current thread_info
/*
* Exception vectors.
*/
+ .pushsection ".entry.text", "ax"
.align 11
ENTRY(vectors)
@@ -466,7 +467,7 @@ el0_sync:
cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- b.eq el0_undef
+ b.eq el0_sys
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
@@ -547,7 +548,7 @@ el0_ia:
enable_dbg_and_irq
ct_user_exit
mov x0, x26
- orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
+ mov x1, x25
mov x2, sp
bl do_mem_abort
b ret_to_user
@@ -594,6 +595,16 @@ el0_undef:
mov x0, sp
bl do_undefinstr
b ret_to_user
+el0_sys:
+ /*
+ * System instructions, for trapped cache maintenance instructions
+ */
+ enable_dbg_and_irq
+ ct_user_exit
+ mov x0, x25
+ mov x1, sp
+ bl do_sysinstr
+ b ret_to_user
el0_dbg:
/*
* Debug exception handling
@@ -789,6 +800,8 @@ __ni_sys_trace:
bl do_ni_syscall
b __sys_trace_return
+ .popsection // .entry.text
+
/*
* Special system call wrappers.
*/
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index ce21aa88263f..26a6bf77d272 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -24,6 +24,7 @@
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
+#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
@@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n)
return val;
}
+NOKPROBE_SYMBOL(read_wb_reg);
static void write_wb_reg(int reg, int n, u64 val)
{
@@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val)
}
isb();
}
+NOKPROBE_SYMBOL(write_wb_reg);
/*
* Convert a breakpoint privilege level to the corresponding exception
@@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege)
return -EINVAL;
}
}
+NOKPROBE_SYMBOL(debug_exception_level);
enum hw_breakpoint_ops {
HW_BREAKPOINT_INSTALL,
@@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
write_wb_reg(reg, i, ctrl);
}
}
+NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
@@ -654,6 +659,7 @@ unlock:
return 0;
}
+NOKPROBE_SYMBOL(breakpoint_handler);
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
@@ -756,6 +762,7 @@ unlock:
return 0;
}
+NOKPROBE_SYMBOL(watchpoint_handler);
/*
* Handle single-step exception.
@@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
return !handled_exception;
}
+NOKPROBE_SYMBOL(reinstall_suspended_bps);
/*
* Context-switcher for restoring suspended breakpoints.
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 8727f4490772..d3b5f75e652e 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -71,8 +71,16 @@ el1_sync:
msr vbar_el2, x1
b 9f
+2: cmp x0, #HVC_SOFT_RESTART
+ b.ne 3f
+ mov x0, x2
+ mov x2, x4
+ mov x4, x1
+ mov x1, x3
+ br x4 // no return
+
/* Someone called kvm_call_hyp() against the hyp-stub... */
-2: mov x0, #ARM_EXCEPTION_HYP_GONE
+3: mov x0, #ARM_EXCEPTION_HYP_GONE
9: eret
ENDPROC(el1_sync)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 368c08290dd8..63f9432d05e8 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -30,6 +30,7 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
+#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
@@ -162,6 +163,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
aarch64_insn_is_nop(insn);
}
+bool __kprobes aarch64_insn_uses_literal(u32 insn)
+{
+ /* ldr/ldrsw (literal), prfm */
+
+ return aarch64_insn_is_ldr_lit(insn) ||
+ aarch64_insn_is_ldrsw_lit(insn) ||
+ aarch64_insn_is_adr_adrp(insn) ||
+ aarch64_insn_is_prfm_lit(insn);
+}
+
+bool __kprobes aarch64_insn_is_branch(u32 insn)
+{
+ /* b, bl, cb*, tb*, b.cond, br, blr */
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
/*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section B2.6.5 "Concurrent modification and execution of instructions":
@@ -1175,6 +1202,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset)
BUG();
}
+/*
+ * Extract the Op/CR data from a msr/mrs instruction.
+ */
+u32 aarch64_insn_extract_system_reg(u32 insn)
+{
+ return (insn & 0x1FFFE0) >> 5;
+}
+
bool aarch32_insn_is_wide(u32 insn)
{
return insn >= 0xe800;
@@ -1200,3 +1235,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn)
{
return insn & CRM_MASK;
}
+
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) != 0;
+}
+
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) == 0;
+}
+
+static bool __kprobes __check_cs(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_cc(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) != 0;
+}
+
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) == 0;
+}
+
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_ls(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_le(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_al(unsigned long pstate)
+{
+ return true;
+}
+
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+ __check_eq, __check_ne, __check_cs, __check_cc,
+ __check_mi, __check_pl, __check_vs, __check_vc,
+ __check_hi, __check_ls, __check_ge, __check_lt,
+ __check_gt, __check_le, __check_al, __check_al
+};
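The shifted-mask tricks in __check_hi()/__check_ls() and __check_ge()/__check_lt() above are easy to get wrong, so a standalone sanity check is useful. The following is a hypothetical userspace test, with the PSR_*_BIT values mirroring arch/arm64/include/asm/ptrace.h, comparing the bit tricks against the textbook NZCV definitions (HI = C && !Z, GE = N == V):

	#include <assert.h>
	#include <stdbool.h>

	#define PSR_V_BIT	0x10000000UL
	#define PSR_C_BIT	0x20000000UL
	#define PSR_Z_BIT	0x40000000UL
	#define PSR_N_BIT	0x80000000UL

	static bool check_hi(unsigned long pstate)
	{
		pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
		return (pstate & PSR_C_BIT) != 0;
	}

	static bool check_ge(unsigned long pstate)
	{
		pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
		return (pstate & PSR_N_BIT) == 0;
	}

	int main(void)
	{
		unsigned int nzcv;

		/* Walk all 16 NZCV combinations. */
		for (nzcv = 0; nzcv < 16; nzcv++) {
			unsigned long pstate = (unsigned long)nzcv << 28;
			bool n = pstate & PSR_N_BIT, z = pstate & PSR_Z_BIT;
			bool c = pstate & PSR_C_BIT, v = pstate & PSR_V_BIT;

			assert(check_hi(pstate) == (c && !z));
			assert(check_ge(pstate) == (n == v));
		}
		return 0;
	}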
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index b5f063e5eff7..8c57f6496e56 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -22,6 +22,7 @@
#include <linux/irq.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
+#include <linux/kprobes.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -230,6 +231,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return 0;
}
+NOKPROBE_SYMBOL(kgdb_brk_fn)
static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
{
@@ -238,12 +240,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
return 0;
}
+NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
{
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return 0;
}
+NOKPROBE_SYMBOL(kgdb_step_brk_fn);
static struct break_hook kgdb_brkpt_hook = {
.esr_mask = 0xffffffff,
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 000000000000..bc96c8a7fc79
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,212 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+static unsigned long kimage_start;
+
+/**
+ * kexec_image_info - For debugging output.
+ */
+#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
+static void _kexec_image_info(const char *func, int line,
+ const struct kimage *kimage)
+{
+ unsigned long i;
+
+ pr_debug("%s:%d:\n", func, line);
+ pr_debug(" kexec kimage info:\n");
+ pr_debug(" type: %d\n", kimage->type);
+ pr_debug(" start: %lx\n", kimage->start);
+ pr_debug(" head: %lx\n", kimage->head);
+ pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
+
+ for (i = 0; i < kimage->nr_segments; i++) {
+ pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+ i,
+ kimage->segment[i].mem,
+ kimage->segment[i].mem + kimage->segment[i].memsz,
+ kimage->segment[i].memsz,
+ kimage->segment[i].memsz / PAGE_SIZE);
+ }
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+ /* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
+ * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+ kimage_start = kimage->start;
+
+ kexec_image_info(kimage);
+
+ if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
+ pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
+ */
+static void kexec_list_flush(struct kimage *kimage)
+{
+ kimage_entry_t *entry;
+
+ for (entry = &kimage->head; ; entry++) {
+ unsigned int flag;
+ void *addr;
+
+ /* flush the list entries. */
+ __flush_dcache_area(entry, sizeof(kimage_entry_t));
+
+ flag = *entry & IND_FLAGS;
+ if (flag == IND_DONE)
+ break;
+
+ addr = phys_to_virt(*entry & PAGE_MASK);
+
+ switch (flag) {
+ case IND_INDIRECTION:
+ /* Set entry point just before the new list page. */
+ entry = (kimage_entry_t *)addr - 1;
+ break;
+ case IND_SOURCE:
+ /* flush the source pages. */
+ __flush_dcache_area(addr, PAGE_SIZE);
+ break;
+ case IND_DESTINATION:
+ break;
+ default:
+ BUG();
+ }
+ }
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+ unsigned long i;
+
+ pr_debug("%s:\n", __func__);
+
+ for (i = 0; i < kimage->nr_segments; i++) {
+ pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+ i,
+ kimage->segment[i].mem,
+ kimage->segment[i].mem + kimage->segment[i].memsz,
+ kimage->segment[i].memsz,
+ kimage->segment[i].memsz / PAGE_SIZE);
+
+ __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+ kimage->segment[i].memsz);
+ }
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+ phys_addr_t reboot_code_buffer_phys;
+ void *reboot_code_buffer;
+
+ /*
+ * New cpus may have become stuck_in_kernel after we loaded the image.
+ */
+ BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1));
+
+ reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+ reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+ kexec_image_info(kimage);
+
+ pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
+ kimage->control_code_page);
+ pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
+ &reboot_code_buffer_phys);
+ pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
+ reboot_code_buffer);
+ pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
+ arm64_relocate_new_kernel);
+ pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
+ __func__, __LINE__, arm64_relocate_new_kernel_size,
+ arm64_relocate_new_kernel_size);
+
+ /*
+ * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+ * after the kernel is shut down.
+ */
+ memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+ arm64_relocate_new_kernel_size);
+
+ /* Flush the reboot_code_buffer in preparation for its execution. */
+ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+ flush_icache_range((uintptr_t)reboot_code_buffer,
+ arm64_relocate_new_kernel_size);
+
+ /* Flush the kimage list and its buffers. */
+ kexec_list_flush(kimage);
+
+ /* Flush the new image if already in place. */
+ if (kimage->head & IND_DONE)
+ kexec_segment_flush(kimage);
+
+ pr_info("Bye!\n");
+
+ /* Disable all DAIF exceptions. */
+ asm volatile ("msr daifset, #0xf" : : : "memory");
+
+ /*
+ * cpu_soft_restart will shutdown the MMU, disable data caches, then
+ * transfer control to the reboot_code_buffer which contains a copy of
+ * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
+ * uses physical addressing to relocate the new image to its final
+ * position and transfers control to the image entry point when the
+ * relocation is complete.
+ */
+
+ cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
+ kimage_start, 0);
+
+ BUG(); /* Should never get here. */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ /* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
new file mode 100644
index 000000000000..ce06312e3d34
--- /dev/null
+++ b/arch/arm64/kernel/probes/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \
+ kprobes_trampoline.o \
+ simulate-insn.o
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
new file mode 100644
index 000000000000..37e47a9d617e
--- /dev/null
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -0,0 +1,174 @@
+/*
+ * arch/arm64/kernel/probes/decode-insn.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <asm/kprobes.h>
+#include <asm/insn.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+static bool __kprobes aarch64_insn_is_steppable(u32 insn)
+{
+ /*
+ * Branch instructions will write a new value into the PC which is
+ * likely to be relative to the XOL address and therefore invalid.
+ * Deliberate generation of an exception during stepping is also not
+ * currently safe. Lastly, MSR instructions can do any number of nasty
+ * things we can't handle during single-stepping.
+ */
+ if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) {
+ if (aarch64_insn_is_branch(insn) ||
+ aarch64_insn_is_msr_imm(insn) ||
+ aarch64_insn_is_msr_reg(insn) ||
+ aarch64_insn_is_exception(insn) ||
+ aarch64_insn_is_eret(insn))
+ return false;
+
+ /*
+ * The MRS instruction may not return a correct value when
+ * executing in the single-stepping environment. We do make one
+ * exception, for reading the DAIF bits.
+ */
+ if (aarch64_insn_is_mrs(insn))
+ return aarch64_insn_extract_system_reg(insn)
+ != AARCH64_INSN_SPCLREG_DAIF;
+
+ /*
+ * The HINT instruction is problematic when single-stepping,
+ * except for the NOP case.
+ */
+ if (aarch64_insn_is_hint(insn))
+ return aarch64_insn_is_nop(insn);
+
+ return true;
+ }
+
+ /*
+ * Instructions which load PC relative literals are not going to work
+ * when executed from an XOL slot. Instructions doing an exclusive
+ * load/store are not going to complete successfully when single-step
+ * exception handling happens in the middle of the sequence.
+ */
+ if (aarch64_insn_uses_literal(insn) ||
+ aarch64_insn_is_exclusive(insn))
+ return false;
+
+ return true;
+}
+
+/* Return:
+ * INSN_REJECTED If instruction is one not allowed to kprobe,
+ * INSN_GOOD If instruction is supported and uses instruction slot,
+ * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
+ */
+static enum kprobe_insn __kprobes
+arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+ /*
+ * Instructions reading or modifying the PC won't work from the XOL
+ * slot.
+ */
+ if (aarch64_insn_is_steppable(insn))
+ return INSN_GOOD;
+
+ if (aarch64_insn_is_bcond(insn)) {
+ asi->handler = simulate_b_cond;
+ } else if (aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn)) {
+ asi->handler = simulate_cbz_cbnz;
+ } else if (aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn)) {
+ asi->handler = simulate_tbz_tbnz;
+ } else if (aarch64_insn_is_adr_adrp(insn)) {
+ asi->handler = simulate_adr_adrp;
+ } else if (aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn)) {
+ asi->handler = simulate_b_bl;
+ } else if (aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_ret(insn)) {
+ asi->handler = simulate_br_blr_ret;
+ } else if (aarch64_insn_is_ldr_lit(insn)) {
+ asi->handler = simulate_ldr_literal;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ asi->handler = simulate_ldrsw_literal;
+ } else {
+ /*
+ * Instruction cannot be stepped out-of-line and we don't
+ * (yet) simulate it.
+ */
+ return INSN_REJECTED;
+ }
+
+ return INSN_GOOD_NO_SLOT;
+}
+
+static bool __kprobes
+is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
+{
+ while (scan_start > scan_end) {
+ /*
+ * atomic region starts from exclusive load and ends with
+ * exclusive store.
+ */
+ if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start)))
+ return false;
+ else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start)))
+ return true;
+ scan_start--;
+ }
+
+ return false;
+}
+
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
+{
+ enum kprobe_insn decoded;
+ kprobe_opcode_t insn = le32_to_cpu(*addr);
+ kprobe_opcode_t *scan_start = addr - 1;
+ kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+ struct module *mod;
+#endif
+
+ if (addr >= (kprobe_opcode_t *)_text &&
+ scan_end < (kprobe_opcode_t *)_text)
+ scan_end = (kprobe_opcode_t *)_text;
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+ else {
+ preempt_disable();
+ mod = __module_address((unsigned long)addr);
+ if (mod && within_module_init((unsigned long)addr, mod) &&
+ !within_module_init((unsigned long)scan_end, mod))
+ scan_end = (kprobe_opcode_t *)mod->init_layout.base;
+ else if (mod && within_module_core((unsigned long)addr, mod) &&
+ !within_module_core((unsigned long)scan_end, mod))
+ scan_end = (kprobe_opcode_t *)mod->core_layout.base;
+ preempt_enable();
+ }
+#endif
+ decoded = arm_probe_decode_insn(insn, asi);
+
+ if (decoded == INSN_REJECTED ||
+ is_probed_address_atomic(scan_start, scan_end))
+ return INSN_REJECTED;
+
+ return decoded;
+}
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
new file mode 100644
index 000000000000..d438289646a6
--- /dev/null
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -0,0 +1,35 @@
+/*
+ * arch/arm64/kernel/probes/decode-insn.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_ARM64_H
+#define _ARM_KERNEL_KPROBES_ARM64_H
+
+/*
+ * ARM strongly recommends a limit of 128 bytes between LoadExcl and
+ * StoreExcl instructions in a single thread of execution. So keep the
+ * max atomic context size as 32.
+ */
+#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
+
+enum kprobe_insn {
+ INSN_REJECTED,
+ INSN_GOOD_NO_SLOT,
+ INSN_GOOD,
+};
+
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+
+#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
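The decode logic above feeds the generic kprobes core, so probes on arm64 are registered through the usual register_kprobe() interface. A hedged sketch of a client module, modelled on the in-tree samples/kprobes/kprobe_example.c (the probed symbol and log messages are illustrative only):

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/kprobes.h>

	static struct kprobe kp = {
		.symbol_name	= "do_fork",	/* hypothetical probe target */
	};

	static int handler_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("pre_handler: p->addr = %p, pc = 0x%lx, pstate = 0x%lx\n",
			p->addr, instruction_pointer(regs),
			(unsigned long)regs->pstate);
		return 0;
	}

	static void handler_post(struct kprobe *p, struct pt_regs *regs,
				 unsigned long flags)
	{
		pr_info("post_handler: p->addr = %p, pstate = 0x%lx\n",
			p->addr, (unsigned long)regs->pstate);
	}

	static int __init kprobe_init(void)
	{
		int ret;

		kp.pre_handler = handler_pre;
		kp.post_handler = handler_post;

		ret = register_kprobe(&kp);
		if (ret < 0) {
			pr_err("register_kprobe failed, returned %d\n", ret);
			return ret;
		}
		pr_info("Planted kprobe at %p\n", kp.addr);
		return 0;
	}

	static void __exit kprobe_exit(void)
	{
		unregister_kprobe(&kp);
		pr_info("kprobe at %p unregistered\n", kp.addr);
	}

	module_init(kprobe_init);
	module_exit(kprobe_exit);
	MODULE_LICENSE("GPL");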
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
new file mode 100644
index 000000000000..bf9768588288
--- /dev/null
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -0,0 +1,686 @@
+/*
+ * arch/arm64/kernel/probes/kprobes.c
+ *
+ * Kprobes support for ARM64
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/stringify.h>
+#include <asm/traps.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/system_misc.h>
+#include <asm/insn.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm-generic/sections.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+
+static inline unsigned long min_stack_size(unsigned long addr)
+{
+ unsigned long size;
+
+ if (on_irq_stack(addr, raw_smp_processor_id()))
+ size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
+ else
+ size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
+
+ return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
+}
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+ /* prepare insn slot */
+ p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+
+ flush_icache_range((uintptr_t) (p->ainsn.insn),
+ (uintptr_t) (p->ainsn.insn) +
+ MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+ /*
+ * Needs restoring of return address after stepping xol.
+ */
+ p->ainsn.restore = (unsigned long) p->addr +
+ sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+ /* This instruction is not executed out of line (XOL); no need to adjust the PC */
+ p->ainsn.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (p->ainsn.handler)
+ p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+
+ /* single step simulated, now go for post processing */
+ post_kprobe_handler(kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long probe_addr = (unsigned long)p->addr;
+ extern char __start_rodata[];
+ extern char __end_rodata[];
+
+ if (probe_addr & 0x3)
+ return -EINVAL;
+
+ /* copy instruction */
+ p->opcode = le32_to_cpu(*p->addr);
+
+ if (in_exception_text(probe_addr))
+ return -EINVAL;
+ if (probe_addr >= (unsigned long) __start_rodata &&
+ probe_addr <= (unsigned long) __end_rodata)
+ return -EINVAL;
+
+ /* decode instruction */
+ switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) {
+ case INSN_REJECTED: /* insn not supported */
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT: /* insn need simulation */
+ p->ainsn.insn = NULL;
+ break;
+
+ case INSN_GOOD: /* instruction uses slot */
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+ break;
+ };
+
+ /* prepare the instruction */
+ if (p->ainsn.insn)
+ arch_prepare_ss_slot(p);
+ else
+ arch_prepare_simulate(p);
+
+ return 0;
+}
+
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+ void *addrs[1];
+ u32 insns[1];
+
+ addrs[0] = (void *)addr;
+ insns[0] = (u32)opcode;
+
+ return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
+/* arm kprobe: install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, BRK64_OPCODE_KPROBES);
+}
+
+/* disarm kprobe: remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * The D-flag (Debug mask) is set (masked) upon debug exception entry.
+ * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive
+ * probe i.e. when probe hit from kprobe handler context upon
+ * executing the pre/post handlers. In this case we return with
+ * D-flag clear so that single-stepping can be carried-out.
+ *
+ * Leave D-flag set in all other cases.
+ */
+static void __kprobes
+spsr_set_debug_flag(struct pt_regs *regs, int mask)
+{
+ unsigned long spsr = regs->pstate;
+
+ if (mask)
+ spsr |= PSR_D_BIT;
+ else
+ spsr &= ~PSR_D_BIT;
+
+ regs->pstate = spsr;
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * reenabled until after single-step mode ends.
+ * Without disabling interrupt on local CPU, there is a chance of
+ * interrupt occurrence in the period of exception return and start of
+ * out-of-line single-step, that result in wrongly single stepping
+ * into the interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ kcb->saved_irqflag = regs->pstate;
+ regs->pstate |= PSR_I_BIT;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ if (kcb->saved_irqflag & PSR_I_BIT)
+ regs->pstate |= PSR_I_BIT;
+ else
+ regs->pstate &= ~PSR_I_BIT;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+ kcb->ss_ctx.ss_pending = true;
+ kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+ kcb->ss_ctx.ss_pending = false;
+ kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, int reenter)
+{
+ unsigned long slot;
+
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+
+
+ if (p->ainsn.insn) {
+ /* prepare for single stepping */
+ slot = (unsigned long)p->ainsn.insn;
+
+ set_ss_context(kcb, slot); /* mark pending ss */
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 0);
+ else
+ WARN_ON(regs->pstate & PSR_D_BIT);
+
+ /* IRQs and single stepping do not mix well. */
+ kprobes_save_local_irqflag(kcb, regs);
+ kernel_enable_single_step(regs);
+ instruction_pointer_set(regs, slot);
+ } else {
+ /* insn simulation */
+ arch_simulate_insn(p, regs);
+ }
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, regs, kcb, 1);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr);
+ dump_kprobe(p);
+ BUG();
+ break;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+
+ if (!cur)
+ return;
+
+ /* return addr restore if non-branching insn */
+ if (cur->ainsn.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.restore);
+
+ /* restore back original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return;
+ }
+ /* call post handler */
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ if (cur->post_handler) {
+ /*
+ * The post_handler can hit a breakpoint and single-step
+ * again, so we enable the D-flag for the recursive exception.
+ */
+ cur->post_handler(cur, regs, 0);
+ }
+
+ reset_current_kprobe();
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single-
+ * stepped caused a page fault. We reset the current
+ * kprobe, point the ip back to the probe address,
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ instruction_pointer_set(regs, (unsigned long) cur->addr);
+ if (!instruction_pointer(regs))
+ BUG();
+
+ kernel_disable_single_step();
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 1);
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting;
+ * the npre/npostfault counts could also be used to
+ * account for these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused a page fault. This could happen if
+ * the handler tries to access user space via
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+ }
+ return 0;
+}
+
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+static void __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p, *cur_kprobe;
+ struct kprobe_ctlblk *kcb;
+ unsigned long addr = instruction_pointer(regs);
+
+ kcb = get_kprobe_ctlblk();
+ cur_kprobe = kprobe_running();
+
+ p = get_kprobe((kprobe_opcode_t *) addr);
+
+ if (p) {
+ if (cur_kprobe) {
+ if (reenter_kprobe(p, regs, kcb))
+ return;
+ } else {
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for calling the break_handler below on re-entry,
+ * so get out doing nothing more here.
+ *
+ * The pre_handler can hit a breakpoint and single-step
+ * through it before returning, so keep the PSTATE D-flag
+ * enabled until the pre_handler returns.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ setup_singlestep(p, regs, kcb, 0);
+ return;
+ }
+ }
+ } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) ==
+ BRK64_OPCODE_KPROBES) && cur_kprobe) {
+ /* We probably hit a jprobe. Call its break handler. */
+ if (cur_kprobe->break_handler &&
+ cur_kprobe->break_handler(cur_kprobe, regs)) {
+ setup_singlestep(cur_kprobe, regs, kcb, 0);
+ return;
+ }
+ }
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Return to the original instruction and continue.
+ */
+}
+
+static int __kprobes
+kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+ if ((kcb->ss_ctx.ss_pending)
+ && (kcb->ss_ctx.match_addr == addr)) {
+ clear_ss_context(kcb); /* clear pending ss */
+ return DBG_HOOK_HANDLED;
+ }
+ /* not ours, kprobes should ignore it */
+ return DBG_HOOK_ERROR;
+}
+
+int __kprobes
+kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ int retval;
+
+ /* return error if this is not our step */
+ retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
+
+ if (retval == DBG_HOOK_HANDLED) {
+ kprobes_restore_local_irqflag(kcb, regs);
+ kernel_disable_single_step();
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 1);
+
+ post_kprobe_handler(kcb, regs);
+ }
+
+ return retval;
+}
+
+int __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
+{
+ kprobe_handler(regs);
+ return DBG_HOOK_HANDLED;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ long stack_ptr = kernel_stack_pointer(regs);
+
+ kcb->jprobe_saved_regs = *regs;
+ /*
+ * As Linus pointed out, gcc assumes that the callee
+ * owns the argument space and could overwrite it, e.g.
+ * tailcall optimization. So, to be absolutely safe
+ * we also save and restore enough stack bytes to cover
+ * the argument area.
+ */
+ kasan_disable_current();
+ memcpy(kcb->jprobes_stack, (void *)stack_ptr,
+ min_stack_size(stack_ptr));
+ kasan_enable_current();
+
+ instruction_pointer_set(regs, (unsigned long) jp->entry);
+ preempt_disable();
+ pause_graph_tracing();
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+ * The jprobe handler returns by entering a break exception,
+ * encoded the same as a kprobe, but with the following differences:
+ * - a special PC to distinguish it from other kprobes
+ * - the stack address is restored from the originally saved pt_regs
+ */
+ asm volatile(" mov sp, %0 \n"
+ "jprobe_return_break: brk %1 \n"
+ :
+ : "r" (kcb->jprobe_saved_regs.sp),
+ "I" (BRK64_ESR_KPROBES)
+ : "memory");
+
+ unreachable();
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ long stack_addr = kcb->jprobe_saved_regs.sp;
+ long orig_sp = kernel_stack_pointer(regs);
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ extern const char jprobe_return_break[];
+
+ if (instruction_pointer(regs) != (u64) jprobe_return_break)
+ return 0;
+
+ if (orig_sp != stack_addr) {
+ struct pt_regs *saved_regs =
+ (struct pt_regs *)kcb->jprobe_saved_regs.sp;
+ pr_err("current sp %lx does not match saved sp %lx\n",
+ orig_sp, stack_addr);
+ pr_err("Saved registers for jprobe %p\n", jp);
+ show_regs(saved_regs);
+ pr_err("Current registers\n");
+ show_regs(regs);
+ BUG();
+ }
+ unpause_graph_tracing();
+ *regs = kcb->jprobe_saved_regs;
+ kasan_disable_current();
+ memcpy((void *)stack_addr, kcb->jprobes_stack,
+ min_stack_size(stack_addr));
+ kasan_enable_current();
+ preempt_enable_no_resched();
+ return 1;
+}
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ extern char __idmap_text_start[], __idmap_text_end[];
+ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+
+ if ((addr >= (unsigned long)__kprobes_text_start &&
+ addr < (unsigned long)__kprobes_text_end) ||
+ (addr >= (unsigned long)__entry_text_start &&
+ addr < (unsigned long)__entry_text_end) ||
+ (addr >= (unsigned long)__idmap_text_start &&
+ addr < (unsigned long)__idmap_text_end) ||
+ !!search_exception_tables(addr))
+ return true;
+
+ if (!is_kernel_in_hyp_mode()) {
+ if ((addr >= (unsigned long)__hyp_text_start &&
+ addr < (unsigned long)__hyp_text_end) ||
+ (addr >= (unsigned long)__hyp_idmap_text_start &&
+ addr < (unsigned long)__hyp_idmap_text_end))
+ return true;
+ }
+
+ return false;
+}
+
+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =
+ (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task because multiple functions in the call path have return
+ * probes installed on them, and/or because more than one return
+ * probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always pushed into the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the (chronologically) first instance's ret_addr
+ * will be the real return address, and all the rest will
+ * point to kretprobe_trampoline.
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (ri->rp && ri->rp->handler) {
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_hash_unlock(current, &flags);
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ return (void *)orig_ret_address;
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
+
+ /* replace return addr (x30) with trampoline */
+ regs->regs[30] = (long)&kretprobe_trampoline;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+ return 0;
+}
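The handler flow added above (brk hook -> pre_handler -> out-of-line single-step or simulation -> post_handler) is what an ordinary kprobes client exercises. A minimal sketch of such a client using the generic kprobes module API; the probed symbol "_do_fork", the demo_* names and the printed fields are illustrative only:

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("pre: pc=%llx x0=%llx\n",
		(unsigned long long)regs->pc,
		(unsigned long long)regs->regs[0]);
	return 0;	/* 0: continue to single-step/simulation and post_handler */
}

static void demo_post(struct kprobe *p, struct pt_regs *regs,
		      unsigned long flags)
{
	pr_info("post: pc=%llx pstate=%llx\n",
		(unsigned long long)regs->pc,
		(unsigned long long)regs->pstate);
}

static struct kprobe demo_kp = {
	.symbol_name	= "_do_fork",		/* illustrative target */
	.pre_handler	= demo_pre,
	.post_handler	= demo_post,
};

static int __init demo_init(void)
{
	return register_kprobe(&demo_kp);
}

static void __exit demo_exit(void)
{
	unregister_kprobe(&demo_kp);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");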
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
new file mode 100644
index 000000000000..5d6e7f14638c
--- /dev/null
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -0,0 +1,81 @@
+/*
+ * trampoline entry and return code for kretprobes.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+
+ .text
+
+ .macro save_all_base_regs
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ add x0, sp, #S_FRAME_SIZE
+ stp lr, x0, [sp, #S_LR]
+ /*
+ * Construct a useful saved PSTATE
+ */
+ mrs x0, nzcv
+ mrs x1, daif
+ orr x0, x0, x1
+ mrs x1, CurrentEL
+ orr x0, x0, x1
+ mrs x1, SPSel
+ orr x0, x0, x1
+ stp xzr, x0, [sp, #S_PC]
+ .endm
+
+ .macro restore_all_base_regs
+ ldr x0, [sp, #S_PSTATE]
+ and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
+ msr nzcv, x0
+ ldp x0, x1, [sp, #S_X0]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldp x8, x9, [sp, #S_X8]
+ ldp x10, x11, [sp, #S_X10]
+ ldp x12, x13, [sp, #S_X12]
+ ldp x14, x15, [sp, #S_X14]
+ ldp x16, x17, [sp, #S_X16]
+ ldp x18, x19, [sp, #S_X18]
+ ldp x20, x21, [sp, #S_X20]
+ ldp x22, x23, [sp, #S_X22]
+ ldp x24, x25, [sp, #S_X24]
+ ldp x26, x27, [sp, #S_X26]
+ ldp x28, x29, [sp, #S_X28]
+ .endm
+
+ENTRY(kretprobe_trampoline)
+ sub sp, sp, #S_FRAME_SIZE
+
+ save_all_base_regs
+
+ mov x0, sp
+ bl trampoline_probe_handler
+ /*
+ * Replace trampoline address in lr with actual orig_ret_addr return
+ * address.
+ */
+ mov lr, x0
+
+ restore_all_base_regs
+
+ add sp, sp, #S_FRAME_SIZE
+ ret
+
+ENDPROC(kretprobe_trampoline)
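The trampoline above is the address that arch_prepare_kretprobe() patches into x30; trampoline_probe_handler() then returns the real return address, which is restored into lr before ret. A minimal kretprobe client riding on that mechanism; the target symbol and the demo_* names are illustrative only:

#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>

static int demo_ret(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* x0 holds the return value here; ri->ret_addr is the original
	 * return address that trampoline_probe_handler() hands back. */
	pr_info("%s returned %lx to %pS\n", ri->rp->kp.symbol_name,
		(unsigned long)regs->regs[0], ri->ret_addr);
	return 0;
}

static struct kretprobe demo_rp = {
	.kp.symbol_name	= "_do_fork",	/* illustrative target */
	.handler	= demo_ret,
	.maxactive	= 16,		/* instances tracked concurrently */
};

static int __init demo_init(void)
{
	return register_kretprobe(&demo_rp);
}

static void __exit demo_exit(void)
{
	unregister_kretprobe(&demo_rp);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");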
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
new file mode 100644
index 000000000000..8977ce9d009d
--- /dev/null
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -0,0 +1,217 @@
+/*
+ * arch/arm64/kernel/probes/simulate-insn.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "simulate-insn.h"
+
+#define sign_extend(x, signbit) \
+ ((x) | (0 - ((x) & (1 << (signbit)))))
+
+#define bbl_displacement(insn) \
+ sign_extend(((insn) & 0x3ffffff) << 2, 27)
+
+#define bcond_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+
+#define cbz_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+
+#define tbz_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x3fff) << 2, 15)
+
+#define ldr_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+
+static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
+{
+ if (reg < 31)
+ regs->regs[reg] = val;
+}
+
+static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val)
+{
+ if (reg < 31)
+ regs->regs[reg] = lower_32_bits(val);
+}
+
+static inline u64 get_x_reg(struct pt_regs *regs, int reg)
+{
+ if (reg < 31)
+ return regs->regs[reg];
+ else
+ return 0;
+}
+
+static inline u32 get_w_reg(struct pt_regs *regs, int reg)
+{
+ if (reg < 31)
+ return lower_32_bits(regs->regs[reg]);
+ else
+ return 0;
+}
+
+static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+
+ return (opcode & (1 << 31)) ?
+ (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0);
+}
+
+static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+
+ return (opcode & (1 << 31)) ?
+ (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0);
+}
+
+static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+ int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f);
+
+ return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0;
+}
+
+static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+ int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f);
+
+ return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0;
+}
+
+/*
+ * instruction simulation functions
+ */
+void __kprobes
+simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs)
+{
+ long imm, xn, val;
+
+ xn = opcode & 0x1f;
+ imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3);
+ imm = sign_extend(imm, 20);
+ if (opcode & 0x80000000)
+ val = (imm<<12) + (addr & 0xfffffffffffff000);
+ else
+ val = imm + addr;
+
+ set_x_reg(regs, xn, val);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
+
+void __kprobes
+simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = bbl_displacement(opcode);
+
+ /* Link register is x30 */
+ if (opcode & (1 << 31))
+ set_x_reg(regs, 30, addr + 4);
+
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff))
+ disp = bcond_displacement(opcode);
+
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int xn = (opcode >> 5) & 0x1f;
+
+ /* update pc first in case we're doing a "blr lr" */
+ instruction_pointer_set(regs, get_x_reg(regs, xn));
+
+ /* Link register is x30 */
+ if (((opcode >> 21) & 0x3) == 1)
+ set_x_reg(regs, 30, addr + 4);
+}
+
+void __kprobes
+simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (opcode & (1 << 24)) {
+ if (check_cbnz(opcode, regs))
+ disp = cbz_displacement(opcode);
+ } else {
+ if (check_cbz(opcode, regs))
+ disp = cbz_displacement(opcode);
+ }
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (opcode & (1 << 24)) {
+ if (check_tbnz(opcode, regs))
+ disp = tbz_displacement(opcode);
+ } else {
+ if (check_tbz(opcode, regs))
+ disp = tbz_displacement(opcode);
+ }
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
+{
+ u64 *load_addr;
+ int xn = opcode & 0x1f;
+ int disp;
+
+ disp = ldr_displacement(opcode);
+ load_addr = (u64 *) (addr + disp);
+
+ if (opcode & (1 << 30)) /* x0-x30 */
+ set_x_reg(regs, xn, *load_addr);
+ else /* w0-w30 */
+ set_w_reg(regs, xn, *load_addr);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
+
+void __kprobes
+simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
+{
+ s32 *load_addr;
+ int xn = opcode & 0x1f;
+ int disp;
+
+ disp = ldr_displacement(opcode);
+ load_addr = (s32 *) (addr + disp);
+
+ set_x_reg(regs, xn, *load_addr);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
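The displacement macros above shift the immediate left by 2 (instructions are 4-byte aligned) and sign-extend from the resulting top bit. A stand-alone sketch of the same arithmetic for a B encoding, with a hand-picked opcode value; only a reading aid, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the sign_extend()/bbl_displacement() macros above. */
static long sign_extend_to_long(long x, int signbit)
{
	return x | (0 - (x & (1L << signbit)));
}

int main(void)
{
	uint32_t insn = 0x17ffffff;	/* "b #-4": imm26 = 0x3ffffff */
	long disp = sign_extend_to_long((insn & 0x3ffffff) << 2, 27);

	printf("branch displacement = %ld\n", disp);	/* prints -4 */
	return 0;
}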
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
new file mode 100644
index 000000000000..050bde683c2d
--- /dev/null
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -0,0 +1,28 @@
+/*
+ * arch/arm64/kernel/probes/simulate-insn.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H
+#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H
+
+void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs);
+
+#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 3f6cd5c5234f..030c1d5aa46d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -48,6 +48,107 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+#define GPR_OFFSET_NAME(r) \
+ {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])},
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(pc),
+ REG_OFFSET_NAME(pstate),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks whether @addr is within the kernel stack
+ * page(s). If it is, this returns true; if not, it returns false.
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return ((addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
+ on_irq_stack(addr, raw_smp_processor_id());
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n-th entry of the kernel stack
+ * specified by @regs. If the @n-th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
+}
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
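regs_query_register_offset() and regs_get_kernel_stack_nth() exist so that generic code, such as kprobe-based trace events, can fetch probe arguments by register name or stack slot. A sketch of a hypothetical caller; the fetch_arg() helper is an assumption, and the open-coded pointer arithmetic stands in for the regs_get_register() accessor that lives in asm/ptrace.h rather than in this hunk:

/* Hypothetical consumer of the accessors added above. */
static unsigned long fetch_arg(struct pt_regs *regs, const char *name,
			       unsigned int stack_slot)
{
	int off = regs_query_register_offset(name);	/* e.g. "x0", "sp", "pc" */

	if (off >= 0)
		return *(unsigned long *)((char *)regs + off);

	/* Unknown name: fall back to the n-th word on the kernel stack. */
	return regs_get_kernel_stack_nth(regs, stack_slot);
}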
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..51b73cdde287
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,130 @@
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location. To ensure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+ENTRY(arm64_relocate_new_kernel)
+
+ /* Setup the list loop variables. */
+ mov x17, x1 /* x17 = kimage_start */
+ mov x16, x0 /* x16 = kimage_head */
+ dcache_line_size x15, x0 /* x15 = dcache line size */
+ mov x14, xzr /* x14 = entry ptr */
+ mov x13, xzr /* x13 = copy dest */
+
+ /* Clear the sctlr_el2 flags. */
+ mrs x0, CurrentEL
+ cmp x0, #CurrentEL_EL2
+ b.ne 1f
+ mrs x0, sctlr_el2
+ ldr x1, =SCTLR_ELx_FLAGS
+ bic x0, x0, x1
+ msr sctlr_el2, x0
+ isb
+1:
+
+ /* Check if the new image needs relocation. */
+ tbnz x16, IND_DONE_BIT, .Ldone
+
+.Lloop:
+ and x12, x16, PAGE_MASK /* x12 = addr */
+
+ /* Test the entry flags. */
+.Ltest_source:
+ tbz x16, IND_SOURCE_BIT, .Ltest_indirection
+
+ /* Invalidate dest page to PoC. */
+ mov x0, x13
+ add x20, x0, #PAGE_SIZE
+ sub x1, x15, #1
+ bic x0, x0, x1
+2: dc ivac, x0
+ add x0, x0, x15
+ cmp x0, x20
+ b.lo 2b
+ dsb sy
+
+ mov x20, x13
+ mov x21, x12
+ copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
+
+ /* dest += PAGE_SIZE */
+ add x13, x13, PAGE_SIZE
+ b .Lnext
+
+.Ltest_indirection:
+ tbz x16, IND_INDIRECTION_BIT, .Ltest_destination
+
+ /* ptr = addr */
+ mov x14, x12
+ b .Lnext
+
+.Ltest_destination:
+ tbz x16, IND_DESTINATION_BIT, .Lnext
+
+ /* dest = addr */
+ mov x13, x12
+
+.Lnext:
+ /* entry = *ptr++ */
+ ldr x16, [x14], #8
+
+ /* while (!(entry & DONE)) */
+ tbz x16, IND_DONE_BIT, .Lloop
+
+.Ldone:
+ /* wait for writes from copy_page to finish */
+ dsb nsh
+ ic iallu
+ dsb nsh
+ isb
+
+ /* Start new image. */
+ mov x0, xzr
+ mov x1, xzr
+ mov x2, xzr
+ mov x3, xzr
+ br x17
+
+ENDPROC(arm64_relocate_new_kernel)
+
+.ltorg
+
+.align 3 /* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+ .quad .Lcopy_end - arm64_relocate_new_kernel
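The loop in arm64_relocate_new_kernel is the architecture-specific walk of the generic kimage entry list. A C rendering of the same state machine, with the cache maintenance omitted, may help when mapping the register comments (x12-x16) back to the list semantics; this is an illustrative sketch, not kernel code:

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

#define IND_DESTINATION	0x1	/* entry is the next copy destination */
#define IND_INDIRECTION	0x2	/* entry points to the next entry page */
#define IND_DONE	0x4	/* end of list */
#define IND_SOURCE	0x8	/* entry is a source page to copy */

/* Same walk as arm64_relocate_new_kernel, minus the cache maintenance. */
void relocate(uint64_t head)
{
	uint64_t entry = head;	/* x16: current list entry */
	uint64_t *ptr = NULL;	/* x14: entry pointer      */
	char *dest = NULL;	/* x13: copy destination   */

	while (!(entry & IND_DONE)) {
		char *addr = (char *)(uintptr_t)(entry & PAGE_MASK);	/* x12 */

		if (entry & IND_SOURCE) {
			memcpy(dest, addr, PAGE_SIZE);		/* copy_page */
			dest += PAGE_SIZE;
		} else if (entry & IND_INDIRECTION) {
			ptr = (uint64_t *)addr;			/* new entry page */
		} else if (entry & IND_DESTINATION) {
			dest = addr;				/* new copy dest */
		}

		entry = *ptr++;			/* ldr x16, [x14], #8 */
	}
	/* The assembly then clears x0-x3 and branches to kimage_start (x17). */
}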
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 92f0e1e767cf..5b8256770e22 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -202,7 +202,7 @@ static void __init request_standard_resources(void)
struct resource *res;
kernel_code.start = virt_to_phys(_text);
- kernel_code.end = virt_to_phys(_etext - 1);
+ kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a68e0ccd9f4b..76a6d9263908 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
- local_dbg_enable();
local_irq_enable();
local_async_enable();
@@ -437,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
cpuinfo_store_boot_cpu();
save_boot_cpu_run_el();
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
static u64 __init of_get_cpu_mpidr(struct device_node *dn)
@@ -696,6 +695,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
smp_store_cpu_info(smp_processor_id());
/*
+ * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
+ * secondary CPUs present.
+ */
+ if (max_cpus == 0)
+ return;
+
+ /*
* Initialise the present map (which describes the set of CPUs
* actually populated at the present time) and release the
* secondaries from the bootloader.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 2a43012616b7..e04f83873af7 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -41,6 +41,7 @@
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
+#include <asm/sysreg.h>
static const char *handler[]= {
"Synchronous Abort",
@@ -52,15 +53,14 @@ static const char *handler[]= {
int show_unhandled_signals = 1;
/*
- * Dump out the contents of some memory nicely...
+ * Dump out the contents of some kernel memory nicely...
*/
static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
- unsigned long top, bool compat)
+ unsigned long top)
{
unsigned long first;
mm_segment_t fs;
int i;
- unsigned int width = compat ? 4 : 8;
/*
* We need to switch to kernel mode so that we can use __get_user
@@ -78,22 +78,15 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
memset(str, ' ', sizeof(str));
str[sizeof(str) - 1] = '\0';
- for (p = first, i = 0; i < (32 / width)
- && p < top; i++, p += width) {
+ for (p = first, i = 0; i < (32 / 8)
+ && p < top; i++, p += 8) {
if (p >= bottom && p < top) {
unsigned long val;
- if (width == 8) {
- if (__get_user(val, (unsigned long *)p) == 0)
- sprintf(str + i * 17, " %016lx", val);
- else
- sprintf(str + i * 17, " ????????????????");
- } else {
- if (__get_user(val, (unsigned int *)p) == 0)
- sprintf(str + i * 9, " %08lx", val);
- else
- sprintf(str + i * 9, " ????????");
- }
+ if (__get_user(val, (unsigned long *)p) == 0)
+ sprintf(str + i * 17, " %016lx", val);
+ else
+ sprintf(str + i * 17, " ????????????????");
}
}
printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
@@ -216,7 +209,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
dump_mem("", "Exception stack", stack,
- stack + sizeof(struct pt_regs), false);
+ stack + sizeof(struct pt_regs));
}
}
}
@@ -254,10 +247,9 @@ static int __die(const char *str, int err, struct thread_info *thread,
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
- if (!user_mode(regs) || in_interrupt()) {
+ if (!user_mode(regs)) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
- THREAD_SIZE + (unsigned long)task_stack_page(tsk),
- compat_user_mode(regs));
+ THREAD_SIZE + (unsigned long)task_stack_page(tsk));
dump_backtrace(regs, tsk);
dump_instr(KERN_EMERG, regs);
}
@@ -373,11 +365,59 @@ exit:
return fn ? fn(regs, instr) : 1;
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+static void force_signal_inject(int signal, int code, struct pt_regs *regs,
+ unsigned long address)
{
siginfo_t info;
void __user *pc = (void __user *)instruction_pointer(regs);
+ const char *desc;
+ switch (signal) {
+ case SIGILL:
+ desc = "undefined instruction";
+ break;
+ case SIGSEGV:
+ desc = "illegal memory access";
+ break;
+ default:
+ desc = "bad mode";
+ break;
+ }
+
+ if (unhandled_signal(current, signal) &&
+ show_unhandled_signals_ratelimited()) {
+ pr_info("%s[%d]: %s: pc=%p\n",
+ current->comm, task_pid_nr(current), desc, pc);
+ dump_instr(KERN_INFO, regs);
+ }
+
+ info.si_signo = signal;
+ info.si_errno = 0;
+ info.si_code = code;
+ info.si_addr = pc;
+
+ arm64_notify_die(desc, regs, &info, 0);
+}
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr)
+{
+ int code;
+
+ down_read(&current->mm->mmap_sem);
+ if (find_vma(current->mm, addr) == NULL)
+ code = SEGV_MAPERR;
+ else
+ code = SEGV_ACCERR;
+ up_read(&current->mm->mmap_sem);
+
+ force_signal_inject(SIGSEGV, code, regs, addr);
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
return;
@@ -385,18 +425,66 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
if (call_undef_hook(regs) == 0)
return;
- if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) {
- pr_info("%s[%d]: undefined instruction: pc=%p\n",
- current->comm, task_pid_nr(current), pc);
- dump_instr(KERN_INFO, regs);
- }
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+}
- info.si_signo = SIGILL;
- info.si_errno = 0;
- info.si_code = ILL_ILLOPC;
- info.si_addr = pc;
+void cpu_enable_cache_maint_trap(void *__unused)
+{
+ config_sctlr_el1(SCTLR_EL1_UCI, 0);
+}
+
+#define __user_cache_maint(insn, address, res) \
+ asm volatile ( \
+ "1: " insn ", %1\n" \
+ " mov %w0, #0\n" \
+ "2:\n" \
+ " .pushsection .fixup,\"ax\"\n" \
+ " .align 2\n" \
+ "3: mov %w0, %w2\n" \
+ " b 2b\n" \
+ " .popsection\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : "=r" (res) \
+ : "r" (address), "i" (-EFAULT) )
+
+asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+{
+ unsigned long address;
+ int ret;
- arm64_notify_die("Oops - undefined instruction", regs, &info, 0);
+ /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */
+ if ((esr & 0x01fffc01) == 0x0012dc00) {
+ int rt = (esr >> 5) & 0x1f;
+ int crm = (esr >> 1) & 0x0f;
+
+ address = (rt == 31) ? 0 : regs->regs[rt];
+
+ switch (crm) {
+ case 11: /* DC CVAU, gets promoted */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case 10: /* DC CVAC, gets promoted */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case 14: /* DC CIVAC */
+ __user_cache_maint("dc civac", address, ret);
+ break;
+ case 5: /* IC IVAU */
+ __user_cache_maint("ic ivau", address, ret);
+ break;
+ default:
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+ return;
+ }
+ } else {
+ force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+ return;
+ }
+
+ if (ret)
+ arm64_notify_segfault(regs, address);
+ else
+ regs->pc += 4;
}
long compat_arm_syscall(struct pt_regs *regs);
@@ -465,7 +553,7 @@ static const char *esr_class_str[] = {
const char *esr_get_class_string(u32 esr)
{
- return esr_class_str[esr >> ESR_ELx_EC_SHIFT];
+ return esr_class_str[ESR_ELx_EC(esr)];
}
/*
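The constants tested in do_sysinstr() are the Op0/Op2/Op1/CRn fields of the ESR ISS for a trapped cache maintenance instruction, and rt/crm are extracted from the same word. A small stand-alone decode using the same masks and shifts; the ESR value below was constructed by hand for "dc cvau, x3" and is illustrative only:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hand-built ISS for "dc cvau, x3": Op0=1, Op2=1, Op1=3, CRn=7, CRm=11, Rt=3. */
	uint32_t esr = 0x0012dc76;

	if ((esr & 0x01fffc01) == 0x0012dc00) {	/* same test as do_sysinstr() */
		int rt  = (esr >> 5) & 0x1f;	/* register operand: x3 */
		int crm = (esr >> 1) & 0x0f;	/* 11 -> DC CVAU        */

		printf("rt=x%d crm=%d\n", rt, crm);	/* prints "rt=x3 crm=11" */
	}
	return 0;
}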
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 9fefb005812a..076312b17d4f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -214,10 +214,16 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
+ /* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
+ vdso_data->raw_time_sec = tk->raw_time.tv_sec;
+ vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mult = tk->tkr_mono.mult;
+ /* tkr_raw.xtime_nsec == 0 */
+ vdso_data->cs_mono_mult = tk->tkr_mono.mult;
+ vdso_data->cs_raw_mult = tk->tkr_raw.mult;
+ /* tkr_mono.shift == tkr_raw.shift */
vdso_data->cs_shift = tk->tkr_mono.shift;
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index b467fd0a384b..62c84f7cb01b 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -23,7 +23,7 @@ GCOV_PROFILE := n
ccflags-y += -Wl,-shared
obj-y += vdso.o
-extra-y += vdso.lds vdso-offsets.h
+extra-y += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Force dependency (incbin is bad)
@@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
define cmd_vdsosym
- $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \
- cp $@ include/generated/
+ $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
endef
-$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
# Assembly rules for the .S files
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index efa79e8d4196..e00b4671bd7c 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -26,24 +26,109 @@
#define NSEC_PER_SEC_HI16 0x3b9a
vdso_data .req x6
-use_syscall .req w7
-seqcnt .req w8
+seqcnt .req w7
+w_tmp .req w8
+x_tmp .req x8
+
+/*
+ * Conventions for macro arguments:
+ * - An argument is write-only if its name starts with "res".
+ * - All other arguments are read-only, unless otherwise specified.
+ */
.macro seqcnt_acquire
9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
tbnz seqcnt, #0, 9999b
dmb ishld
- ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL]
.endm
- .macro seqcnt_read, cnt
+ .macro seqcnt_check fail
dmb ishld
- ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
+ ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
+ cmp w_tmp, seqcnt
+ b.ne \fail
.endm
- .macro seqcnt_check, cnt, fail
- cmp \cnt, seqcnt
- b.ne \fail
+ .macro syscall_check fail
+ ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
+ cbnz w_tmp, \fail
+ .endm
+
+ .macro get_nsec_per_sec res
+ mov \res, #NSEC_PER_SEC_LO16
+ movk \res, #NSEC_PER_SEC_HI16, lsl #16
+ .endm
+
+ /*
+ * Returns the clock delta, in nanoseconds left-shifted by the clock
+ * shift.
+ */
+ .macro get_clock_shifted_nsec res, cycle_last, mult
+ /* Read the virtual counter. */
+ isb
+ mrs x_tmp, cntvct_el0
+ /* Calculate cycle delta and convert to ns. */
+ sub \res, x_tmp, \cycle_last
+ /* We can only guarantee 56 bits of precision. */
+ movn x_tmp, #0xff00, lsl #48
+ and \res, x_tmp, \res
+ mul \res, \res, \mult
+ .endm
+
+ /*
+ * Returns in res_{sec,nsec} the REALTIME timespec, based on the
+ * "wall time" (xtime) and the clock_mono delta.
+ */
+ .macro get_ts_realtime res_sec, res_nsec, \
+ clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
+ add \res_nsec, \clock_nsec, \xtime_nsec
+ udiv x_tmp, \res_nsec, \nsec_to_sec
+ add \res_sec, \xtime_sec, x_tmp
+ msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
+ .endm
+
+ /*
+ * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
+ * used for CLOCK_MONOTONIC_RAW.
+ */
+ .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
+ udiv \res_sec, \clock_nsec, \nsec_to_sec
+ msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
+ .endm
+
+ /* sec and nsec are modified in place. */
+ .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
+ /* Add timespec. */
+ add \sec, \sec, \ts_sec
+ add \nsec, \nsec, \ts_nsec
+
+ /* Normalise the new timespec. */
+ cmp \nsec, \nsec_to_sec
+ b.lt 9999f
+ sub \nsec, \nsec, \nsec_to_sec
+ add \sec, \sec, #1
+9999:
+ cmp \nsec, #0
+ b.ge 9998f
+ add \nsec, \nsec, \nsec_to_sec
+ sub \sec, \sec, #1
+9998:
+ .endm
+
+ .macro clock_gettime_return, shift=0
+ .if \shift == 1
+ lsr x11, x11, x12
+ .endif
+ stp x10, x11, [x1, #TSPEC_TV_SEC]
+ mov x0, xzr
+ ret
+ .endm
+
+ .macro jump_slot jumptable, index, label
+ .if (. - \jumptable) != 4 * (\index)
+ .error "Jump slot index mismatch"
+ .endif
+ b \label
.endm
.text
@@ -51,18 +136,25 @@ seqcnt .req w8
/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
ENTRY(__kernel_gettimeofday)
.cfi_startproc
- mov x2, x30
- .cfi_register x30, x2
-
- /* Acquire the sequence counter and get the timespec. */
adr vdso_data, _vdso_data
-1: seqcnt_acquire
- cbnz use_syscall, 4f
-
/* If tv is NULL, skip to the timezone code. */
cbz x0, 2f
- bl __do_get_tspec
- seqcnt_check w9, 1b
+
+ /* Compute the time of day. */
+1: seqcnt_acquire
+ syscall_check fail=4f
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ seqcnt_check fail=1b
+
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
+
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
/* Convert ns to us. */
mov x13, #1000
@@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday)
stp w4, w5, [x1, #TZ_MINWEST]
3:
mov x0, xzr
- ret x2
+ ret
4:
/* Syscall fallback. */
mov x8, #__NR_gettimeofday
svc #0
- ret x2
+ ret
.cfi_endproc
ENDPROC(__kernel_gettimeofday)
+#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
+
/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
ENTRY(__kernel_clock_gettime)
.cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- b.ne 2f
+ cmp w0, #JUMPSLOT_MAX
+ b.hi syscall
+ adr vdso_data, _vdso_data
+ adr x_tmp, jumptable
+ add x_tmp, x_tmp, w0, uxtw #2
+ br x_tmp
+
+ ALIGN
+jumptable:
+ jump_slot jumptable, CLOCK_REALTIME, realtime
+ jump_slot jumptable, CLOCK_MONOTONIC, monotonic
+ b syscall
+ b syscall
+ jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
+ jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
+ jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
+
+ .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
+ .error "Wrong jumptable size"
+ .endif
+
+ ALIGN
+realtime:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ seqcnt_check fail=realtime
- mov x2, x30
- .cfi_register x30, x2
+ /* All computations are done with left-shifted nsecs. */
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- /* Get kernel timespec. */
- adr vdso_data, _vdso_data
-1: seqcnt_acquire
- cbnz use_syscall, 7f
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return, shift=1
- bl __do_get_tspec
- seqcnt_check w9, 1b
+ ALIGN
+monotonic:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
+ seqcnt_check fail=monotonic
- mov x30, x2
+ /* All computations are done with left-shifted nsecs. */
+ lsl x4, x4, x12
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- cmp w0, #CLOCK_MONOTONIC
- b.ne 6f
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- /* Get wtm timespec. */
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+ add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
+ clock_gettime_return, shift=1
- /* Check the sequence counter. */
- seqcnt_read w9
- seqcnt_check w9, 1b
- b 4f
-2:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 8f
+ ALIGN
+monotonic_raw:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_raw_mult, w12 = cs_shift */
+ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
+ ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
+ seqcnt_check fail=monotonic_raw
- /* xtime_coarse_nsec is already right-shifted */
- mov x12, #0
+ /* All computations are done with left-shifted nsecs. */
+ lsl x14, x14, x12
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- /* Get coarse timespec. */
- adr vdso_data, _vdso_data
-3: seqcnt_acquire
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_clock_raw res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, nsec_to_sec=x9
+
+ add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return, shift=1
+
+ ALIGN
+realtime_coarse:
+ seqcnt_acquire
ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
+ seqcnt_check fail=realtime_coarse
+ clock_gettime_return
- /* Get wtm timespec. */
+ ALIGN
+monotonic_coarse:
+ seqcnt_acquire
+ ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+ seqcnt_check fail=monotonic_coarse
- /* Check the sequence counter. */
- seqcnt_read w9
- seqcnt_check w9, 3b
+ /* Computations are done in (non-shifted) nsecs. */
+ get_nsec_per_sec res=x9
+ add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return
- cmp w0, #CLOCK_MONOTONIC_COARSE
- b.ne 6f
-4:
- /* Add on wtm timespec. */
- add x10, x10, x13
- lsl x14, x14, x12
- add x11, x11, x14
-
- /* Normalise the new timespec. */
- mov x15, #NSEC_PER_SEC_LO16
- movk x15, #NSEC_PER_SEC_HI16, lsl #16
- lsl x15, x15, x12
- cmp x11, x15
- b.lt 5f
- sub x11, x11, x15
- add x10, x10, #1
-5:
- cmp x11, #0
- b.ge 6f
- add x11, x11, x15
- sub x10, x10, #1
-
-6: /* Store to the user timespec. */
- lsr x11, x11, x12
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
-7:
- mov x30, x2
-8: /* Syscall fallback. */
+ ALIGN
+syscall: /* Syscall fallback. */
mov x8, #__NR_clock_gettime
svc #0
ret
@@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres)
.cfi_startproc
cmp w0, #CLOCK_REALTIME
ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
+ ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
b.ne 1f
ldr x2, 5f
@@ -203,46 +327,3 @@ ENTRY(__kernel_clock_getres)
.quad CLOCK_COARSE_RES
.cfi_endproc
ENDPROC(__kernel_clock_getres)
-
-/*
- * Read the current time from the architected counter.
- * Expects vdso_data to be initialised.
- * Clobbers the temporary registers (x9 - x15).
- * Returns:
- * - w9 = vDSO sequence counter
- * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec)
- * - w12 = cs_shift
- */
-ENTRY(__do_get_tspec)
- .cfi_startproc
-
- /* Read from the vDSO data page. */
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp w11, w12, [vdso_data, #VDSO_CS_MULT]
- seqcnt_read w9
-
- /* Read the virtual counter. */
- isb
- mrs x15, cntvct_el0
-
- /* Calculate cycle delta and convert to ns. */
- sub x10, x15, x10
- /* We can only guarantee 56 bits of precision. */
- movn x15, #0xff00, lsl #48
- and x10, x15, x10
- mul x10, x10, x11
-
- /* Use the kernel time to calculate the new timespec. */
- mov x11, #NSEC_PER_SEC_LO16
- movk x11, #NSEC_PER_SEC_HI16, lsl #16
- lsl x11, x11, x12
- add x15, x10, x14
- udiv x14, x15, x11
- add x10, x13, x14
- mul x13, x14, x11
- sub x11, x15, x13
-
- ret
- .cfi_endproc
-ENDPROC(__do_get_tspec)
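With __do_get_tspec removed, each clock path above open-codes the conversion using the new macros. A plain C sketch of the CLOCK_REALTIME computation (get_clock_shifted_nsec followed by get_ts_realtime, then the final right shift from clock_gettime_return); the function and parameter names are assumptions made for illustration:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

/*
 * C rendering of get_clock_shifted_nsec + get_ts_realtime: all nanosecond
 * values stay left-shifted by 'shift' until the end, and the final right
 * shift corresponds to clock_gettime_return with shift=1.
 */
static void vdso_realtime(uint64_t cycles, uint64_t cycle_last,
			  uint32_t mult, uint32_t shift,
			  uint64_t xtime_sec, uint64_t xtime_nsec_shifted,
			  uint64_t *sec, uint64_t *nsec)
{
	uint64_t mask = (1ULL << 56) - 1;	/* "56 bits of precision" */
	uint64_t nsec_to_sec = NSEC_PER_SEC << shift;
	uint64_t ns = ((cycles - cycle_last) & mask) * mult;

	ns += xtime_nsec_shifted;		/* tkr_mono.xtime_nsec */
	*sec = xtime_sec + ns / nsec_to_sec;
	*nsec = (ns % nsec_to_sec) >> shift;
}

int main(void)
{
	uint64_t sec, nsec;

	/* Example: 1000 counter ticks at mult=21, shift=0, on top of xtime = 10s. */
	vdso_realtime(1000, 0, 21, 0, 10, 0, &sec, &nsec);
	printf("%llu.%09llu\n", (unsigned long long)sec,
	       (unsigned long long)nsec);	/* prints 10.000021000 */
	return 0;
}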
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 435e820e898d..89d6e177ecbd 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -118,9 +118,11 @@ SECTIONS
__exception_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
@@ -131,12 +133,13 @@ SECTIONS
}
. = ALIGN(SEGMENT_ALIGN);
- RO_DATA(PAGE_SIZE) /* everything from this point to */
- EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
+ _etext = .; /* End of text section */
+
+ RO_DATA(PAGE_SIZE) /* everything from this point to */
+ EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
NOTES
. = ALIGN(SEGMENT_ALIGN);
- _etext = .; /* End of text and rodata section */
__init_begin = .;
INIT_TEXT_SECTION(8)