summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/alternative.c253
-rw-r--r--arch/x86/kernel/asm-offsets_32.c5
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/hygon.c4
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c1
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c12
-rw-r--r--arch/x86/kernel/doublefault_32.c4
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/head_32.S18
-rw-r--r--arch/x86/kernel/jump_label.c32
-rw-r--r--arch/x86/kernel/kprobes/core.c596
-rw-r--r--arch/x86/kernel/kprobes/opt.c9
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/setup_percpu.c1
-rw-r--r--arch/x86/kernel/sev-es.c63
-rw-r--r--arch/x86/kernel/smpboot.c90
-rw-r--r--arch/x86/kernel/static_call.c4
-rw-r--r--arch/x86/kernel/tls.c8
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/kernel/umip.c2
-rw-r--r--arch/x86/kernel/uprobes.c8
22 files changed, 561 insertions, 574 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f810e6fececd..84ec0ba491e4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -75,186 +75,30 @@ do { \
} \
} while (0)
-/*
- * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
- * that correspond to that nop. Getting from one nop to the next, we
- * add to the array the offset that is equal to the sum of all sizes of
- * nops preceding the one we are after.
- *
- * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
- * nice symmetry of sizes of the previous nops.
- */
-#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
-static const unsigned char intelnops[] =
-{
- GENERIC_NOP1,
- GENERIC_NOP2,
- GENERIC_NOP3,
- GENERIC_NOP4,
- GENERIC_NOP5,
- GENERIC_NOP6,
- GENERIC_NOP7,
- GENERIC_NOP8,
- GENERIC_NOP5_ATOMIC
-};
-static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
+const unsigned char x86nops[] =
{
- NULL,
- intelnops,
- intelnops + 1,
- intelnops + 1 + 2,
- intelnops + 1 + 2 + 3,
- intelnops + 1 + 2 + 3 + 4,
- intelnops + 1 + 2 + 3 + 4 + 5,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ BYTES_NOP1,
+ BYTES_NOP2,
+ BYTES_NOP3,
+ BYTES_NOP4,
+ BYTES_NOP5,
+ BYTES_NOP6,
+ BYTES_NOP7,
+ BYTES_NOP8,
};
-#endif
-#ifdef K8_NOP1
-static const unsigned char k8nops[] =
-{
- K8_NOP1,
- K8_NOP2,
- K8_NOP3,
- K8_NOP4,
- K8_NOP5,
- K8_NOP6,
- K8_NOP7,
- K8_NOP8,
- K8_NOP5_ATOMIC
-};
-static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
+const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
{
NULL,
- k8nops,
- k8nops + 1,
- k8nops + 1 + 2,
- k8nops + 1 + 2 + 3,
- k8nops + 1 + 2 + 3 + 4,
- k8nops + 1 + 2 + 3 + 4 + 5,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ x86nops,
+ x86nops + 1,
+ x86nops + 1 + 2,
+ x86nops + 1 + 2 + 3,
+ x86nops + 1 + 2 + 3 + 4,
+ x86nops + 1 + 2 + 3 + 4 + 5,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
-#endif
-
-#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
-static const unsigned char k7nops[] =
-{
- K7_NOP1,
- K7_NOP2,
- K7_NOP3,
- K7_NOP4,
- K7_NOP5,
- K7_NOP6,
- K7_NOP7,
- K7_NOP8,
- K7_NOP5_ATOMIC
-};
-static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
-{
- NULL,
- k7nops,
- k7nops + 1,
- k7nops + 1 + 2,
- k7nops + 1 + 2 + 3,
- k7nops + 1 + 2 + 3 + 4,
- k7nops + 1 + 2 + 3 + 4 + 5,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
-#endif
-
-#ifdef P6_NOP1
-static const unsigned char p6nops[] =
-{
- P6_NOP1,
- P6_NOP2,
- P6_NOP3,
- P6_NOP4,
- P6_NOP5,
- P6_NOP6,
- P6_NOP7,
- P6_NOP8,
- P6_NOP5_ATOMIC
-};
-static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
-{
- NULL,
- p6nops,
- p6nops + 1,
- p6nops + 1 + 2,
- p6nops + 1 + 2 + 3,
- p6nops + 1 + 2 + 3 + 4,
- p6nops + 1 + 2 + 3 + 4 + 5,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
- p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
-};
-#endif
-
-/* Initialize these to a safe default */
-#ifdef CONFIG_X86_64
-const unsigned char * const *ideal_nops = p6_nops;
-#else
-const unsigned char * const *ideal_nops = intel_nops;
-#endif
-
-void __init arch_init_ideal_nops(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- /*
- * Due to a decoder implementation quirk, some
- * specific Intel CPUs actually perform better with
- * the "k8_nops" than with the SDM-recommended NOPs.
- */
- if (boot_cpu_data.x86 == 6 &&
- boot_cpu_data.x86_model >= 0x0f &&
- boot_cpu_data.x86_model != 0x1c &&
- boot_cpu_data.x86_model != 0x26 &&
- boot_cpu_data.x86_model != 0x27 &&
- boot_cpu_data.x86_model < 0x30) {
- ideal_nops = k8_nops;
- } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
- ideal_nops = p6_nops;
- } else {
-#ifdef CONFIG_X86_64
- ideal_nops = k8_nops;
-#else
- ideal_nops = intel_nops;
-#endif
- }
- break;
-
- case X86_VENDOR_HYGON:
- ideal_nops = p6_nops;
- return;
-
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 > 0xf) {
- ideal_nops = p6_nops;
- return;
- }
-
- fallthrough;
-
- default:
-#ifdef CONFIG_X86_64
- ideal_nops = k8_nops;
-#else
- if (boot_cpu_has(X86_FEATURE_K8))
- ideal_nops = k8_nops;
- else if (boot_cpu_has(X86_FEATURE_K7))
- ideal_nops = k7_nops;
- else
- ideal_nops = intel_nops;
-#endif
- }
-}
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
@@ -263,7 +107,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
unsigned int noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
- memcpy(insns, ideal_nops[noplen], noplen);
+ memcpy(insns, x86_nops[noplen], noplen);
insns += noplen;
len -= noplen;
}
@@ -345,19 +189,35 @@ done:
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
{
unsigned long flags;
- int i;
+ struct insn insn;
+ int nop, i = 0;
- for (i = 0; i < a->padlen; i++) {
- if (instr[i] != 0x90)
+ /*
+ * Jump over the non-NOP insns, the remaining bytes must be single-byte
+ * NOPs, optimize them.
+ */
+ for (;;) {
+ if (insn_decode_kernel(&insn, &instr[i]))
+ return;
+
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+ break;
+
+ if ((i += insn.length) >= a->instrlen)
+ return;
+ }
+
+ for (nop = i; i < a->instrlen; i++) {
+ if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
return;
}
local_irq_save(flags);
- add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+ add_nops(instr + nop, i - nop);
local_irq_restore(flags);
DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
- instr, a->instrlen - a->padlen, a->padlen);
+ instr, nop, a->instrlen);
}
/*
@@ -403,19 +263,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* - feature not present but ALTINSTR_FLAG_INV is set to mean,
* patch if feature is *NOT* present.
*/
- if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
- if (a->padlen > 1)
- optimize_nops(a, instr);
-
- continue;
- }
+ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
+ goto next;
- DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d",
+ DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
feature >> 5,
feature & 0x1f,
instr, instr, a->instrlen,
- replacement, a->replacementlen, a->padlen);
+ replacement, a->replacementlen);
DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
@@ -439,14 +295,15 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
if (a->replacementlen && is_jmp(replacement[0]))
recompute_jump(a, instr, replacement, insn_buff);
- if (a->instrlen > a->replacementlen) {
- add_nops(insn_buff + a->replacementlen,
- a->instrlen - a->replacementlen);
- insn_buff_sz += a->instrlen - a->replacementlen;
- }
+ for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
+ insn_buff[insn_buff_sz] = 0x90;
+
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
text_poke_early(instr, insn_buff, insn_buff_sz);
+
+next:
+ optimize_nops(a, instr);
}
}
@@ -1310,15 +1167,15 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
const void *opcode, size_t len, const void *emulate)
{
struct insn insn;
+ int ret;
memcpy((void *)tp->text, opcode, len);
if (!emulate)
emulate = opcode;
- kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
- insn_get_length(&insn);
+ ret = insn_decode_kernel(&insn, emulate);
- BUG_ON(!insn_complete(&insn));
+ BUG_ON(ret < 0);
BUG_ON(len != insn.length);
tp->rel_addr = addr - (void *)_stext;
@@ -1338,13 +1195,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
default: /* assume NOP */
switch (len) {
case 2: /* NOP2 -- emulate as JMP8+0 */
- BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP8_INSN_OPCODE;
tp->rel32 = 0;
break;
case 5: /* NOP5 -- emulate as JMP32+0 */
- BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ BUG_ON(memcmp(emulate, x86_nops[len], len));
tp->opcode = JMP32_INSN_OPCODE;
tp->rel32 = 0;
break;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 6e043f295a60..2b411cd00a4e 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -53,11 +53,6 @@ void foo(void)
offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
offsetofend(struct cpu_entry_area, entry_stack_page.stack));
-#ifdef CONFIG_STACKPROTECTOR
- BLANK();
- OFFSET(stack_canary_offset, stack_canary, canary);
-#endif
-
BLANK();
DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 347a956f71ca..2d11384dc9ab 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -628,11 +628,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_init_amd_mc(c);
-#ifdef CONFIG_X86_32
- if (c->x86 == 6)
- set_cpu_cap(c, X86_FEATURE_K7);
-#endif
-
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 99e1656b326e..6bdb69a9a7dc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -161,7 +161,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
- GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -599,7 +598,6 @@ void load_percpu_segment(int cpu)
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
- load_stack_canary_segment();
}
#ifdef CONFIG_X86_32
@@ -1798,7 +1796,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
#ifdef CONFIG_STACKPROTECTOR
-DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index ae59115d18f9..0bd6c74e3ba1 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -215,12 +215,12 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
u32 ecx;
ecx = cpuid_ecx(0x8000001e);
- nodes_per_socket = ((ecx >> 8) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1;
} else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
- nodes_per_socket = ((value >> 3) & 7) + 1;
+ __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1;
}
if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index e309476743b7..acfd5d9f93c6 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 55ffa84d30d6..17e631443116 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -218,15 +218,15 @@ static struct severity {
static bool is_copy_from_user(struct pt_regs *regs)
{
u8 insn_buf[MAX_INSN_SIZE];
- struct insn insn;
unsigned long addr;
+ struct insn insn;
+ int ret;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
- kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
- insn_get_opcode(&insn);
- if (!insn.opcode.got)
+ ret = insn_decode_kernel(&insn, insn_buf);
+ if (ret < 0)
return false;
switch (insn.opcode.value) {
@@ -234,10 +234,6 @@ static bool is_copy_from_user(struct pt_regs *regs)
case 0x8A: case 0x8B:
/* MOVZ mem,reg */
case 0xB60F: case 0xB70F:
- insn_get_modrm(&insn);
- insn_get_sib(&insn);
- if (!insn.modrm.got || !insn.sib.got)
- return false;
addr = (unsigned long)insn_get_addr_ref(&insn, regs);
break;
/* REP MOVS */
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 759d392cbe9f..d1d49e3d536b 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -100,9 +100,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
.ss = __KERNEL_DS,
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
-#ifndef CONFIG_X86_32_LAZY_GS
- .gs = __KERNEL_STACK_CANARY,
-#endif
+ .gs = 0,
.__cr3 = __pa_nodebug(swapper_pg_dir),
},
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7edbd5ee5ed4..1b3ce3b4a2a2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -66,7 +66,7 @@ int ftrace_arch_code_modify_post_process(void)
static const char *ftrace_nop_replace(void)
{
- return ideal_nops[NOP_ATOMIC5];
+ return x86_nops[5];
}
static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
@@ -377,7 +377,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + (jmp_offset - start_offset);
if (WARN_ON(*(char *)ip != 0x75))
goto fail;
- ret = copy_from_kernel_nofault(ip, ideal_nops[2], 2);
+ ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
if (ret < 0)
goto fail;
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7ed84c282233..67f590425d90 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -318,8 +318,8 @@ SYM_FUNC_START(startup_32_smp)
movl $(__KERNEL_PERCPU), %eax
movl %eax,%fs # set this cpu's percpu
- movl $(__KERNEL_STACK_CANARY),%eax
- movl %eax,%gs
+ xorl %eax,%eax
+ movl %eax,%gs # clear possible garbage in %gs
xorl %eax,%eax # Clear LDT
lldt %ax
@@ -339,20 +339,6 @@ SYM_FUNC_END(startup_32_smp)
*/
__INIT
setup_once:
-#ifdef CONFIG_STACKPROTECTOR
- /*
- * Configure the stack canary. The linker can't handle this by
- * relocation. Manually set base address in stack canary
- * segment descriptor.
- */
- movl $gdt_page,%eax
- movl $stack_canary,%ecx
- movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
- shrl $16, %ecx
- movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
- movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
-#endif
-
andl $0,setup_once_ref /* Once is enough, thanks */
ret
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 5ba8477c2cb7..6a2eb62c85e6 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -28,10 +28,8 @@ static void bug_at(const void *ip, int line)
}
static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
+__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
{
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
const void *expect, *code;
const void *addr, *dest;
int line;
@@ -41,10 +39,8 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
- if (init) {
- expect = default_nop; line = __LINE__;
- } else if (type == JUMP_LABEL_JMP) {
- expect = ideal_nop; line = __LINE__;
+ if (type == JUMP_LABEL_JMP) {
+ expect = x86_nops[5]; line = __LINE__;
} else {
expect = code; line = __LINE__;
}
@@ -53,7 +49,7 @@ __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type,
bug_at(addr, line);
if (type == JUMP_LABEL_NOP)
- code = ideal_nop;
+ code = x86_nops[5];
return code;
}
@@ -62,7 +58,7 @@ static inline void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
- const void *opcode = __jump_label_set_jump_code(entry, type, init);
+ const void *opcode = __jump_label_set_jump_code(entry, type);
/*
* As long as only a single processor is running and the code is still
@@ -113,7 +109,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
}
mutex_lock(&text_mutex);
- opcode = __jump_label_set_jump_code(entry, type, 0);
+ opcode = __jump_label_set_jump_code(entry, type);
text_poke_queue((void *)jump_entry_code(entry),
opcode, JUMP_LABEL_NOP_SIZE, NULL);
mutex_unlock(&text_mutex);
@@ -136,22 +132,6 @@ static enum {
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- /*
- * This function is called at boot up and when modules are
- * first loaded. Check if the default nop, the one that is
- * inserted at compile time, is the ideal nop. If it is, then
- * we do not need to update the nop, and we can leave it as is.
- * If it is not, then we need to update the nop to the ideal nop.
- */
- if (jlstate == JL_STATE_START) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-
- if (memcmp(ideal_nop, default_nop, 5) != 0)
- jlstate = JL_STATE_UPDATE;
- else
- jlstate = JL_STATE_NO_UPDATE;
- }
if (jlstate == JL_STATE_UPDATE)
jump_label_transform(entry, type, 1);
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index df776cdca327..d3d65545cb8b 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -139,6 +139,8 @@ NOKPROBE_SYMBOL(synthesize_relcall);
int can_boost(struct insn *insn, void *addr)
{
kprobe_opcode_t opcode;
+ insn_byte_t prefix;
+ int i;
if (search_exception_tables((unsigned long)addr))
return 0; /* Page fault may occur on this address. */
@@ -151,35 +153,39 @@ int can_boost(struct insn *insn, void *addr)
if (insn->opcode.nbytes != 1)
return 0;
- /* Can't boost Address-size override prefix */
- if (unlikely(inat_is_address_size_prefix(insn->attr)))
- return 0;
+ for_each_insn_prefix(insn, i, prefix) {
+ insn_attr_t attr;
+
+ attr = inat_get_opcode_attribute(prefix);
+ /* Can't boost Address-size override prefix and CS override prefix */
+ if (prefix == 0x2e || inat_is_address_size_prefix(attr))
+ return 0;
+ }
opcode = insn->opcode.bytes[0];
- switch (opcode & 0xf0) {
- case 0x60:
- /* can't boost "bound" */
- return (opcode != 0x62);
- case 0x70:
- return 0; /* can't boost conditional jump */
- case 0x90:
- return opcode != 0x9a; /* can't boost call far */
- case 0xc0:
- /* can't boost software-interruptions */
- return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
- case 0xd0:
- /* can boost AA* and XLAT */
- return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
- case 0xe0:
- /* can boost in/out and absolute jmps */
- return ((opcode & 0x04) || opcode == 0xea);
- case 0xf0:
- /* clear and set flags are boostable */
- return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+ switch (opcode) {
+ case 0x62: /* bound */
+ case 0x70 ... 0x7f: /* Conditional jumps */
+ case 0x9a: /* Call far */
+ case 0xc0 ... 0xc1: /* Grp2 */
+ case 0xcc ... 0xce: /* software exceptions */
+ case 0xd0 ... 0xd3: /* Grp2 */
+ case 0xd6: /* (UD) */
+ case 0xd8 ... 0xdf: /* ESC */
+ case 0xe0 ... 0xe3: /* LOOP*, JCXZ */
+ case 0xe8 ... 0xe9: /* near Call, JMP */
+ case 0xeb: /* Short JMP */
+ case 0xf0 ... 0xf4: /* LOCK/REP, HLT */
+ case 0xf6 ... 0xf7: /* Grp3 */
+ case 0xfe: /* Grp4 */
+ /* ... are not boostable */
+ return 0;
+ case 0xff: /* Grp5 */
+ /* Only indirect jmp is boostable */
+ return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
default:
- /* CS override prefix and call are not boostable */
- return (opcode != 0x2e && opcode != 0x9a);
+ return 1;
}
}
@@ -229,7 +235,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
return 0UL;
if (faddr)
- memcpy(buf, ideal_nops[NOP_ATOMIC5], 5);
+ memcpy(buf, x86_nops[5], 5);
else
buf[0] = kp->opcode;
return (unsigned long)buf;
@@ -265,6 +271,8 @@ static int can_probe(unsigned long paddr)
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
+ int ret;
+
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
@@ -276,8 +284,10 @@ static int can_probe(unsigned long paddr)
__addr = recover_probed_instruction(buf, addr);
if (!__addr)
return 0;
- kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
- insn_get_length(&insn);
+
+ ret = insn_decode_kernel(&insn, (void *)__addr);
+ if (ret < 0)
+ return 0;
/*
* Another debugging subsystem might insert this breakpoint.
@@ -301,8 +311,8 @@ static int can_probe(unsigned long paddr)
int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
kprobe_opcode_t buf[MAX_INSN_SIZE];
- unsigned long recovered_insn =
- recover_probed_instruction(buf, (unsigned long)src);
+ unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
+ int ret;
if (!recovered_insn || !insn)
return 0;
@@ -312,8 +322,9 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
MAX_INSN_SIZE))
return 0;
- kernel_insn_init(insn, dest, MAX_INSN_SIZE);
- insn_get_length(insn);
+ ret = insn_decode_kernel(insn, dest);
+ if (ret < 0)
+ return 0;
/* We can not probe force emulate prefixed instruction */
if (insn_has_emulate_prefix(insn))
@@ -357,13 +368,14 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
return insn->length;
}
-/* Prepare reljump right after instruction to boost */
-static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
- struct insn *insn)
+/* Prepare reljump or int3 right after instruction */
+static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
+ struct insn *insn)
{
int len = insn->length;
- if (can_boost(insn, p->addr) &&
+ if (!IS_ENABLED(CONFIG_PREEMPTION) &&
+ !p->post_handler && can_boost(insn, p->addr) &&
MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
/*
* These instructions can be executed directly if it
@@ -374,7 +386,12 @@ static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
len += JMP32_INSN_SIZE;
p->ainsn.boostable = 1;
} else {
- p->ainsn.boostable = 0;
+ /* Otherwise, put an int3 for trapping singlestep */
+ if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
+ return -ENOSPC;
+
+ buf[len] = INT3_INSN_OPCODE;
+ len += INT3_INSN_SIZE;
}
return len;
@@ -411,86 +428,290 @@ void free_insn_page(void *page)
module_memfree(page);
}
-static void set_resume_flags(struct kprobe *p, struct insn *insn)
+/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
+
+static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
+{
+ switch (p->ainsn.opcode) {
+ case 0xfa: /* cli */
+ regs->flags &= ~(X86_EFLAGS_IF);
+ break;
+ case 0xfb: /* sti */
+ regs->flags |= X86_EFLAGS_IF;
+ break;
+ case 0x9c: /* pushf */
+ int3_emulate_push(regs, regs->flags);
+ break;
+ case 0x9d: /* popf */
+ regs->flags = int3_emulate_pop(regs);
+ break;
+ }
+ regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+}
+NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
+
+static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
+{
+ int3_emulate_ret(regs);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_ret);
+
+static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+
+ func += p->ainsn.rel32;
+ int3_emulate_call(regs, func);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_call);
+
+static nokprobe_inline
+void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
+{
+ unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+
+ if (cond)
+ ip += p->ainsn.rel32;
+ int3_emulate_jmp(regs, ip);
+}
+
+static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
+{
+ __kprobe_emulate_jmp(p, regs, true);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jmp);
+
+static const unsigned long jcc_mask[6] = {
+ [0] = X86_EFLAGS_OF,
+ [1] = X86_EFLAGS_CF,
+ [2] = X86_EFLAGS_ZF,
+ [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+ [4] = X86_EFLAGS_SF,
+ [5] = X86_EFLAGS_PF,
+};
+
+static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
+{
+ bool invert = p->ainsn.jcc.type & 1;
+ bool match;
+
+ if (p->ainsn.jcc.type < 0xc) {
+ match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
+ } else {
+ match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ if (p->ainsn.jcc.type >= 0xe)
+ match = match && (regs->flags & X86_EFLAGS_ZF);
+ }
+ __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jcc);
+
+static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
+{
+ bool match;
+
+ if (p->ainsn.loop.type != 3) { /* LOOP* */
+ if (p->ainsn.loop.asize == 32)
+ match = ((*(u32 *)&regs->cx)--) != 0;
+#ifdef CONFIG_X86_64
+ else if (p->ainsn.loop.asize == 64)
+ match = ((*(u64 *)&regs->cx)--) != 0;
+#endif
+ else
+ match = ((*(u16 *)&regs->cx)--) != 0;
+ } else { /* JCXZ */
+ if (p->ainsn.loop.asize == 32)
+ match = *(u32 *)(&regs->cx) == 0;
+#ifdef CONFIG_X86_64
+ else if (p->ainsn.loop.asize == 64)
+ match = *(u64 *)(&regs->cx) == 0;
+#endif
+ else
+ match = *(u16 *)(&regs->cx) == 0;
+ }
+
+ if (p->ainsn.loop.type == 0) /* LOOPNE */
+ match = match && !(regs->flags & X86_EFLAGS_ZF);
+ else if (p->ainsn.loop.type == 1) /* LOOPE */
+ match = match && (regs->flags & X86_EFLAGS_ZF);
+
+ __kprobe_emulate_jmp(p, regs, match);
+}
+NOKPROBE_SYMBOL(kprobe_emulate_loop);
+
+static const int addrmode_regoffs[] = {
+ offsetof(struct pt_regs, ax),
+ offsetof(struct pt_regs, cx),
+ offsetof(struct pt_regs, dx),
+ offsetof(struct pt_regs, bx),
+ offsetof(struct pt_regs, sp),
+ offsetof(struct pt_regs, bp),
+ offsetof(struct pt_regs, si),
+ offsetof(struct pt_regs, di),
+#ifdef CONFIG_X86_64
+ offsetof(struct pt_regs, r8),
+ offsetof(struct pt_regs, r9),
+ offsetof(struct pt_regs, r10),
+ offsetof(struct pt_regs, r11),
+ offsetof(struct pt_regs, r12),
+ offsetof(struct pt_regs, r13),
+ offsetof(struct pt_regs, r14),
+ offsetof(struct pt_regs, r15),
+#endif
+};
+
+static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
+
+ int3_emulate_call(regs, regs_get_register(regs, offs));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
+
+static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
+{
+ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
+
+ int3_emulate_jmp(regs, regs_get_register(regs, offs));
+}
+NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
+
+static int prepare_emulation(struct kprobe *p, struct insn *insn)
{
insn_byte_t opcode = insn->opcode.bytes[0];
switch (opcode) {
case 0xfa: /* cli */
case 0xfb: /* sti */
+ case 0x9c: /* pushfl */
case 0x9d: /* popf/popfd */
- /* Check whether the instruction modifies Interrupt Flag or not */
- p->ainsn.if_modifier = 1;
- break;
- case 0x9c: /* pushfl */
- p->ainsn.is_pushf = 1;
+ /*
+ * IF modifiers must be emulated since it will enable interrupt while
+ * int3 single stepping.
+ */
+ p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
+ p->ainsn.opcode = opcode;
break;
- case 0xcf: /* iret */
- p->ainsn.if_modifier = 1;
- fallthrough;
case 0xc2: /* ret/lret */
case 0xc3:
case 0xca:
case 0xcb:
- case 0xea: /* jmp absolute -- ip is correct */
- /* ip is already adjusted, no more changes required */
- p->ainsn.is_abs_ip = 1;
- /* Without resume jump, this is boostable */
- p->ainsn.boostable = 1;
+ p->ainsn.emulate_op = kprobe_emulate_ret;
break;
- case 0xe8: /* call relative - Fix return addr */
- p->ainsn.is_call = 1;
+ case 0x9a: /* far call absolute -- segment is not supported */
+ case 0xea: /* far jmp absolute -- segment is not supported */
+ case 0xcc: /* int3 */
+ case 0xcf: /* iret -- in-kernel IRET is not supported */
+ return -EOPNOTSUPP;
break;
-#ifdef CONFIG_X86_32
- case 0x9a: /* call absolute -- same as call absolute, indirect */
- p->ainsn.is_call = 1;
- p->ainsn.is_abs_ip = 1;
+ case 0xe8: /* near call relative */
+ p->ainsn.emulate_op = kprobe_emulate_call;
+ if (insn->immediate.nbytes == 2)
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+ else
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
break;
-#endif
- case 0xff:
+ case 0xeb: /* short jump relative */
+ case 0xe9: /* near jump relative */
+ p->ainsn.emulate_op = kprobe_emulate_jmp;
+ if (insn->immediate.nbytes == 1)
+ p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
+ else if (insn->immediate.nbytes == 2)
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+ else
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
+ break;
+ case 0x70 ... 0x7f:
+ /* 1 byte conditional jump */
+ p->ainsn.emulate_op = kprobe_emulate_jcc;
+ p->ainsn.jcc.type = opcode & 0xf;
+ p->ainsn.rel32 = *(char *)insn->immediate.bytes;
+ break;
+ case 0x0f:
opcode = insn->opcode.bytes[1];
+ if ((opcode & 0xf0) == 0x80) {
+ /* 2 bytes Conditional Jump */
+ p->ainsn.emulate_op = kprobe_emulate_jcc;
+ p->ainsn.jcc.type = opcode & 0xf;
+ if (insn->immediate.nbytes == 2)
+ p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
+ else
+ p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
+ } else if (opcode == 0x01 &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
+ X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
+ /* VM extensions - not supported */
+ return -EOPNOTSUPP;
+ }
+ break;
+ case 0xe0: /* Loop NZ */
+ case 0xe1: /* Loop */
+ case 0xe2: /* Loop */
+ case 0xe3: /* J*CXZ */
+ p->ainsn.emulate_op = kprobe_emulate_loop;
+ p->ainsn.loop.type = opcode & 0x3;
+ p->ainsn.loop.asize = insn->addr_bytes * 8;
+ p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
+ break;
+ case 0xff:
+ /*
+ * Since the 0xff is an extended group opcode, the instruction
+ * is determined by the MOD/RM byte.
+ */
+ opcode = insn->modrm.bytes[0];
if ((opcode & 0x30) == 0x10) {
- /*
- * call absolute, indirect
- * Fix return addr; ip is correct.
- * But this is not boostable
- */
- p->ainsn.is_call = 1;
- p->ainsn.is_abs_ip = 1;
+ if ((opcode & 0x8) == 0x8)
+ return -EOPNOTSUPP; /* far call */
+ /* call absolute, indirect */
+ p->ainsn.emulate_op = kprobe_emulate_call_indirect;
+ } else if ((opcode & 0x30) == 0x20) {
+ if ((opcode & 0x8) == 0x8)
+ return -EOPNOTSUPP; /* far jmp */
+ /* jmp near absolute indirect */
+ p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
+ } else
break;
- } else if (((opcode & 0x31) == 0x20) ||
- ((opcode & 0x31) == 0x21)) {
- /*
- * jmp near and far, absolute indirect
- * ip is correct.
- */
- p->ainsn.is_abs_ip = 1;
- /* Without resume jump, this is boostable */
- p->ainsn.boostable = 1;
- }
+
+ if (insn->addr_bytes != sizeof(unsigned long))
+ return -EOPNOTSUPP; /* Don't support differnt size */
+ if (X86_MODRM_MOD(opcode) != 3)
+ return -EOPNOTSUPP; /* TODO: support memory addressing */
+
+ p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
+#ifdef CONFIG_X86_64
+ if (X86_REX_B(insn->rex_prefix.value))
+ p->ainsn.indirect.reg += 8;
+#endif
+ break;
+ default:
break;
}
+ p->ainsn.size = insn->length;
+
+ return 0;
}
static int arch_copy_kprobe(struct kprobe *p)
{
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- int len;
+ int ret, len;
/* Copy an instruction with recovering if other optprobe modifies it.*/
len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
if (!len)
return -EINVAL;
- /*
- * __copy_instruction can modify the displacement of the instruction,
- * but it doesn't affect boostable check.
- */
- len = prepare_boost(buf, p, &insn);
+ /* Analyze the opcode and setup emulate functions */
+ ret = prepare_emulation(p, &insn);
+ if (ret < 0)
+ return ret;
- /* Analyze the opcode and set resume flags */
- set_resume_flags(p, &insn);
+ /* Add int3 for single-step or booster jmp */
+ len = prepare_singlestep(buf, p, &insn);
+ if (len < 0)
+ return len;
/* Also, displacement change doesn't affect the first byte */
p->opcode = buf[0];
@@ -583,29 +804,7 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
{
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_flags = kcb->kprobe_old_flags
- = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- if (p->ainsn.if_modifier)
- kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
-}
-
-static nokprobe_inline void clear_btf(void)
-{
- if (test_thread_flag(TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl &= ~DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
- }
-}
-
-static nokprobe_inline void restore_btf(void)
-{
- if (test_thread_flag(TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl |= DEBUGCTLMSR_BTF;
- update_debugctlmsr(debugctl);
- }
+ = (regs->flags & X86_EFLAGS_IF);
}
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
@@ -620,6 +819,22 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ /* Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+}
+NOKPROBE_SYMBOL(kprobe_post_process);
+
static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, int reenter)
{
@@ -627,7 +842,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
return;
#if !defined(CONFIG_PREEMPTION)
- if (p->ainsn.boostable && !p->post_handler) {
+ if (p->ainsn.boostable) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
reset_current_kprobe();
@@ -646,19 +861,51 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_status = KPROBE_REENTER;
} else
kcb->kprobe_status = KPROBE_HIT_SS;
- /* Prepare real single stepping */
- clear_btf();
- regs->flags |= X86_EFLAGS_TF;
+
+ if (p->ainsn.emulate_op) {
+ p->ainsn.emulate_op(p, regs);
+ kprobe_post_process(p, regs, kcb);
+ return;
+ }
+
+ /* Disable interrupt, and set ip register on trampoline */
regs->flags &= ~X86_EFLAGS_IF;
- /* single step inline if the instruction is an int3 */
- if (p->opcode == INT3_INSN_OPCODE)
- regs->ip = (unsigned long)p->addr;
- else
- regs->ip = (unsigned long)p->ainsn.insn;
+ regs->ip = (unsigned long)p->ainsn.insn;
}
NOKPROBE_SYMBOL(setup_singlestep);
/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "int3"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again
+ * right after the copied instruction.
+ * Different from the trap single-step, "int3" single-step can not
+ * handle the instruction which changes the ip register, e.g. jmp,
+ * call, conditional jmp, and the instructions which changes the IF
+ * flags because interrupt must be disabled around the single-stepping.
+ * Such instructions are software emulated, but others are single-stepped
+ * using "int3".
+ *
+ * When the 2nd "int3" handled, the regs->ip and regs->flags needs to
+ * be adjusted, so that we can resume execution on correct code.
+ */
+static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+ unsigned long orig_ip = (unsigned long)p->addr;
+
+ /* Restore saved interrupt flag and ip register */
+ regs->flags |= kcb->kprobe_saved_flags;
+ /* Note that regs->ip is executed int3 so must be a step back */
+ regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
+}
+NOKPROBE_SYMBOL(resume_singlestep);
+
+/*
* We have reentered the kprobe_handler(), since another probe was hit while
* within the handler. We save the original kprobes variables and just single
* step on the instruction of the new probe without calling any user handlers.
@@ -693,6 +940,12 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
}
NOKPROBE_SYMBOL(reenter_kprobe);
+static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
+{
+ return (kcb->kprobe_status == KPROBE_HIT_SS ||
+ kcb->kprobe_status == KPROBE_REENTER);
+}
+
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -737,7 +990,18 @@ int kprobe_int3_handler(struct pt_regs *regs)
reset_current_kprobe();
return 1;
}
- } else if (*addr != INT3_INSN_OPCODE) {
+ } else if (kprobe_is_ss(kcb)) {
+ p = kprobe_running();
+ if ((unsigned long)p->ainsn.insn < regs->ip &&
+ (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
+ /* Most provably this is the second int3 for singlestep */
+ resume_singlestep(p, regs, kcb);
+ kprobe_post_process(p, regs, kcb);
+ return 1;
+ }
+ }
+
+ if (*addr != INT3_INSN_OPCODE) {
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed
@@ -810,91 +1074,6 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(trampoline_handler);
-/*
- * Called after single-stepping. p->addr is the address of the
- * instruction whose first byte has been replaced by the "int 3"
- * instruction. To avoid the SMP problems that can occur when we
- * temporarily put back the original opcode to single-step, we
- * single-stepped a copy of the instruction. The address of this
- * copy is p->ainsn.insn.
- *
- * This function prepares to return from the post-single-step
- * interrupt. We have to fix up the stack as follows:
- *
- * 0) Except in the case of absolute or indirect jump or call instructions,
- * the new ip is relative to the copied instruction. We need to make
- * it relative to the original instruction.
- *
- * 1) If the single-stepped instruction was pushfl, then the TF and IF
- * flags are set in the just-pushed flags, and may need to be cleared.
- *
- * 2) If the single-stepped instruction was a call, the return address
- * that is atop the stack is the address following the copied instruction.
- * We need to make it the address following the original instruction.
- */
-static void resume_execution(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- unsigned long *tos = stack_addr(regs);
- unsigned long copy_ip = (unsigned long)p->ainsn.insn;
- unsigned long orig_ip = (unsigned long)p->addr;
-
- regs->flags &= ~X86_EFLAGS_TF;
-
- /* Fixup the contents of top of stack */
- if (p->ainsn.is_pushf) {
- *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
- *tos |= kcb->kprobe_old_flags;
- } else if (p->ainsn.is_call) {
- *tos = orig_ip + (*tos - copy_ip);
- }
-
- if (!p->ainsn.is_abs_ip)
- regs->ip += orig_ip - copy_ip;
-
- restore_btf();
-}
-NOKPROBE_SYMBOL(resume_execution);
-
-/*
- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled throughout this function.
- */
-int kprobe_debug_handler(struct pt_regs *regs)
-{
- struct kprobe *cur = kprobe_running();
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- if (!cur)
- return 0;
-
- resume_execution(cur, regs, kcb);
- regs->flags |= kcb->kprobe_saved_flags;
-
- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- cur->post_handler(cur, regs, 0);
- }
-
- /* Restore back the original saved kprobes variables and continue. */
- if (kcb->kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe(kcb);
- goto out;
- }
- reset_current_kprobe();
-out:
- /*
- * if somebody else is singlestepping across a probe point, flags
- * will have TF set, in which case, continue the remaining processing
- * of do_debug, as if this is not a probe hit.
- */
- if (regs->flags & X86_EFLAGS_TF)
- return 0;
-
- return 1;
-}
-NOKPROBE_SYMBOL(kprobe_debug_handler);
-
int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
@@ -912,20 +1091,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* normal page fault.
*/
regs->ip = (unsigned long)cur->addr;
- /*
- * Trap flag (TF) has been set here because this fault
- * happened where the single stepping will be done.
- * So clear it by resetting the current kprobe:
- */
- regs->flags &= ~X86_EFLAGS_TF;
- /*
- * Since the single step (trap) has been cancelled,
- * we need to restore BTF here.
- */
- restore_btf();
/*
- * If the TF flag was set before the kprobe hit,
+ * If the IF flag was set before the kprobe hit,
* don't touch it:
*/
regs->flags |= kcb->kprobe_old_flags;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 08eb23074f92..71425ebba98a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -312,6 +312,8 @@ static int can_optimize(unsigned long paddr)
addr = paddr - offset;
while (addr < paddr - offset + size) { /* Decode until function end */
unsigned long recovered_insn;
+ int ret;
+
if (search_exception_tables(addr))
/*
* Since some fixup code will jumps into this function,
@@ -321,8 +323,11 @@ static int can_optimize(unsigned long paddr)
recovered_insn = recover_probed_instruction(buf, addr);
if (!recovered_insn)
return 0;
- kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
- insn_get_length(&insn);
+
+ ret = insn_decode_kernel(&insn, (void *)recovered_insn);
+ if (ret < 0)
+ return 0;
+
/*
* In the case of detecting unknown breakpoint, this could be
* a padding INT3 between functions. Let's check that all the
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 69757fac7462..72920af0b3c0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -822,7 +822,6 @@ void __init setup_arch(char **cmdline_p)
idt_setup_early_traps();
early_cpu_init();
- arch_init_ideal_nops();
jump_label_init();
static_call_init();
early_ioremap_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index fd945ce78554..0941d2f44f2a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -224,7 +224,6 @@ void __init setup_per_cpu_areas(void)
per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
per_cpu(cpu_number, cpu) = cpu;
setup_percpu_segment(cpu);
- setup_stack_canary_segment(cpu);
/*
* Copy data used in early init routines from the
* initial arrays to the per cpu data areas. These
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 26f5479a97a8..73873b007838 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -263,39 +263,54 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
char buffer[MAX_INSN_SIZE];
- enum es_result ret;
int res;
- if (user_mode(ctxt->regs)) {
- res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
- if (!res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
+ res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+ if (!res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
+ }
- if (!insn_decode(&ctxt->insn, ctxt->regs, buffer, res))
- return ES_DECODE_FAILED;
- } else {
- res = vc_fetch_insn_kernel(ctxt, buffer);
- if (res) {
- ctxt->fi.vector = X86_TRAP_PF;
- ctxt->fi.error_code = X86_PF_INSTR;
- ctxt->fi.cr2 = ctxt->regs->ip;
- return ES_EXCEPTION;
- }
+ if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+ return ES_DECODE_FAILED;
+
+ if (ctxt->insn.immediate.got)
+ return ES_OK;
+ else
+ return ES_DECODE_FAILED;
+}
- insn_init(&ctxt->insn, buffer, MAX_INSN_SIZE, 1);
- insn_get_length(&ctxt->insn);
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+ char buffer[MAX_INSN_SIZE];
+ int res, ret;
+
+ res = vc_fetch_insn_kernel(ctxt, buffer);
+ if (res) {
+ ctxt->fi.vector = X86_TRAP_PF;
+ ctxt->fi.error_code = X86_PF_INSTR;
+ ctxt->fi.cr2 = ctxt->regs->ip;
+ return ES_EXCEPTION;
}
- ret = ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+ ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+ if (ret < 0)
+ return ES_DECODE_FAILED;
+ else
+ return ES_OK;
+}
- return ret;
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+ if (user_mode(ctxt->regs))
+ return __vc_decode_user_insn(ctxt);
+ else
+ return __vc_decode_kern_insn(ctxt);
}
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1e2050c4f94a..7ffb0cf3f997 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -458,29 +458,52 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_die_id == o->cpu_die_id)
+ return true;
+ return false;
+}
+
/*
- * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node. If this happens, we will
+ * discard the MC level of the topology later.
+ */
+static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ if (c->phys_proc_id == o->phys_proc_id)
+ return true;
+ return false;
+}
+
+/*
+ * Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs.
*
- * These are Intel CPUs that enumerate an LLC that is shared by
- * multiple NUMA nodes. The LLC on these systems is shared for
- * off-package data access but private to the NUMA node (half
- * of the package) for on-package access.
+ * Any Intel CPU that has multiple nodes per package and does not
+ * match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology.
*
- * CPUID (the source of the information about the LLC) can only
- * enumerate the cache as being shared *or* unshared, but not
- * this particular configuration. The CPU in this case enumerates
- * the cache to be shared across the entire package (spanning both
- * NUMA nodes).
+ * When in SNC mode, these CPUs enumerate an LLC that is shared
+ * by multiple NUMA nodes. The LLC is shared for off-package data
+ * access but private to the NUMA node (half of the package) for
+ * on-package access. CPUID (the source of the information about
+ * the LLC) can only enumerate the cache as shared or unshared,
+ * but not this particular configuration.
*/
-static const struct x86_cpu_id snc_cpu[] = {
- X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
+static const struct x86_cpu_id intel_cod_cpu[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0), /* COD */
+ X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0), /* COD */
+ X86_MATCH_INTEL_FAM6_MODEL(ANY, 1), /* SNC */
{}
};
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
+ const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+ bool intel_snc = id && id->driver_data;
/* Do not match if we do not have a valid APICID for cpu: */
if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
@@ -495,32 +518,12 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
* means 'c' does not share the LLC of 'o'. This will be
* reflected to userspace.
*/
- if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
+ if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
return false;
return topology_sane(c, o, "llc");
}
-/*
- * Unlike the other levels, we do not enforce keeping a
- * multicore group inside a NUMA node. If this happens, we will
- * discard the MC level of the topology later.
- */
-static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
-{
- if (c->phys_proc_id == o->phys_proc_id)
- return true;
- return false;
-}
-
-static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
-{
- if ((c->phys_proc_id == o->phys_proc_id) &&
- (c->cpu_die_id == o->cpu_die_id))
- return true;
- return false;
-}
-
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
@@ -592,14 +595,23 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
+ if (match_pkg(c, o) && !topology_same_node(c, o))
+ x86_has_numa_in_package = true;
+
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(topology_sibling_cpumask, cpu, i);
if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(cpu_llc_shared_mask, cpu, i);
+ if ((i == cpu) || (has_mp && match_die(c, o)))
+ link_mask(topology_die_cpumask, cpu, i);
}
+ threads = cpumask_weight(topology_sibling_cpumask(cpu));
+ if (threads > __max_smt_threads)
+ __max_smt_threads = threads;
+
/*
* This needs a separate iteration over the cpus because we rely on all
* topology_sibling_cpumask links to be set-up.
@@ -613,8 +625,7 @@ void set_cpu_sibling_map(int cpu)
/*
* Does this new cpu bringup a new core?
*/
- if (cpumask_weight(
- topology_sibling_cpumask(cpu)) == 1) {
+ if (threads == 1) {
/*
* for each core in package, increment
* the booted_cores for this new cpu
@@ -631,16 +642,7 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
- if (match_pkg(c, o) && !topology_same_node(c, o))
- x86_has_numa_in_package = true;
-
- if ((i == cpu) || (has_mp && match_die(c, o)))
- link_mask(topology_die_cpumask, cpu, i);
}
-
- threads = cpumask_weight(topology_sibling_cpumask(cpu));
- if (threads > __max_smt_threads)
- __max_smt_threads = threads;
}
/* maps the cpu to the sched domain representing multi-core */
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 9442c4136c38..ea028e736831 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -34,7 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
break;
case NOP:
- code = ideal_nops[NOP_ATOMIC5];
+ code = x86_nops[5];
break;
case JMP:
@@ -66,7 +66,7 @@ static void __static_call_validate(void *insn, bool tail)
return;
} else {
if (opcode == CALL_INSN_OPCODE ||
- !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
+ !memcmp(insn, x86_nops[5], 5) ||
!memcmp(insn, xor5rax, 5))
return;
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 64a496a0687f..3c883e064242 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -164,17 +164,11 @@ int do_set_thread_area(struct task_struct *p, int idx,
savesegment(fs, sel);
if (sel == modified_sel)
loadsegment(fs, sel);
-
- savesegment(gs, sel);
- if (sel == modified_sel)
- load_gs_index(sel);
#endif
-#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, sel);
if (sel == modified_sel)
- loadsegment(gs, sel);
-#endif
+ load_gs_index(sel);
} else {
#ifdef CONFIG_X86_64
if (p->thread.fsindex == modified_sel)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f577d07fbd43..853ea7a80806 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -498,14 +498,15 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
{
u8 insn_buf[MAX_INSN_SIZE];
struct insn insn;
+ int ret;
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip,
MAX_INSN_SIZE))
return GP_NO_HINT;
- kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
- insn_get_modrm(&insn);
- insn_get_sib(&insn);
+ ret = insn_decode_kernel(&insn, insn_buf);
+ if (ret < 0)
+ return GP_NO_HINT;
*addr = (unsigned long)insn_get_addr_ref(&insn, regs);
if (*addr == -1UL)
@@ -889,9 +890,6 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
dr6 &= ~DR_STEP;
- if (kprobe_debug_handler(regs))
- goto out;
-
/*
* The kernel doesn't use INT1
*/
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index fac1daae7994..8daa70b0d2da 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -356,7 +356,7 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (!nr_copied)
return false;
- if (!insn_decode(&insn, regs, buf, nr_copied))
+ if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
return false;
umip_inst = identify_insn(&insn);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index a2b413394917..b63cf8f7745e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -276,12 +276,12 @@ static bool is_prefix_bad(struct insn *insn)
static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64)
{
+ enum insn_mode m = x86_64 ? INSN_MODE_64 : INSN_MODE_32;
u32 volatile *good_insns;
+ int ret;
- insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
- /* has the side-effect of processing the entire instruction */
- insn_get_length(insn);
- if (!insn_complete(insn))
+ ret = insn_decode(insn, auprobe->insn, sizeof(auprobe->insn), m);
+ if (ret < 0)
return -ENOEXEC;
if (is_prefix_bad(insn))