diff options
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- | arch/x86_64/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/aperture.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/e820.c | 36 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 28 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-dma.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 14 | ||||
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/x8664_ksyms.c | 3 |
15 files changed, 95 insertions, 43 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index a098a11e7755..059c88313f4e 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - dmi_scan.o pci-dma.o pci-nommu.o + pci-dma.o pci-nommu.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -49,5 +49,3 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o -dmi_scan-y += ../../i386/kernel/dmi_scan.o - diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index fffd6b0a2fab..70b9d21ed675 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) printk("Aperture from %s beyond 4GB. Ignoring.\n",name); return 0; } - if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) { + if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); return 0; } diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 293cd71a266a..62776c07cff1 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -80,7 +80,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) return 0; } -int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) +/* + * This function checks if any part of the range <start,end> is mapped + * with type. + */ +int __meminit +e820_any_mapped(unsigned long start, unsigned long end, unsigned type) { int i; for (i = 0; i < e820.nr_map; i++) { @@ -94,6 +99,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) return 0; } +/* + * This function checks if the entire range <start,end> is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +{ + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of <start,end> we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + /* * Find a free area in a specific range. */ diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 8538bfea30e6..c946e4fe67a7 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -180,6 +180,10 @@ rff_trace: * * XXX if we had a free scratch register we could save the RSP into the stack frame * and report it properly in ps. Unfortunately we haven't. + * + * When user can change the frames always force IRET. That is because + * it deals with uncanonical addresses better. SYSRET has trouble + * with them due to bugs in both AMD and Intel CPUs. */ ENTRY(system_call) @@ -254,7 +258,10 @@ sysret_signal: xorl %esi,%esi # oldset -> arg2 call ptregscall_common 1: movl $_TIF_NEED_RESCHED,%edi - jmp sysret_check + /* Use IRET because user could have changed frame. This + works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ + cli + jmp int_with_check badsys: movq $-ENOSYS,RAX-ARGOFFSET(%rsp) @@ -280,7 +287,8 @@ tracesys: call syscall_trace_leave RESTORE_TOP_OF_STACK %rbx RESTORE_REST - jmp ret_from_sys_call + /* Use IRET because user could have changed frame */ + jmp int_ret_from_sys_call CFI_ENDPROC /* @@ -408,25 +416,9 @@ ENTRY(stub_execve) CFI_ADJUST_CFA_OFFSET -8 CFI_REGISTER rip, r11 SAVE_REST - movq %r11, %r15 - CFI_REGISTER rip, r15 FIXUP_TOP_OF_STACK %r11 call sys_execve - GET_THREAD_INFO(%rcx) - bt $TIF_IA32,threadinfo_flags(%rcx) - CFI_REMEMBER_STATE - jc exec_32bit RESTORE_TOP_OF_STACK %r11 - movq %r15, %r11 - CFI_REGISTER rip, r11 - RESTORE_REST - pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rip, 0 - ret - -exec_32bit: - CFI_RESTORE_STATE movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index accbff3fec49..1eaa5dae6174 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -53,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * returns non-zero if opcode modifies the interrupt flag. */ -static inline int is_IF_modifier(kprobe_opcode_t *insn) +static __always_inline int is_IF_modifier(kprobe_opcode_t *insn) { switch (*insn) { case 0xfa: /* cli */ @@ -84,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) * If it does, return the address of the 32-bit displacement word. * If not, return null. */ -static inline s32 *is_riprel(u8 *insn) +static s32 __kprobes *is_riprel(u8 *insn) { #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -229,7 +229,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) mutex_unlock(&kprobe_mutex); } -static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -237,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; } -static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; @@ -245,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = p; diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 10b3e348fc99..6f0790e8b6d3 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -29,6 +29,8 @@ #define MISC_MCELOG_MINOR 227 #define NR_BANKS 6 +atomic_t mce_entry; + static int mce_dont_init; /* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, @@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) int i; int panicm_found = 0; + atomic_inc(&mce_entry); + if (regs) notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); if (!banks) - return; + goto out2; memset(&m, 0, sizeof(struct mce)); m.cpu = safe_smp_processor_id(); @@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) out: /* Last thing done in the machine check exception to clear state. */ wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); } /* diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index d9e4067faf05..4e6357fe0ec3 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -34,6 +34,7 @@ #include <asm/proto.h> #include <asm/kdebug.h> #include <asm/local.h> +#include <asm/mce.h> /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) __get_cpu_var(nmi_touch) = 0; touched = 1; } +#ifdef CONFIG_X86_MCE + /* Could check oops_in_progress here too, but it's safer + not too */ + if (atomic_read(&mce_entry) > 0) + touched = 1; +#endif if (!touched && __get_cpu_var(last_irq_sum) == sum) { /* * Ayiee, looks like this CPU is stuck ... diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 03c9eeedb0f3..af035ede70cd 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -48,9 +48,11 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { struct page *page; int node; +#ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) node = pcibus_to_node(to_pci_dev(dev)->bus); else +#endif node = numa_node_id(); page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 70dd8e5c6889..fb903e65e079 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -575,8 +575,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + /* This must be here to ensure both math_state_restore() and - kernel_fpu_begin() work consistently. */ + kernel_fpu_begin() work consistently. + And the AMD workaround requires it to be after DS reload. */ unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); @@ -781,10 +783,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) } case ARCH_GET_GS: { unsigned long base; + unsigned gsindex; if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); - else if (doit) - rdmsrl(MSR_KERNEL_GS_BASE, base); + else if (doit) { + asm("movl %%gs,%0" : "=r" (gsindex)); + if (gsindex) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + base = task->thread.gs; + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index d44b2c1e63a6..da8e7903d817 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -274,11 +274,6 @@ static int putreg(struct task_struct *child, return -EIO; value &= 0xffff; break; - case offsetof(struct user_regs_struct, rip): - /* Check if the new RIP address is canonical */ - if (value >= TASK_SIZE_OF(child)) - return -EIO; - break; } put_stack_long(child, regno - sizeof(struct pt_regs), value); return 0; diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0856ad444f90..759070c82751 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -353,8 +353,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (fullarg(from, "enable_timer_pin_1")) disable_timer_pin_1 = -1; - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) + if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); disable_apic = 1; + } if (fullarg(from, "noapic")) skip_ioapic_setup = 1; @@ -928,6 +930,10 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); + r = get_model_name(c); if (!r) { switch (c->x86) { diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index ef8bc46dc140..7392570f975d 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -726,7 +726,7 @@ static __init int late_hpet_init(void) unsigned int ntimer; if (!vxtime.hpet_address) - return -1; + return 0; memset(&hd, 0, sizeof (hd)); @@ -917,6 +917,8 @@ void __init time_init(void) vxtime.hpet_address = 0; if (hpet_use_timer) { + /* set tick_nsec to use the proper rate for HPET */ + tick_nsec = TICK_NSEC_HPET; cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 6bda322d3caf..2700b1375c1f 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/moduleparam.h> #include <linux/nmi.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -433,6 +434,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err) printk(KERN_ALERT "RIP "); printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); + if (kexec_should_crash(current)) + crash_kexec(regs); } void die(const char * str, struct pt_regs * regs, long err) @@ -455,6 +458,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) */ printk(str, safe_smp_processor_id()); show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 39ff0708f803..b81f473c4a19 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -65,7 +65,7 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index d78f46056bda..1def21c9f7cd 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -112,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memcpy #undef memset #undef memmove -#undef strlen extern void * memset(void *,int,__kernel_size_t); extern size_t strlen(const char *); @@ -121,8 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t); extern void * __memcpy(void *,const void *,__kernel_size_t); EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); |