diff options
Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 263 |
1 files changed, 141 insertions, 122 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b1..f8869978bbb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,14 +21,14 @@ #include <asm/asm.h> #include <asm/numa.h> #include <asm/smp.h> -#ifdef CONFIG_X86_LOCAL_APIC -#include <asm/mpspec.h> +#include <asm/cpu.h> +#include <asm/cpumask.h> #include <asm/apic.h> -#include <mach_apic.h> -#include <asm/genapic.h> + +#ifdef CONFIG_X86_LOCAL_APIC +#include <asm/uv/uv.h> #endif -#include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/desc.h> @@ -37,6 +37,7 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/hypervisor.h> +#include <asm/stackprotector.h> #include "cpu.h" @@ -50,6 +51,15 @@ cpumask_var_t cpu_initialized_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* correctly size the local cpu masks */ +void __init setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #else /* CONFIG_X86_32 */ cpumask_t cpu_callin_map; @@ -62,23 +72,23 @@ cpumask_t cpu_sibling_setup_map; static struct cpu_dev *this_cpu __cpuinitdata; +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * The TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) + */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; #else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -110,9 +120,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + GDT_STACK_CANARY_INIT #endif +} }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); #ifdef CONFIG_X86_32 @@ -213,6 +224,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) #endif /* + * Some CPU features depend on higher CPUID levels, which may not always + * be available due to CPUID level capping or broken virtualization + * software. Add those features to this table to auto-disable them. + */ +struct cpuid_dependent_feature { + u32 feature; + u32 level; +}; +static const struct cpuid_dependent_feature __cpuinitconst +cpuid_dependent_features[] = { + { X86_FEATURE_MWAIT, 0x00000005 }, + { X86_FEATURE_DCA, 0x00000009 }, + { X86_FEATURE_XSAVE, 0x0000000d }, + { 0, 0 } +}; + +static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) +{ + const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { + /* + * Note: cpuid_level is set to -1 if unavailable, but + * extended_extended_level is set to 0 if unavailable + * and the legitimate extended levels are all negative + * when signed; hence the weird messing around with + * signs here... + */ + if (cpu_has(c, df->feature) && + ((s32)df->level < 0 ? + (u32)df->level > (u32)c->extended_cpuid_level : + (s32)df->level > (s32)c->cpuid_level)) { + clear_cpu_cap(c, df->feature); + if (warn) + printk(KERN_WARNING + "CPU: CPU feature %s disabled " + "due to lack of CPUID level 0x%x\n", + x86_cap_flags[df->feature], + df->level); + } + } +} + +/* * Naming convention should be: <Name> [(<Codename>)] * This table only is used unless init_<vendor>() below doesn't set it; * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used @@ -242,18 +296,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif + load_stack_canary_segment(); +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif + /* Reload the per-cpu base */ + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; @@ -383,11 +448,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -395,13 +456,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif } out: @@ -570,11 +626,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); - validate_pat_support(c); - #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif + filter_cpuid_features(c, false); } void __init early_cpu_init(void) @@ -637,7 +692,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; # endif @@ -684,7 +739,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif /* @@ -708,6 +763,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * we do "generic changes." */ + /* Filter out anything that depends on CPUID levels we don't have */ + filter_cpuid_features(c, true); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { char *p; @@ -877,54 +935,22 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); +DEFINE_PER_CPU(char *, irq_stack_ptr) = + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -void __cpuinit pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } +DEFINE_PER_CPU(unsigned int, irq_count) = -1; - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -957,16 +983,21 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else +#else /* x86_64 */ -/* Make sure %fs is initialized properly in idle threads */ +#ifdef CONFIG_CC_STACKPROTECTOR +DEFINE_PER_CPU(unsigned long, stack_canary); +#endif + +/* Make sure %fs and %gs are initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; + regs->gs = __KERNEL_STACK_CANARY; return regs; } -#endif +#endif /* x86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -982,15 +1013,14 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif me = current; @@ -1006,7 +1036,9 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); @@ -1017,25 +1049,20 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); - if (cpu != 0 && x2apic) + if (cpu != 0) enable_x2apic(); /* * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } @@ -1051,8 +1078,7 @@ void __cpuinit cpu_init(void) atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; - if (me->mm) - BUG(); + BUG_ON(me->mm); enter_lazy_tlb(&init_mm, me); load_sp0(t, ¤t->thread); @@ -1069,22 +1095,19 @@ void __cpuinit cpu_init(void) */ if (kgdb_connected && arch_kgdb_ops.correct_hw_break) arch_kgdb_ops.correct_hw_break(); - else { + else #endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ + { + /* + * Clear all 6 debug registers: + */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); } -#endif fpu_init(); @@ -1114,15 +1137,14 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); curr->active_mm = &init_mm; - if (curr->mm) - BUG(); + BUG_ON(curr->mm); enter_lazy_tlb(&init_mm, curr); load_sp0(t, thread); @@ -1135,9 +1157,6 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - /* Clear all 6 debug registers: */ set_debugreg(0, 0); set_debugreg(0, 1); |