diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
29 files changed, 431 insertions, 303 deletions
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 3e91d3ee26ec..238468ae1993 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -45,6 +45,6 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) ®s[CR_ECX], ®s[CR_EDX]); if (regs[cb->reg] & (1 << cb->bit)) - set_bit(cb->feature, c->x86_capability); + set_cpu_cap(c, cb->feature); } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1ff88c7f45cf..06fa159232fd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -63,6 +63,15 @@ static __cpuinit int amd_apic_timer_broken(void) int force_mwait __cpuinitdata; +void __cpuinit early_init_amd(struct cpuinfo_x86 *c) +{ + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } +} + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; @@ -85,6 +94,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* * FIXME: We should handle the K5 here. Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, @@ -257,12 +268,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; } - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - } - #ifdef CONFIG_X86_HT /* * On a AMD multi core setup the lower bits of the APIC id @@ -295,12 +300,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) local_apic_timer_disabled = 1; #endif - if (c->x86 == 0x10 && !force_mwait) - clear_bit(X86_FEATURE_MWAIT, c->x86_capability); - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_bit(X86_FEATURE_MCE, c->x86_capability); + + if (cpu_has_xmm) + set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 205fd5ba57f7..9b95edcfc6ae 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,7 @@ #include <linux/utsname.h> #include <asm/bugs.h> #include <asm/processor.h> +#include <asm/processor-flags.h> #include <asm/i387.h> #include <asm/msr.h> #include <asm/paravirt.h> @@ -35,7 +36,7 @@ __setup("mca-pentium", mca_pentium); static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); + write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); return 1; } @@ -153,7 +154,7 @@ static void __init check_config(void) * If we configured ourselves for a TSC, we'd better have one! */ #ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) + if (!cpu_has_tsc) panic("Kernel compiled for Pentium+, requires TSC feature!"); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2fcf2051bdb..db28aa9e2f69 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,43 +22,48 @@ #include "cpu.h" DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, - [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, - [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, - [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, /* * Segments used for calling PnP BIOS have byte granularity. * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ - [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ - [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. */ - [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, - [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, - [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + static int cachesize_override __cpuinitdata = -1; -static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; -static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; -extern int disable_pse; - static void __cpuinit default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ @@ -207,16 +212,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) static int __init x86_fxsr_setup(char * s) { - /* Tell all the other CPUs to not use it... */ - disable_x86_fxsr = 1; - - /* - * ... and clear the bits early in the boot_cpu_data - * so that the bootup process doesn't try to do this - * either. - */ - clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); return 1; } __setup("nofxsr", x86_fxsr_setup); @@ -224,7 +221,7 @@ __setup("nofxsr", x86_fxsr_setup); static int __init x86_sep_setup(char * s) { - disable_x86_sep = 1; + setup_clear_cpu_cap(X86_FEATURE_SEP); return 1; } __setup("nosep", x86_sep_setup); @@ -281,6 +278,33 @@ void __init cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; } } +static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + int ebx; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (have_cpuid_p()) { + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + } + + } + +} /* Do minimum CPU detection early. Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. @@ -300,6 +324,17 @@ static void __init early_cpu_detect(void) cpu_detect(c); get_cpu_vendor(c, 1); + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + early_init_amd(c); + break; + case X86_VENDOR_INTEL: + early_init_intel(c); + break; + } + + early_get_cap(c); } static void __cpuinit generic_identify(struct cpuinfo_x86 * c) @@ -357,8 +392,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) init_scattered_cpuid_features(c); } - early_intel_workaround(c); - #ifdef CONFIG_X86_HT c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif @@ -392,7 +425,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -418,20 +451,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) generic_identify(c); - printk(KERN_DEBUG "CPU: After generic identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - - if (this_cpu->c_identify) { + if (this_cpu->c_identify) this_cpu->c_identify(c); - printk(KERN_DEBUG "CPU: After vendor identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - } - /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -453,23 +475,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * we do "generic changes." */ - /* TSC disabled? */ - if ( tsc_disable ) - clear_bit(X86_FEATURE_TSC, c->x86_capability); - - /* FXSR disabled? */ - if (disable_x86_fxsr) { - clear_bit(X86_FEATURE_FXSR, c->x86_capability); - clear_bit(X86_FEATURE_XMM, c->x86_capability); - } - - /* SEP disabled? */ - if (disable_x86_sep) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - if (disable_pse) - clear_bit(X86_FEATURE_PSE, c->x86_capability); - /* If the model name is still unset, do table lookup. */ if ( !c->x86_model_id[0] ) { char *p; @@ -482,13 +487,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After all inits, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -501,8 +499,14 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } + /* Clear all flags overriden by options */ + for (i = 0; i < NCAPINTS; i++) + c->x86_capability[i] ^= cleared_cpu_caps[i]; + /* Init Machine Check Exception if available. */ mcheck_init(c); + + select_idle_routine(c); } void __init identify_boot_cpu(void) @@ -510,7 +514,6 @@ void __init identify_boot_cpu(void) identify_cpu(&boot_cpu_data); sysenter_setup(); enable_sep_cpu(); - mtrr_bp_init(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -567,6 +570,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } #endif +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -590,6 +600,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } +static __init int setup_disablecpuid(char *arg) +{ + int bit; + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; + return 1; +} +__setup("clearcpuid=", setup_disablecpuid); + cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) @@ -620,21 +641,13 @@ void __init early_cpu_init(void) nexgen_init_cpu(); umc_init_cpu(); early_cpu_detect(); - -#ifdef CONFIG_DEBUG_PAGEALLOC - /* pse is not compatible with on-the-fly unmapping, - * disable it even if the cpus claim to support it. - */ - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; -#endif } /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PERCPU; + regs->fs = __KERNEL_PERCPU; return regs; } @@ -642,7 +655,7 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) * it's on the real one. */ void switch_to_new_gdt(void) { - struct Xgt_desc_struct gdt_descr; + struct desc_ptr gdt_descr; gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; @@ -672,12 +685,6 @@ void __cpuinit cpu_init(void) if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } load_idt(&idt_descr); switch_to_new_gdt(); @@ -691,7 +698,7 @@ void __cpuinit cpu_init(void) BUG(); enter_lazy_tlb(&init_mm, curr); - load_esp0(t, thread); + load_sp0(t, thread); set_tss_desc(cpu,t); load_TR_desc(); load_LDT(&init_mm.context); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2f6432cef6ff..ad6527a5beb1 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -24,5 +24,6 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); -extern void early_intel_workaround(struct cpuinfo_x86 *c); +extern void early_init_intel(struct cpuinfo_x86 *c); +extern void early_init_amd(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index fea0af0476b9..a962dcb9c408 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -67,7 +67,8 @@ struct acpi_cpufreq_data { unsigned int cpu_feature; }; -static struct acpi_cpufreq_data *drv_data[NR_CPUS]; +static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); + /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance *acpi_perf_data; @@ -218,14 +219,14 @@ static u32 get_cur_val(cpumask_t mask) if (unlikely(cpus_empty(mask))) return 0; - switch (drv_data[first_cpu(mask)]->cpu_feature) { + switch (per_cpu(drv_data, first_cpu(mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = drv_data[first_cpu(mask)]->acpi_data; + perf = per_cpu(drv_data, first_cpu(mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -325,7 +326,7 @@ static unsigned int get_measured_perf(unsigned int cpu) #endif - retval = drv_data[cpu]->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed(current, saved_mask); @@ -336,7 +337,7 @@ static unsigned int get_measured_perf(unsigned int cpu) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = drv_data[cpu]; + struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); unsigned int freq; dprintk("get_cur_freq_on_cpu (%d)\n", cpu); @@ -370,7 +371,7 @@ static unsigned int check_freqs(cpumask_t mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; cpumask_t online_policy_cpus; @@ -466,7 +467,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); dprintk("acpi_cpufreq_verify\n"); @@ -570,7 +571,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; data->acpi_data = percpu_ptr(acpi_perf_data, cpu); - drv_data[cpu] = data; + per_cpu(drv_data, cpu) = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; @@ -714,20 +715,20 @@ err_unreg: acpi_processor_unregister_performance(perf, cpu); err_free: kfree(data); - drv_data[cpu] = NULL; + per_cpu(drv_data, cpu) = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); dprintk("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); - drv_data[policy->cpu] = NULL; + per_cpu(drv_data, policy->cpu) = NULL; acpi_processor_unregister_performance(data->acpi_data, policy->cpu); kfree(data); @@ -738,7 +739,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); dprintk("acpi_cpufreq_resume\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 749d00cb2ebd..06fcce516d51 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -694,7 +694,7 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, if ( acpi_bus_get_device(obj_handle, &d) ) { return 0; } - *return_value = (void *)acpi_driver_data(d); + *return_value = acpi_driver_data(d); return 1; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 99e1ef9939be..a0522735dd9d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -52,7 +52,7 @@ /* serialize freq changes */ static DEFINE_MUTEX(fidvid_mutex); -static struct powernow_k8_data *powernow_data[NR_CPUS]; +static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); static int cpu_family = CPU_OPTERON; @@ -1018,7 +1018,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { cpumask_t oldmask = CPU_MASK_ALL; - struct powernow_k8_data *data = powernow_data[pol->cpu]; + struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; unsigned int newstate; @@ -1094,7 +1094,7 @@ err_out: /* Driver entry point to verify the policy and range of frequencies */ static int powernowk8_verify(struct cpufreq_policy *pol) { - struct powernow_k8_data *data = powernow_data[pol->cpu]; + struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); if (!data) return -EINVAL; @@ -1202,7 +1202,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - powernow_data[pol->cpu] = data; + per_cpu(powernow_data, pol->cpu) = data; return 0; @@ -1216,7 +1216,7 @@ err_out: static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) { - struct powernow_k8_data *data = powernow_data[pol->cpu]; + struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); if (!data) return -EINVAL; @@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu) cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; + data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu))); if (!data) return -EINVAL; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 88d66fb8411d..404a6a2d4016 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -5,6 +5,7 @@ #include <asm/dma.h> #include <asm/io.h> #include <asm/processor-cyrix.h> +#include <asm/processor-flags.h> #include <asm/timer.h> #include <asm/pci-direct.h> #include <asm/tsc.h> @@ -126,15 +127,12 @@ static void __cpuinit set_cx86_reorder(void) static void __cpuinit set_cx86_memwb(void) { - u32 cr0; - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ - cr0 = 0x20000000; - write_cr0(read_cr0() | cr0); + write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cc8c501b9f39..d1c372b018db 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -11,6 +11,8 @@ #include <asm/pgtable.h> #include <asm/msr.h> #include <asm/uaccess.h> +#include <asm/ptrace.h> +#include <asm/ds.h> #include "cpu.h" @@ -27,13 +29,14 @@ struct movsl_mask movsl_mask __read_mostly; #endif -void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - if (c->x86_vendor != X86_VENDOR_INTEL) - return; /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } /* @@ -113,6 +116,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned int l2 = 0; char *p = NULL; + early_init_intel(c); + #ifdef CONFIG_X86_F00F_BUG /* * All current models of Pentium and Pentium with MMX technology CPUs @@ -132,7 +137,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } #endif - select_idle_routine(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9 ) { unsigned eax = cpuid_eax(10); @@ -201,16 +205,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } #endif + if (cpu_has_xmm2) + set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability); if (c->x86 == 15) { set_bit(X86_FEATURE_P4, c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); } if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); @@ -219,6 +220,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } + + if (cpu_has_bts) + ds_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) @@ -342,5 +346,22 @@ unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) EXPORT_SYMBOL(cmpxchg_386_u32); #endif +#ifndef CONFIG_X86_CMPXCHG64 +unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) +{ + u64 prev; + unsigned long flags; + + /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_486_u64); +#endif + // arch_initcall(intel_cpu_init); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9f530ff43c21..8b4507b8469b 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -733,10 +733,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - cache_kobject[cpu]->parent = &sys_dev->kobj; - kobject_set_name(cache_kobject[cpu], "%s", "cache"); - cache_kobject[cpu]->ktype = &ktype_percpu_entry; - retval = kobject_register(cache_kobject[cpu]); + retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry, + &sys_dev->kobj, "%s", "cache"); if (retval < 0) { cpuid4_cache_sysfs_exit(cpu); return retval; @@ -746,23 +744,23 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object = INDEX_KOBJECT_PTR(cpu,i); this_object->cpu = cpu; this_object->index = i; - this_object->kobj.parent = cache_kobject[cpu]; - kobject_set_name(&(this_object->kobj), "index%1lu", i); - this_object->kobj.ktype = &ktype_cache; - retval = kobject_register(&(this_object->kobj)); + retval = kobject_init_and_add(&(this_object->kobj), + &ktype_cache, cache_kobject[cpu], + "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { - kobject_unregister( - &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); + kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); } - kobject_unregister(cache_kobject[cpu]); + kobject_put(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); break; } + kobject_uevent(&(this_object->kobj), KOBJ_ADD); } if (!retval) cpu_set(cpu, cache_dev_map); + kobject_uevent(cache_kobject[cpu], KOBJ_ADD); return retval; } @@ -778,8 +776,8 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) cpu_clear(cpu, cache_dev_map); for (i = 0; i < num_cache_leaves; i++) - kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - kobject_unregister(cache_kobject[cpu]); + kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + kobject_put(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); } diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index eef63e3630c2..e633c9c2b764 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -16,7 +16,7 @@ #include "mce.h" /* Machine Check Handler For AMD Athlon/Duron */ -static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) +static void k7_machine_check(struct pt_regs * regs, long error_code) { int recover=1; u32 alow, ahigh, high, low; @@ -27,29 +27,32 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) if (mcgstl & (1<<0)) /* Recoverable ? */ recover=0; - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); - for (i=1; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + for (i = 1; i < nr_mce_banks; i++) { + rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); if (high&(1<<31)) { + char misc[20]; + char addr[24]; + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); high &= ~(1<<31); if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); + rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); + snprintf(misc, 20, "[%08x%08x]", ahigh, alow); } if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); + rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + snprintf(addr, 24, " at %08x%08x", ahigh, alow); } - printk ("\n"); + printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", + smp_processor_id(), i, high, low, misc, addr); /* Clear it */ - wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); + wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); /* Serialize */ wmb(); add_taint(TAINT_MACHINE_CHECK); diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index 81fb6e2d35f3..ae9f628838f1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -8,7 +8,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c); void winchip_mcheck_init(struct cpuinfo_x86 *c); /* Call the installed machine check handler for this CPU setup. */ -extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); +extern void (*machine_check_vector)(struct pt_regs *, long error_code); extern int nr_mce_banks; diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 34c781eddee4..a5182dcd94ae 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -22,13 +22,13 @@ int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ /* Handle unconfigured int18 (should never happen) */ -static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) +static void unexpected_machine_check(struct pt_regs * regs, long error_code) { printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); } /* Call the installed machine check handler for this CPU setup. */ -void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ void mcheck_init(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b21d29fb5aa..9a699ed03598 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -63,7 +63,7 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); * separate MCEs from kernel messages to avoid bogus bug reports. */ -struct mce_log mcelog = { +static struct mce_log mcelog = { MCE_LOG_SIGNATURE, MCE_LOG_LEN, }; @@ -80,7 +80,7 @@ void mce_log(struct mce *mce) /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); return; } /* Old left over entry. Skip. */ @@ -110,12 +110,12 @@ static void print_mce(struct mce *m) KERN_EMERG "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->cpu, m->mcgstatus, m->bank, m->status); - if (m->rip) { + if (m->ip) { printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->rip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->rip); + print_symbol("{%s}", m->ip); printk("\n"); } printk(KERN_EMERG "TSC %Lx ", m->tsc); @@ -156,16 +156,16 @@ static int mce_available(struct cpuinfo_x86 *c) static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) { if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { - m->rip = regs->rip; + m->ip = regs->ip; m->cs = regs->cs; } else { - m->rip = 0; + m->ip = 0; m->cs = 0; } if (rip_msr) { /* Assume the RIP in the MSR is exact. Is this true? */ m->mcgstatus |= MCG_STATUS_EIPV; - rdmsrl(rip_msr, m->rip); + rdmsrl(rip_msr, m->ip); m->cs = 0; } } @@ -192,10 +192,10 @@ void do_machine_check(struct pt_regs * regs, long error_code) atomic_inc(&mce_entry); - if (regs) - notify_die(DIE_NMI, "machine check", regs, error_code, 18, - SIGKILL); - if (!banks) + if ((regs + && notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) + || !banks) goto out2; memset(&m, 0, sizeof(struct mce)); @@ -288,7 +288,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) * instruction which caused the MCE. */ if (m.mcgstatus & MCG_STATUS_EIPV) - user_space = panicm.rip && (panicm.cs & 3); + user_space = panicm.ip && (panicm.cs & 3); /* * If we know that the error was in user space, send a @@ -564,7 +564,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { unsigned long *cpu_tsc; - static DECLARE_MUTEX(mce_read_sem); + static DEFINE_MUTEX(mce_read_mutex); unsigned next; char __user *buf = ubuf; int i, err; @@ -573,12 +573,12 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, if (!cpu_tsc) return -ENOMEM; - down(&mce_read_sem); + mutex_lock(&mce_read_mutex); next = rcu_dereference(mcelog.next); /* Only supports full reads right now */ if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { - up(&mce_read_sem); + mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); return -EINVAL; } @@ -621,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, memset(&mcelog.entry[i], 0, sizeof(struct mce)); } } - up(&mce_read_sem); + mutex_unlock(&mce_read_mutex); kfree(cpu_tsc); return err ? -EFAULT : buf - ubuf; } @@ -634,8 +634,7 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) return 0; } -static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, - unsigned long arg) +static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int __user *p = (int __user *)arg; @@ -664,7 +663,7 @@ static const struct file_operations mce_chrdev_ops = { .release = mce_release, .read = mce_read, .poll = mce_poll, - .ioctl = mce_ioctl, + .unlocked_ioctl = mce_ioctl, }; static struct miscdevice mce_log_device = { @@ -745,7 +744,7 @@ static void mce_restart(void) static struct sysdev_class mce_sysclass = { .resume = mce_resume, - set_kset_name("machinecheck"), + .name = "machinecheck", }; DEFINE_PER_CPU(struct sys_device, device_mce); @@ -855,8 +854,8 @@ static void mce_remove_device(unsigned int cpu) } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int -mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -873,7 +872,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block mce_cpu_notifier = { +static struct notifier_block mce_cpu_notifier __cpuinitdata = { .notifier_call = mce_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 752fb16a817d..32671da8184e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -65,7 +65,7 @@ static struct threshold_block threshold_defaults = { }; struct threshold_bank { - struct kobject kobj; + struct kobject *kobj; struct threshold_block *blocks; cpumask_t cpus; }; @@ -118,6 +118,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) { unsigned int bank, block; unsigned int cpu = smp_processor_id(); + u8 lvt_off; u32 low = 0, high = 0, address = 0; for (bank = 0; bank < NR_BANKS; ++bank) { @@ -153,14 +154,13 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) if (shared_bank[bank] && c->cpu_core_id) break; #endif + lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR, + APIC_EILVT_MSG_FIX, 0); + high &= ~MASK_LVTOFF_HI; - high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20; + high |= lvt_off << 20; wrmsr(address, low, high); - setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD, - THRESHOLD_APIC_VECTOR, - K8_APIC_EXT_INT_MSG_FIX, 0); - threshold_defaults.address = address; threshold_restart_bank(&threshold_defaults, 0, 0); } @@ -432,10 +432,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, else per_cpu(threshold_banks, cpu)[bank]->blocks = b; - kobject_set_name(&b->kobj, "misc%i", block); - b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; - b->kobj.ktype = &threshold_ktype; - err = kobject_register(&b->kobj); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, + per_cpu(threshold_banks, cpu)[bank]->kobj, + "misc%i", block); if (err) goto out_free; recurse: @@ -451,11 +450,14 @@ recurse: if (err) goto out_free; + if (b) + kobject_uevent(&b->kobj, KOBJ_ADD); + return err; out_free: if (b) { - kobject_unregister(&b->kobj); + kobject_put(&b->kobj); kfree(b); } return err; @@ -489,7 +491,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, - &b->kobj, name); + b->kobj, name); if (err) goto out; @@ -505,16 +507,15 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - kobject_set_name(&b->kobj, "threshold_bank%i", bank); - b->kobj.parent = &per_cpu(device_mce, cpu).kobj; + b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); + if (!b->kobj) + goto out_free; + #ifndef CONFIG_SMP b->cpus = CPU_MASK_ALL; #else b->cpus = per_cpu(cpu_core_map, cpu); #endif - err = kobject_register(&b->kobj); - if (err) - goto out_free; per_cpu(threshold_banks, cpu)[bank] = b; @@ -531,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) continue; err = sysfs_create_link(&per_cpu(device_mce, i).kobj, - &b->kobj, name); + b->kobj, name); if (err) goto out; @@ -554,7 +555,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu) int err = 0; for (bank = 0; bank < NR_BANKS; ++bank) { - if (!(per_cpu(bank_map, cpu) & 1 << bank)) + if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; err = threshold_create_bank(cpu, bank); if (err) @@ -581,7 +582,7 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_unregister(&pos->kobj); + kobject_put(&pos->kobj); list_del(&pos->miscj); kfree(pos); } @@ -627,7 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: - kobject_unregister(&b->kobj); + kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } @@ -637,14 +638,14 @@ static void threshold_remove_device(unsigned int cpu) unsigned int bank; for (bank = 0; bank < NR_BANKS; ++bank) { - if (!(per_cpu(bank_map, cpu) & 1 << bank)) + if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); } } /* get notified when a cpu comes on/off */ -static int threshold_cpu_callback(struct notifier_block *nfb, +static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* cpu was unsigned int to begin with */ @@ -669,7 +670,7 @@ static int threshold_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier = { +static struct notifier_block threshold_cpu_notifier __cpuinitdata = { .notifier_call = threshold_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index be4dabfee1f5..cb03345554a5 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -57,7 +57,7 @@ static void intel_thermal_interrupt(struct pt_regs *regs) /* Thermal interrupt handler for this CPU setup */ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; -fastcall void smp_thermal_interrupt(struct pt_regs *regs) +void smp_thermal_interrupt(struct pt_regs *regs) { irq_enter(); vendor_thermal_interrupt(regs); @@ -141,7 +141,7 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) rdmsr (MSR_IA32_MCG_EIP, r->eip, h); } -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs * regs, long error_code) { int recover=1; u32 alow, ahigh, high, low; @@ -152,38 +152,41 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) if (mcgstl & (1<<0)) /* Recoverable ? */ recover=0; - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); if (mce_num_extended_msrs > 0) { struct intel_mce_extended_msrs dbg; intel_get_extended_msrs(&dbg); - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" + "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" + "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags, + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, dbg.esi, dbg.edi, dbg.ebp, dbg.esp); } - for (i=0; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + for (i = 0; i < nr_mce_banks; i++) { + rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); if (high & (1<<31)) { + char misc[20]; + char addr[24]; + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); high &= ~(1<<31); if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); + rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); + snprintf(misc, 20, "[%08x%08x]", ahigh, alow); } if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); + rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + snprintf(addr, 24, " at %08x%08x", ahigh, alow); } - printk ("\n"); + printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", + smp_processor_id(), i, high, low, misc, addr); } } diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 94bc43d950cf..a18310aaae0c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -16,7 +16,7 @@ #include "mce.h" /* Machine check handler for Pentium class Intel */ -static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) +static void pentium_machine_check(struct pt_regs * regs, long error_code) { u32 loaddr, hi, lotype; rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index deeae42ce199..74342604d30e 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -16,7 +16,7 @@ #include "mce.h" /* Machine Check Handler For PII/PIII */ -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +static void intel_machine_check(struct pt_regs * regs, long error_code) { int recover=1; u32 alow, ahigh, high, low; @@ -27,27 +27,30 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) if (mcgstl & (1<<0)) /* Recoverable ? */ recover=0; - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); - for (i=0; i<nr_mce_banks; i++) { - rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); + for (i = 0; i < nr_mce_banks; i++) { + rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); if (high & (1<<31)) { + char misc[20]; + char addr[24]; + misc[0] = addr[0] = '\0'; if (high & (1<<29)) recover |= 1; if (high & (1<<25)) recover |= 2; - printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); high &= ~(1<<31); if (high & (1<<27)) { - rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ("[%08x%08x]", ahigh, alow); + rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); + snprintf(misc, 20, "[%08x%08x]", ahigh, alow); } if (high & (1<<26)) { - rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk (" at %08x%08x", ahigh, alow); + rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); + snprintf(addr, 24, " at %08x%08x", ahigh, alow); } - printk ("\n"); + printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", + smp_processor_id(), i, high, low, misc, addr); } } diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 9e424b6c293d..3d428d5afc52 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,7 +15,7 @@ #include "mce.h" /* Machine check handler for WinChip C6 */ -static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) +static void winchip_machine_check(struct pt_regs * regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK); diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 0949cdbf848a..ee2331b0e58f 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -53,8 +53,6 @@ static void amd_set_mtrr(unsigned int reg, unsigned long base, <base> The base address of the region. <size> The size of the region. If this is 0 the region is disabled. <type> The type of the region. - <do_safe> If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 9964be3de2b7..8e139c70f888 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -4,6 +4,7 @@ #include <asm/msr.h> #include <asm/io.h> #include <asm/processor-cyrix.h> +#include <asm/processor-flags.h> #include "mtrr.h" int arr3_protected; @@ -142,7 +143,7 @@ static void prepare_set(void) /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 992f08dfbb6c..103d61a59b19 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -9,11 +9,12 @@ #include <asm/msr.h> #include <asm/system.h> #include <asm/cpufeature.h> +#include <asm/processor-flags.h> #include <asm/tlbflush.h> #include "mtrr.h" struct mtrr_state { - struct mtrr_var_range *var_ranges; + struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; mtrr_type fixed_ranges[NUM_FIXED_RANGES]; unsigned char enabled; unsigned char have_fixed; @@ -85,12 +86,6 @@ void __init get_mtrr_state(void) struct mtrr_var_range *vrs; unsigned lo, dummy; - if (!mtrr_state.var_ranges) { - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), - GFP_KERNEL); - if (!mtrr_state.var_ranges) - return; - } vrs = mtrr_state.var_ranges; rdmsr(MTRRcap_MSR, lo, dummy); @@ -188,7 +183,7 @@ static inline void k8_enable_fixed_iorrs(void) * \param changed pointer which indicates whether the MTRR needed to be changed * \param msrwords pointer to the MSR values which the MSR should have */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) { unsigned lo, hi; @@ -200,7 +195,7 @@ static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) k8_enable_fixed_iorrs(); mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; + *changed = true; } } @@ -260,7 +255,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, static int set_fixed_ranges(mtrr_type * frs) { unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; + bool changed = false; int block=-1, range; while (fixed_range_blocks[++block].ranges) @@ -273,17 +268,17 @@ static int set_fixed_ranges(mtrr_type * frs) /* Set the MSR pair relating to a var range. Returns TRUE if changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) { unsigned int lo, hi; - int changed = FALSE; + bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; + changed = true; } rdmsr(MTRRphysMask_MSR(index), lo, hi); @@ -292,7 +287,7 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; + changed = true; } return changed; } @@ -350,7 +345,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) spin_lock(&set_atomicity_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ - cr0 = read_cr0() | 0x40000000; /* set CD flag */ + cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); wbinvd(); @@ -417,8 +412,6 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, <base> The base address of the region. <size> The size of the region. If this is 0 the region is disabled. <type> The type of the region. - <do_safe> If TRUE, do the change safely. If FALSE, safety measures should - be done externally. [RETURNS] Nothing. */ { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index c7d8f1756745..91e150acb46c 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -11,10 +11,6 @@ #include <asm/mtrr.h> #include "mtrr.h" -/* RED-PEN: this is accessed without any locking */ -extern unsigned int *usage_table; - - #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) static const char *const mtrr_strings[MTRR_NUM_TYPES] = @@ -37,7 +33,7 @@ const char *mtrr_attrib_to_str(int x) static int mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) + unsigned int type, bool increment, struct file *file, int page) { int reg, max; unsigned int *fcount = FILE_FCOUNT(file); @@ -55,7 +51,7 @@ mtrr_file_add(unsigned long base, unsigned long size, base >>= PAGE_SHIFT; size >>= PAGE_SHIFT; } - reg = mtrr_add_page(base, size, type, 1); + reg = mtrr_add_page(base, size, type, true); if (reg >= 0) ++fcount[reg]; return reg; @@ -141,7 +137,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) size >>= PAGE_SHIFT; err = mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); + true); if (err < 0) return err; return len; @@ -217,7 +213,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 0); break; case MTRRIOC_SET_ENTRY: @@ -226,7 +222,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + err = mtrr_add(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_ENTRY: #ifdef CONFIG_COMPAT @@ -270,7 +266,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + mtrr_file_add(sentry.base, sentry.size, sentry.type, true, file, 1); break; case MTRRIOC_SET_PAGE_ENTRY: @@ -279,7 +275,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) #endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + err = + mtrr_add_page(sentry.base, sentry.size, sentry.type, false); break; case MTRRIOC_DEL_PAGE_ENTRY: #ifdef CONFIG_COMPAT @@ -396,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) for (i = 0; i < max; i++) { mtrr_if->get(i, &base, &size, &type); if (size == 0) - usage_table[i] = 0; + mtrr_usage_table[i] = 0; else { if (size < (0x100000 >> PAGE_SHIFT)) { /* less than 1MB */ @@ -410,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) len += seq_printf(seq, "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), usage_table[i]); + mtrr_attrib_to_str(type), mtrr_usage_table[i]); } } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 3b20613325dc..715919582657 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -38,8 +38,8 @@ #include <linux/cpu.h> #include <linux/mutex.h> +#include <asm/e820.h> #include <asm/mtrr.h> - #include <asm/uaccess.h> #include <asm/processor.h> #include <asm/msr.h> @@ -47,7 +47,7 @@ u32 num_var_ranges = 0; -unsigned int *usage_table; +unsigned int mtrr_usage_table[MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -121,13 +121,8 @@ static void __init init_table(void) int i, max; max = num_var_ranges; - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) - == NULL) { - printk(KERN_ERR "mtrr: could not allocate\n"); - return; - } for (i = 0; i < max; i++) - usage_table[i] = 1; + mtrr_usage_table[i] = 1; } struct set_mtrr_data { @@ -311,7 +306,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, */ int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) + unsigned int type, bool increment) { int i, replace, error; mtrr_type ltype; @@ -349,7 +344,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, replace = -1; /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); + get_online_cpus(); /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { @@ -383,7 +378,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, goto out; } if (increment) - ++usage_table[i]; + ++mtrr_usage_table[i]; error = i; goto out; } @@ -391,13 +386,15 @@ int mtrr_add_page(unsigned long base, unsigned long size, i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { set_mtrr(i, base, size, type); - if (likely(replace < 0)) - usage_table[i] = 1; - else { - usage_table[i] = usage_table[replace] + !!increment; + if (likely(replace < 0)) { + mtrr_usage_table[i] = 1; + } else { + mtrr_usage_table[i] = mtrr_usage_table[replace]; + if (increment) + mtrr_usage_table[i]++; if (unlikely(replace != i)) { set_mtrr(replace, 0, 0, 0); - usage_table[replace] = 0; + mtrr_usage_table[replace] = 0; } } } else @@ -405,7 +402,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); return error; } @@ -460,7 +457,7 @@ static int mtrr_check(unsigned long base, unsigned long size) int mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) + bool increment) { if (mtrr_check(base, size)) return -EINVAL; @@ -495,7 +492,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); + get_online_cpus(); mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ @@ -527,16 +524,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); goto out; } - if (usage_table[reg] < 1) { + if (mtrr_usage_table[reg] < 1) { printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); goto out; } - if (--usage_table[reg] < 1) + if (--mtrr_usage_table[reg] < 1) set_mtrr(reg, 0, 0, 0); error = reg; out: mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); return error; } /** @@ -591,16 +588,11 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value * mtrr_state; +static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { int i; - int size = num_var_ranges * sizeof(struct mtrr_value); - - mtrr_state = kzalloc(size,GFP_ATOMIC); - if (!mtrr_state) - return -ENOMEM; for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, @@ -622,7 +614,6 @@ static int mtrr_restore(struct sys_device * sysdev) mtrr_state[i].lsize, mtrr_state[i].ltype); } - kfree(mtrr_state); return 0; } @@ -633,6 +624,112 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +static int disable_mtrr_trim; + +static int __init disable_mtrr_trim_setup(char *str) +{ + disable_mtrr_trim = 1; + return 0; +} +early_param("disable_mtrr_trim", disable_mtrr_trim_setup); + +/* + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB + * for memory >4GB. Check for that here. + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't + * apply to are wrong, but so far we don't know of any such case in the wild. + */ +#define Tom2Enabled (1U << 21) +#define Tom2ForceMemTypeWB (1U << 22) + +static __init int amd_special_default_mtrr(void) +{ + u32 l, h; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) + return 0; + /* In case some hypervisor doesn't pass SYSCFG through */ + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + return 0; + /* + * Memory between 4GB and top of mem is forced WB by this magic bit. + * Reserved before K8RevF, but should be zero there. + */ + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == + (Tom2Enabled | Tom2ForceMemTypeWB)) + return 1; + return 0; +} + +/** + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs + * + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain + * memory configurations. This routine checks that the highest MTRR matches + * the end of memory, to make sure the MTRRs having a write back type cover + * all of the memory the kernel is intending to use. If not, it'll trim any + * memory off the end by adjusting end_pfn, removing it from the kernel's + * allocation pools, warning the user with an obnoxious message. + */ +int __init mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + unsigned long i, base, size, highest_addr = 0, def, dummy; + mtrr_type type; + u64 trim_start, trim_size; + + /* + * Make sure we only trim uncachable memory on machines that + * support the Intel MTRR architecture: + */ + if (!is_cpu(INTEL) || disable_mtrr_trim) + return 0; + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (def != MTRR_TYPE_UNCACHABLE) + return 0; + + if (amd_special_default_mtrr()) + return 0; + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_WRBACK) + continue; + base <<= PAGE_SHIFT; + size <<= PAGE_SHIFT; + if (highest_addr < base + size) + highest_addr = base + size; + } + + /* kvm/qemu doesn't have mtrr set right, don't trim them all */ + if (!highest_addr) { + printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); + WARN_ON(1); + return 0; + } + + if ((highest_addr >> PAGE_SHIFT) < end_pfn) { + printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" + " all of memory, losing %LdMB of RAM.\n", + (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20); + + WARN_ON(1); + + printk(KERN_INFO "update e820 for mtrr\n"); + trim_start = highest_addr; + trim_size = end_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + add_memory_region(trim_start, trim_size, E820_RESERVED); + update_e820(); + return 1; + } + + return 0; +} /** * mtrr_bp_init - initialize mtrrs on the boot CPU diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 289dfe6030e3..fb74a2c20814 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -2,10 +2,8 @@ * local mtrr defines. */ -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif +#include <linux/types.h> +#include <linux/stddef.h> #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff @@ -14,6 +12,7 @@ #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) #define NUM_FIXED_RANGES 88 +#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -34,6 +33,8 @@ an 8 bit field: */ typedef u8 mtrr_type; +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; + struct mtrr_ops { u32 vendor; u32 use_intel_if; diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 49e20c2afcdf..9f8ba923d1c9 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -4,6 +4,7 @@ #include <asm/mtrr.h> #include <asm/msr.h> #include <asm/processor-cyrix.h> +#include <asm/processor-flags.h> #include "mtrr.h" @@ -25,7 +26,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) /* Disable and flush caches. Note that wbinvd flushes the TLBs as a side-effect */ - cr0 = read_cr0() | 0x40000000; + cr0 = read_cr0() | X86_CR0_CD; wbinvd(); write_cr0(cr0); wbinvd(); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index c02541e6e653..9b838324b818 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -167,7 +167,6 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 3900e46d66db..028213260148 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -188,7 +188,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) static void c_stop(struct seq_file *m, void *v) { } -struct seq_operations cpuinfo_op = { +const struct seq_operations cpuinfo_op = { .start = c_start, .next = c_next, .stop = c_stop, |