diff options
Diffstat (limited to 'arch/i386')
40 files changed, 444 insertions, 212 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6004bb0795e0..968fabd8723f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -464,7 +464,6 @@ config NUMA depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) - select SPARSEMEM_STATIC # Need comments to help the hapless user trying to turn on NUMA support comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" @@ -493,6 +492,10 @@ config HAVE_ARCH_ALLOC_REMAP depends on NUMA default y +config ARCH_FLATMEM_ENABLE + def_bool y + depends on (ARCH_SELECT_MEMORY_MODEL && X86_PC) + config ARCH_DISCONTIGMEM_ENABLE def_bool y depends on NUMA @@ -503,7 +506,8 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on NUMA + depends on (NUMA || (X86_PC && EXPERIMENTAL)) + select SPARSEMEM_STATIC config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 53bbb3c008ee..79603b3471f9 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -39,6 +39,7 @@ config M386 - "Winchip-2" for IDT Winchip 2. - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" For AMD Geode GX and LX processors. - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). @@ -171,6 +172,11 @@ config MGEODEGX1 help Select this for a Geode GX1 (Cyrix MediaGX) chip. +config MGEODE_LX + bool "Geode GX/LX" + help + Select this for AMD Geode GX and LX processors. + config MCYRIXIII bool "CyrixIII/VIA-C3" help @@ -220,8 +226,8 @@ config X86_XADD config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC - default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK @@ -290,12 +296,12 @@ config X86_INTEL_USERCOPY config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX default y config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on MCYRIXIII || MK7 || MGEODE_LX default y config X86_OOSTORE diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index c48b424dd640..bf32ecc9ad04 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -42,6 +42,16 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + depends on DEBUG_KERNEL + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const + data. This option may have a slight performance impact because a + portion of the kernel code won't be covered by a 2MB TLB anymore. + If in doubt, say "N". + config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on DEBUG_KERNEL diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 496a2c9909fe..d8f94e78de8a 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -721,7 +721,7 @@ static int __init apic_set_verbosity(char *str) apic_verbosity = APIC_VERBOSE; else printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug", str); + " use apic=verbose or apic=debug\n", str); return 0; } diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 1e60acbed3c1..2d793d4aef1a 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -303,17 +303,6 @@ extern int (*console_blank_hook)(int); #include "apm.h" /* - * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is - * supposed to provide limit information that it recognizes. Many machines - * do this correctly, but many others do not restrict themselves to their - * claimed limit. When this happens, they will cause a segmentation - * violation in the kernel at boot time. Most BIOS's, however, will - * respect a 64k limit, so we use that. If you want to be pedantic and - * hold your BIOS to its claims, then undefine this. - */ -#define APM_RELAX_SEGMENTS - -/* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. * This patched by Chad Miller <cmiller@surfsouth.com>, original code by * David Chen <chen@ctpa04.mit.edu> @@ -1075,22 +1064,23 @@ static int apm_engage_power_management(u_short device, int enable) static int apm_console_blank(int blank) { - int error; - u_short state; + int error, i; + u_short state; + static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; state = blank ? APM_STATE_STANDBY : APM_STATE_READY; - /* Blank the first display device */ - error = set_power_state(0x100, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) { - /* try to blank them all instead */ - error = set_power_state(0x1ff, state); - if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) - /* try to blank device one instead */ - error = set_power_state(0x101, state); + + for (i = 0; i < ARRAY_SIZE(dev); i++) { + error = set_power_state(dev[i], state); + + if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) + return 1; + + if (error == APM_NOT_ENGAGED) + break; } - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - if (error == APM_NOT_ENGAGED) { + + if (error == APM_NOT_ENGAGED && state != APM_STATE_READY) { static int tried; int eng_error; if (tried++ == 0) { @@ -2233,8 +2223,8 @@ static struct dmi_system_id __initdata apm_dmi_table[] = { static int __init apm_init(void) { struct proc_dir_entry *apm_proc; + struct desc_struct *gdt; int ret; - int i; dmi_check_system(apm_dmi_table); @@ -2312,45 +2302,30 @@ static int __init apm_init(void) set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); + /* + * Set up the long jump entry point to the APM BIOS, which is called + * from inline assembly. + */ apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; - for (i = 0; i < NR_CPUS; i++) { - struct desc_struct *gdt = get_cpu_gdt_table(i); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); -#ifndef APM_RELAX_SEGMENTS - if (apm_info.bios.version == 0x100) { -#endif - /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); - /* For some unknown machine. */ - _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); - /* For the DEC Hinote Ultra CT475 (and others?) */ - _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); -#ifndef APM_RELAX_SEGMENTS - } else { - _set_limit((char *)&gdt[APM_CS >> 3], - (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_CS_16 >> 3], - (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_DS >> 3], - (apm_info.bios.dseg_len - 1) & 0xffff); - /* workaround for broken BIOSes */ - if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); - if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ - /* for the BIOS that assumes granularity = 1 */ - gdt[APM_DS >> 3].b |= 0x800000; - printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); - } - } -#endif - } + /* + * The APM 1.1 BIOS is supposed to provide limit information that it + * recognizes. Many machines do this correctly, but many others do + * not restrict themselves to their claimed limit. When this happens, + * they will cause a segmentation violation in the kernel at boot time. + * Most BIOS's, however, will respect a 64k limit, so we use that. + * + * Note we only set APM segments on CPU zero, since we pin the APM + * code to that CPU. + */ + gdt = get_cpu_gdt_table(0); + set_base(gdt[APM_CS >> 3], + __va((unsigned long)apm_info.bios.cseg << 4)); + set_base(gdt[APM_CS_16 >> 3], + __va((unsigned long)apm_info.bios.cseg_16 << 4)); + set_base(gdt[APM_DS >> 3], + __va((unsigned long)apm_info.bios.dseg << 4)); apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); if (apm_proc) diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e344ef88cfcd..e7697e077f6b 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -161,8 +161,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); break; } - break; + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ /* Bit 15 of Athlon specific MSR 15, needs to be 0 diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 31e344b26bae..cca655688ffc 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -18,9 +18,6 @@ #include "cpu.h" -DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); - DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); @@ -599,11 +596,6 @@ void __devinit cpu_init(void) load_idt(&idt_descr); /* - * Delete NT - */ - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - - /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 04a405345203..2b62dee35c6c 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -177,9 +177,10 @@ static unsigned int nforce2_fsb_read(int bootfsb) */ static int nforce2_set_fsb(unsigned int fsb) { - u32 pll, temp = 0; + u32 temp = 0; unsigned int tfsb; int diff; + int pll = 0; if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 68a1fc87f4ca..0fbbd4c1072e 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -45,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.4" +#define VERSION "version 1.60.0" #include "powernow-k8.h" /* serialize freq changes */ @@ -216,10 +216,10 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) do { wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { @@ -336,7 +336,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid /* Phase 2 - core frequency transition */ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) { - u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid; + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", @@ -359,9 +359,11 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) : vcoreqfid - vcocurrfid; while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + if (reqfid > data->currfid) { if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + 2)) { + if (write_new_fid(data, data->currfid + fid_interval)) { return 1; } } else { @@ -371,7 +373,7 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) } } } else { - if (write_new_fid(data, data->currfid - 2)) + if (write_new_fid(data, data->currfid - fid_interval)) return 1; } @@ -464,7 +466,7 @@ static int check_supported_cpu(unsigned int cpu) set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); goto out; } @@ -474,7 +476,7 @@ static int check_supported_cpu(unsigned int cpu) eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_F)) { + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; } @@ -517,22 +519,24 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); return -ENODEV; } - if ((pst[j].fid > MAX_FID) - || (pst[j].fid & 1) - || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) { + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); return -EINVAL; } if (pst[j].fid < lastfid) lastfid = pst[j].fid; } if (lastfid & 1) { - printk(KERN_ERR PFX "lastfid invalid\n"); + printk(KERN_ERR BFX "lastfid invalid\n"); return -EINVAL; } if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO PFX "first fid not from lo freq table\n"); + printk(KERN_INFO BFX "first fid not from lo freq table\n"); return 0; } @@ -631,7 +635,7 @@ static int find_psb_table(struct powernow_k8_data *data) dprintk("table vers: 0x%x\n", psb->tableversion); if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_INFO BFX "PSB table is not v1.4\n"); + printk(KERN_ERR BFX "PSB table is not v1.4\n"); return -ENODEV; } @@ -689,7 +693,7 @@ static int find_psb_table(struct powernow_k8_data *data) * BIOS and Kernel Developer's Guide, which is available on * www.amd.com */ - printk(KERN_INFO PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); return -ENODEV; } @@ -912,7 +916,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -982,6 +986,9 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_t oldmask = CPU_MASK_ALL; int rc, i; + if (!cpu_online(pol->cpu)) + return -ENODEV; + if (!check_supported_cpu(pol->cpu)) return -ENODEV; @@ -1021,7 +1028,7 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); goto err_out; } @@ -1162,10 +1169,9 @@ static void __exit powernowk8_exit(void) cpufreq_unregister_driver(&cpufreq_amd64_driver); } -MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com."); +MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); MODULE_LICENSE("GPL"); late_initcall(powernowk8_init); module_exit(powernowk8_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index b1e85bb36396..d0de37d58e9a 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -42,7 +42,7 @@ struct powernow_k8_data { #define CPUID_XFAM 0x0ff00000 /* extended family */ #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_F 0x00040000 +#define CPUID_XMOD_REV_G 0x00060000 #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -86,13 +86,14 @@ struct powernow_k8_data { * low fid table * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry * in the low fid table - * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. * - lowest frequency must be >= interprocessor hypertransport link speed * (only applies to MP systems obviously) */ /* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ #define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ #define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ @@ -106,7 +107,7 @@ struct powernow_k8_data { #define MIN_FREQ 800 /* Min and max freqs, per spec */ #define MAX_FREQ 5000 -#define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ #define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ #define VID_OFF 0x3f diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c index 5b7d18a06afa..b425cd3d1838 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c @@ -40,6 +40,7 @@ static struct pci_dev *speedstep_chipset_dev; */ static unsigned int speedstep_processor = 0; +static u32 pmbase; /* * There are only two frequency states for each processor. Values @@ -56,34 +57,47 @@ static struct cpufreq_frequency_table speedstep_freqs[] = { /** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * speedstep_find_register - read the PMBASE address * - * Tries to change the SpeedStep state. + * Returns: -ENODEV if no register could be found */ -static void speedstep_set_state (unsigned int state) +static int speedstep_find_register (void) { - u32 pmbase; - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (!speedstep_chipset_dev || (state > 0x1)) - return; + if (!speedstep_chipset_dev) + return -ENODEV; /* get PMBASE */ pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); if (!(pmbase & 0x01)) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } pmbase &= 0xFFFFFFFE; if (!pmbase) { printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return; + return -ENODEV; } + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. + */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + /* Disable IRQs */ local_irq_save(flags); @@ -315,10 +329,11 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) cpus_allowed = current->cpus_allowed; set_cpus_allowed(current, policy->cpus); - /* detect low and high frequency */ + /* detect low and high frequency and transition latency */ result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, &speedstep_set_state); set_cpus_allowed(current, cpus_allowed); if (result) @@ -335,7 +350,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->cur = speed; result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); @@ -400,6 +414,9 @@ static int __init speedstep_init(void) return -EINVAL; } + if (speedstep_find_register()) + return -ENODEV; + return cpufreq_register_driver(&speedstep_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index d368b3f5fce8..7c47005a1805 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -320,11 +320,13 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)) { unsigned int prev_speed; unsigned int ret = 0; unsigned long flags; + struct timeval tv1, tv2; if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) return -EINVAL; @@ -337,7 +339,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, return -EIO; dprintk("previous speed is %u\n", prev_speed); - + local_irq_save(flags); /* switch to low state */ @@ -350,8 +352,17 @@ unsigned int speedstep_get_freqs(unsigned int processor, dprintk("low speed is %u\n", *low_speed); + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + /* switch to high state */ set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ + if (transition_latency) + do_gettimeofday(&tv2); + *high_speed = speedstep_get_processor_frequency(processor); if (!*high_speed) { ret = -EIO; @@ -369,6 +380,25 @@ unsigned int speedstep_get_freqs(unsigned int processor, if (*high_speed != prev_speed) set_state(SPEEDSTEP_LOW); + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high or too low + * and set it to a safe value (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + out: local_irq_restore(flags); return (ret); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index 261a2c9b7f6b..6a727fd3a77e 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -44,4 +44,5 @@ extern unsigned int speedstep_get_processor_frequency(unsigned int processor); extern unsigned int speedstep_get_freqs(unsigned int processor, unsigned int *low_speed, unsigned int *high_speed, + unsigned int *transition_latency, void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 2718fb6f6aba..28cc5d524afc 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -269,6 +269,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) result = speedstep_get_freqs(speedstep_processor, &speedstep_freqs[SPEEDSTEP_LOW].frequency, &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, &speedstep_set_state); if (result) { diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ff87cc22b323..75015975d038 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -343,6 +343,31 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) } /* + * Handle National Semiconductor branded processors + */ +static void __devinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC aquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected * by the fact that they preserve the flags across the division of 5/2. * PII and PPro exhibit this behavior too, but they have cpuid available. @@ -422,7 +447,7 @@ int __init cyrix_init_cpu(void) static struct cpu_dev nsc_cpu_dev __initdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, - .c_init = init_cyrix, + .c_init = init_nsc, .c_identify = generic_identify, }; diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index e7921315ae9d..6d91b274589c 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <asm/semaphore.h> #include <linux/seq_file.h> +#include <linux/cpufreq.h> /* * Get CPU information for use by the procfs. @@ -86,8 +87,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 13bae799e626..006141d1c12a 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -117,14 +117,13 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, { char __user *tmp = buf; u32 data[4]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); if (count % 16) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 16) { + for (; count; count -= 16) { do_cpuid(cpu, reg, data); if (copy_to_user(tmp, &data, 16)) return -EFAULT; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index e50b93155249..607c06007508 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -657,6 +657,7 @@ ENTRY(spurious_interrupt_bug) pushl $do_spurious_interrupt_bug jmp error_code +.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e437fb367498..5884469f6bfe 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -504,19 +504,24 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ - /* Segments used for calling PnP BIOS */ - .quad 0x00c09a0000000000 /* 0x90 32-bit code */ - .quad 0x00809a0000000000 /* 0x98 16-bit code */ - .quad 0x0080920000000000 /* 0xa0 16-bit data */ - .quad 0x0080920000000000 /* 0xa8 16-bit data */ - .quad 0x0080920000000000 /* 0xb0 16-bit data */ + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + .quad 0x00409a000000ffff /* 0x90 32-bit code */ + .quad 0x00009a000000ffff /* 0x98 16-bit code */ + .quad 0x000092000000ffff /* 0xa0 16-bit data */ + .quad 0x0000920000000000 /* 0xa8 16-bit data */ + .quad 0x0000920000000000 /* 0xb0 16-bit data */ + /* * The APM segments have byte granularity and their bases - * and limits are set at run time. + * are set at run time. All have 64k limits. */ - .quad 0x00409a0000000000 /* 0xb8 APM CS code */ - .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0xc8 APM DS data */ + .quad 0x00409a000000ffff /* 0xb8 APM CS code */ + .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004092000000ffff /* 0xc8 APM DS data */ .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ @@ -525,3 +530,5 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 180f070d03cb..3999bec50c33 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -3,8 +3,7 @@ #include <asm/checksum.h> #include <asm/desc.h> -/* This is definitely a GPL-only symbol */ -EXPORT_SYMBOL_GPL(cpu_gdt_table); +EXPORT_SYMBOL_GPL(cpu_gdt_descr); EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 22c8675c79f4..7554f8fd874a 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1722,8 +1722,8 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = 0; - + entry.dest.physical.physical_dest = + GET_APIC_ID(apic_read(APIC_ID)); /* * Add it to the IO-APIC irq-routing table: diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 32b0c24ab9a6..19edcd526ba4 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -191,7 +191,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index d7cede83ba2e..0102f3d50e57 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -38,6 +38,12 @@ int smp_found_config; unsigned int __initdata maxcpus = NR_CPUS; +#ifdef CONFIG_HOTPLUG_CPU +#define CPU_HOTPLUG_ENABLED (1) +#else +#define CPU_HOTPLUG_ENABLED (0) +#endif + /* * Various Linux-internal data structures created from the * MP-table. @@ -219,14 +225,18 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) cpu_set(num_processors, cpu_possible_map); num_processors++; - if ((num_processors > 8) && - ((APIC_XAPIC(ver) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) || - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))) - def_to_bigsmp = 1; - else - def_to_bigsmp = 0; - + if (CPU_HOTPLUG_ENABLED || (num_processors > 8)) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(ver)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 44470fea4309..1d0a55e68760 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -172,7 +172,6 @@ static ssize_t msr_read(struct file *file, char __user * buf, { u32 __user *tmp = (u32 __user *) buf; u32 data[2]; - size_t rv; u32 reg = *ppos; int cpu = iminor(file->f_dentry->d_inode); int err; @@ -180,7 +179,7 @@ static ssize_t msr_read(struct file *file, char __user * buf, if (count % 8) return -EINVAL; /* Invalid chunk size */ - for (rv = 0; count; count -= 8) { + for (; count; count -= 8) { err = do_rdmsr(cpu, reg, &data[0], &data[1]); if (err) return err; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index df6c2bcde067..45e7f0ac4b04 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -308,9 +308,7 @@ void show_regs(struct pt_regs * regs) cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); - if (current_cpu_data.x86 > 4) { - cr4 = read_cr4(); - } + cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); show_trace(NULL, ®s->esp); } @@ -404,17 +402,7 @@ void flush_thread(void) void release_thread(struct task_struct *dead_task) { - if (dead_task->mm) { - // temporary debugging check - if (dead_task->mm->context.size) { - printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", - dead_task->comm, - dead_task->mm->context.ldt, - dead_task->mm->context.size); - BUG(); - } - } - + BUG_ON(dead_task->mm); release_vm86_irqs(dead_task); } @@ -554,7 +542,9 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) struct pt_regs ptregs; ptregs = *(struct pt_regs *) - ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs)); + ((unsigned long)tsk->thread_info + + /* see comments in copy_thread() about -8 */ + THREAD_SIZE - sizeof(ptregs) - 8); ptregs.xcs &= 0xffff; ptregs.xds &= 0xffff; ptregs.xes &= 0xffff; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5ffbb4b7ad05..5c1fb6aada5b 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -32,9 +32,12 @@ * in exit.c or in signal.c. */ -/* determines which flags the user has access to. */ -/* 1 = access 0 = no access */ -#define FLAG_MASK 0x00044dd5 +/* + * Determines which flags the user has access to [1 = access, 0 = no access]. + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Also masks reserved bits (31-22, 15, 5, 3, 1). + */ +#define FLAG_MASK 0x00054dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 2afe0f8d555a..2fa5803a759d 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -111,12 +111,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, - { /* Handle problems with rebooting on HP nc6120 */ + { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, - .ident = "HP Compaq nc6120", + .ident = "HP Compaq Laptop", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6120"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, { } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index fdfcb0cba9b4..27c956db0461 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -954,6 +954,12 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) return 0; } +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, start, end); + return 0; +} /* * Find the highest page frame number we have available @@ -965,6 +971,7 @@ void __init find_max_pfn(void) max_pfn = 0; if (efi_enabled) { efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); return; } @@ -979,6 +986,7 @@ void __init find_max_pfn(void) continue; if (end > max_pfn) max_pfn = end; + memory_present(0, start, end); } } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d16520da4550..b3c2e2c26743 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -903,6 +903,12 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; + if (!cpu_gdt_descr[cpu].address && + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { + printk("Failed to allocate GDT for CPU %d\n", cpu); + return 1; + } + ++cpucount; /* @@ -1338,8 +1344,7 @@ int __cpu_disable(void) if (cpu == 0) return -EBUSY; - /* We enable the timer again on the exit path of the death loop */ - disable_APIC_timer(); + clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ local_irq_enable(); mdelay(1); diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 9b21a31d4f4e..f7ba4acc20ec 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -1,4 +1,3 @@ -.data ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index d395e3b42485..47675bbbb316 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -330,7 +330,9 @@ int recalibrate_cpu_khz(void) unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { + local_irq_disable(); init_cpu_khz(); + local_irq_enable(); cpu_data[0].loops_per_jiffy = cpufreq_scale(cpu_data[0].loops_per_jiffy, cpu_khz_old, diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index c34d1bfc5161..53ad954e3ba4 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -306,14 +306,17 @@ void die(const char * str, struct pt_regs * regs, long err) .lock_owner_depth = 0 }; static int die_counter; + unsigned long flags; if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irq(&die.lock); + spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } + else + local_save_flags(flags); if (++die.lock_owner_depth < 3) { int nl = 0; @@ -340,7 +343,7 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; - spin_unlock_irq(&die.lock); + spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current)) crash_kexec(regs); @@ -452,7 +455,7 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) @@ -650,13 +653,6 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) cpu = smp_processor_id(); -#ifdef CONFIG_HOTPLUG_CPU - if (!cpu_online(cpu)) { - nmi_exit(); - return; - } -#endif - ++nmi_count(cpu); if (!rcu_dereference(nmi_callback)(regs, cpu)) @@ -1082,9 +1078,9 @@ void __init trap_init(void) set_trap_gate(0,÷_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); - set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ + set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ set_system_gate(4,&overflow); - set_system_gate(5,&bounds); + set_trap_gate(5,&bounds); set_trap_gate(6,&invalid_op); set_trap_gate(7,&device_not_available); set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); @@ -1102,6 +1098,28 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); + if (cpu_has_fxsr) { + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generates a compile-time "error: zero width for bit-field" if + * the alignment is wrong. + */ + struct fxsrAlignAssert { + int _:!(offsetof(struct task_struct, + thread.i387.fxsave) & 15); + }; + + printk(KERN_INFO "Enabling fast FPU save and restore... "); + set_in_cr4(X86_CR4_OSFXSR); + printk("done.\n"); + } + if (cpu_has_xmm) { + printk(KERN_INFO "Enabling unmasked SIMD FPU exception " + "support... "); + set_in_cr4(X86_CR4_OSXMMEXCPT); + printk("done.\n"); + } + set_system_gate(SYSCALL_VECTOR,&system_call); /* diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 06e26f006238..7df494b51a5b 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -735,6 +735,30 @@ void free_initmem(void) printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10); } +#ifdef CONFIG_DEBUG_RODATA + +extern char __start_rodata, __end_rodata; +void mark_rodata_ro(void) +{ + unsigned long addr = (unsigned long)&__start_rodata; + + for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) + change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); + + printk ("Write protecting the kernel read-only data: %luk\n", + (unsigned long)(&__end_rodata - &__start_rodata) >> 10); + + /* + * change_page_attr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + + #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 5d09de8d1c6b..247fde76aaed 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -223,9 +223,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) } EXPORT_SYMBOL(ioremap_nocache); +/** + * iounmap - Free a IO remapping + * @addr: virtual address from ioremap_* + * + * Caller must ensure there is only one unmapping for the same pointer. + */ void iounmap(volatile void __iomem *addr) { - struct vm_struct *p; + struct vm_struct *p, *o; if ((void __force *)addr <= high_memory) return; @@ -239,22 +245,37 @@ void iounmap(volatile void __iomem *addr) addr < phys_to_virt(ISA_END_ADDRESS)) return; - write_lock(&vmlist_lock); - p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); - if (!p) { - printk(KERN_WARNING "iounmap: bad address %p\n", addr); + addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); + + /* Use the vm area unlocked, assuming the caller + ensures there isn't another iounmap for the same address + in parallel. Reuse of the virtual address is prevented by + leaving it in the global lists until we're done with it. + cpa takes care of the direct mappings. */ + read_lock(&vmlist_lock); + for (p = vmlist; p; p = p->next) { + if (p->addr == addr) + break; + } + read_unlock(&vmlist_lock); + + if (!p) { + printk("iounmap: bad address %p\n", addr); dump_stack(); - goto out_unlock; + return; } + /* Reset the direct mapping. Can block */ if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { change_page_attr(virt_to_page(__va(p->phys_addr)), p->size >> PAGE_SHIFT, PAGE_KERNEL); global_flush_tlb(); } -out_unlock: - write_unlock(&vmlist_lock); + + /* Finally remove it */ + o = remove_vm_area((void *)addr); + BUG_ON(p != o || o == NULL); kfree(p); } EXPORT_SYMBOL(iounmap); diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index f600fc244f02..c30a16df6440 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -13,6 +13,7 @@ #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/pgalloc.h> +#include <asm/sections.h> static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); @@ -36,7 +37,8 @@ pte_t *lookup_address(unsigned long address) return pte_offset_kernel(pmd, address); } -static struct page *split_large_page(unsigned long address, pgprot_t prot) +static struct page *split_large_page(unsigned long address, pgprot_t prot, + pgprot_t ref_prot) { int i; unsigned long addr; @@ -54,7 +56,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot) pbase = (pte_t *)page_address(base); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : PAGE_KERNEL)); + addr == address ? prot : ref_prot)); } return base; } @@ -98,11 +100,18 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) */ static inline void revert_page(struct page *kpte_page, unsigned long address) { - pte_t *linear = (pte_t *) + pgprot_t ref_prot; + pte_t *linear; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; + + linear = (pte_t *) pmd_offset(pud_offset(pgd_offset_k(address), address), address); set_pmd_pte(linear, address, pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE)); + ref_prot)); } static int @@ -123,10 +132,16 @@ __change_page_attr(struct page *page, pgprot_t prot) if ((pte_val(*kpte) & _PAGE_PSE) == 0) { set_pte_atomic(kpte, mk_pte(page, prot)); } else { - struct page *split = split_large_page(address, prot); + pgprot_t ref_prot; + struct page *split; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_EXEC : PAGE_KERNEL; + split = split_large_page(address, prot, ref_prot); if (!split) return -ENOMEM; - set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL)); + set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); kpte_page = split; } get_page(kpte_page); diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index ead6122dd06d..5461d4d5ea1e 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile @@ -1,7 +1,7 @@ obj-y := i386.o obj-$(CONFIG_PCI_BIOS) += pcbios.o -obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_PCI_DIRECT) += direct.o pci-y := fixup.o diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 94331d6be7a3..e3ac502bf2fb 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -13,7 +13,7 @@ #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) -static int pci_conf1_read(unsigned int seg, unsigned int bus, +int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; @@ -42,7 +42,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, return 0; } -static int pci_conf1_write(unsigned int seg, unsigned int bus, +int pci_conf1_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { unsigned long flags; diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 19e6f4871d1e..ee8e01697d96 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -846,7 +846,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) * reported by the device if possible. */ newirq = dev->irq; - if (!((1 << newirq) & mask)) { + if (newirq && !((1 << newirq) & mask)) { if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, pci_name(dev)); } diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index dfbf80cff834..4bb4d4b0f73a 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -19,21 +19,25 @@ /* The base address of the last MMCONFIG device accessed */ static u32 mmcfg_last_accessed_device; +static DECLARE_BITMAP(fallback_slots, 32); + /* * Functions for accessing PCI configuration space with MMCONFIG accesses */ -static u32 get_base_addr(unsigned int seg, int bus) +static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { int cfg_num = -1; struct acpi_table_mcfg_config *cfg; + if (seg == 0 && bus == 0 && + test_bit(PCI_SLOT(devfn), fallback_slots)) + return 0; + while (1) { ++cfg_num; if (cfg_num >= pci_mmcfg_config_num) { - /* something bad is going on, no cfg table is found. */ - /* so we fall back to the old way we used to do this */ - /* and just rely on the first entry to be correct. */ - return pci_mmcfg_config[0].base_address; + /* Not found - fallback to type 1 */ + return 0; } cfg = &pci_mmcfg_config[cfg_num]; if (cfg->pci_segment_group_number != seg) @@ -44,9 +48,9 @@ static u32 get_base_addr(unsigned int seg, int bus) } } -static inline void pci_exp_set_dev_base(unsigned int seg, int bus, int devfn) +static inline void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { - u32 dev_base = get_base_addr(seg, bus) | (bus << 20) | (devfn << 12); + u32 dev_base = base | (bus << 20) | (devfn << 12); if (dev_base != mmcfg_last_accessed_device) { mmcfg_last_accessed_device = dev_base; set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); @@ -57,13 +61,18 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; + u32 base; if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; + base = get_base_addr(seg, bus, devfn); + if (!base) + return pci_conf1_read(seg,bus,devfn,reg,len,value); + spin_lock_irqsave(&pci_config_lock, flags); - pci_exp_set_dev_base(seg, bus, devfn); + pci_exp_set_dev_base(base, bus, devfn); switch (len) { case 1: @@ -86,13 +95,18 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { unsigned long flags; + u32 base; if ((bus > 255) || (devfn > 255) || (reg > 4095)) return -EINVAL; + base = get_base_addr(seg, bus, devfn); + if (!base) + return pci_conf1_write(seg,bus,devfn,reg,len,value); + spin_lock_irqsave(&pci_config_lock, flags); - pci_exp_set_dev_base(seg, bus, devfn); + pci_exp_set_dev_base(base, bus, devfn); switch (len) { case 1: @@ -116,6 +130,37 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; +/* K8 systems have some devices (typically in the builtin northbridge) + that are only accessible using type1 + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM + and fallback for them. + We only do this for bus 0/seg 0 */ +static __init void unreachable_devices(void) +{ + int i; + unsigned long flags; + + for (i = 0; i < 32; i++) { + u32 val1; + u32 addr; + + pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + + /* Locking probably not needed, but safer */ + spin_lock_irqsave(&pci_config_lock, flags); + addr = get_base_addr(0, 0, PCI_DEVFN(i, 0)); + if (addr != 0) + pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0)); + if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1) + set_bit(i, fallback_slots); + spin_unlock_irqrestore(&pci_config_lock, flags); + } +} + static int __init pci_mmcfg_init(void) { if ((pci_probe & PCI_PROBE_MMCONF) == 0) @@ -131,6 +176,8 @@ static int __init pci_mmcfg_init(void) raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; + unreachable_devices(); + out: return 0; } diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index 127d53ad16be..f550781ec310 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -74,3 +74,10 @@ extern spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +extern int pci_conf1_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value); +extern int pci_conf1_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value); + + |