diff options
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/acpi/boot.c | 10 | ||||
-rw-r--r-- | arch/i386/kernel/acpi/processor.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/acpi/sleep.c | 19 | ||||
-rw-r--r-- | arch/i386/kernel/acpi/wakeup.S | 11 | ||||
-rw-r--r-- | arch/i386/kernel/apic.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/apm.c | 39 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 291 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 30 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 4 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 266 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/cyrix.c | 9 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/proc.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/i387.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/i8259.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/io_apic.c | 31 | ||||
-rw-r--r-- | arch/i386/kernel/irq.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/kprobes.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/microcode.c | 73 | ||||
-rw-r--r-- | arch/i386/kernel/setup.c | 106 | ||||
-rw-r--r-- | arch/i386/kernel/srat.c | 19 | ||||
-rw-r--r-- | arch/i386/kernel/syscall_table.S | 1 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 11 |
23 files changed, 649 insertions, 293 deletions
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 40e5aba3ad3d..97ca17189af5 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -202,6 +202,8 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) if (mcfg->config[i].base_reserved) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config_num = 0; return -ENODEV; } } @@ -215,7 +217,7 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size) { struct acpi_table_madt *madt = NULL; - if (!phys_addr || !size) + if (!phys_addr || !size || !cpu_has_apic) return -EINVAL; madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size); @@ -621,9 +623,9 @@ extern u32 pmtmr_ioport; static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { - struct fadt_descriptor_rev2 *fadt = NULL; + struct fadt_descriptor *fadt = NULL; - fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size); + fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size); if (!fadt) { printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; @@ -754,7 +756,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) return -ENODEV; } - if (!cpu_has_apic) + if (!cpu_has_apic) return -ENODEV; /* diff --git a/arch/i386/kernel/acpi/processor.c b/arch/i386/kernel/acpi/processor.c index 9f4cc02717ec..b54fded49834 100644 --- a/arch/i386/kernel/acpi/processor.c +++ b/arch/i386/kernel/acpi/processor.c @@ -47,7 +47,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) buf[2] = ACPI_PDC_C_CAPABILITY_SMP; if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 1cb2b186a3af..4ee83577bf61 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -8,30 +8,17 @@ #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/dmi.h> +#include <linux/cpumask.h> + #include <asm/smp.h> -#include <asm/tlbflush.h> /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; unsigned long acpi_video_flags; extern char wakeup_start, wakeup_end; -extern void zap_low_mappings(void); - extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); -static void init_low_mapping(pgd_t * pgd, int pgd_limit) -{ - int pgd_ofs = 0; - - while ((pgd_ofs < pgd_limit) - && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { - set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD)); - pgd_ofs++, pgd++; - } - flush_tlb_all(); -} - /** * acpi_save_state_mem - save kernel state * @@ -42,7 +29,6 @@ int acpi_save_state_mem(void) { if (!acpi_wakeup_address) return 1; - init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); memcpy((void *)acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); @@ -55,7 +41,6 @@ int acpi_save_state_mem(void) */ void acpi_restore_state_mem(void) { - zap_low_mappings(); } /** diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index 7c74fe0dc93c..9f408eee4e6f 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -56,7 +56,7 @@ wakeup_code: 1: # set up page table - movl $swapper_pg_dir-__PAGE_OFFSET, %eax + movl $swsusp_pg_dir-__PAGE_OFFSET, %eax movl %eax, %cr3 testl $1, real_efer_save_restore - wakeup_code @@ -265,11 +265,6 @@ ENTRY(acpi_copy_wakeup_routine) movl $0x12345678, saved_magic ret -.data -ALIGN -ENTRY(saved_magic) .long 0 -ENTRY(saved_eip) .long 0 - save_registers: leal 4(%esp), %eax movl %eax, saved_context_esp @@ -304,7 +299,11 @@ ret_point: call restore_processor_state ret +.data ALIGN +ENTRY(saved_magic) .long 0 +ENTRY(saved_eip) .long 0 + # saved registers saved_gdt: .long 0,0 saved_idt: .long 0,0 diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 3d4b2f3d116a..5ab59c12335b 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -62,7 +62,7 @@ int apic_verbosity; static void apic_pm_activate(void); -int modern_apic(void) +static int modern_apic(void) { unsigned int lvr, version; /* AMD systems use old APIC versions, so check the CPU */ @@ -113,7 +113,7 @@ void __init apic_intr_init(void) } /* Using APIC to generate smp_local_timer_interrupt? */ -int using_apic_timer = 0; +int using_apic_timer __read_mostly = 0; static int enabled_via_apicbase; diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index df0e1745f189..9e819eb68229 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -374,14 +374,14 @@ static struct { unsigned short segment; } apm_bios_entry; static int clock_slowed; -static int idle_threshold = DEFAULT_IDLE_THRESHOLD; -static int idle_period = DEFAULT_IDLE_PERIOD; +static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; +static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; static int set_pm_idle; static int suspends_pending; static int standbys_pending; static int ignore_sys_suspend; static int ignore_normal_resume; -static int bounce_interval = DEFAULT_BOUNCE_INTERVAL; +static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; #ifdef CONFIG_APM_RTC_IS_GMT # define clock_cmos_diff 0 @@ -390,8 +390,8 @@ static int bounce_interval = DEFAULT_BOUNCE_INTERVAL; static long clock_cmos_diff; static int got_clock_diff; #endif -static int debug; -static int smp; +static int debug __read_mostly; +static int smp __read_mostly; static int apm_disabled = -1; #ifdef CONFIG_SMP static int power_off; @@ -403,8 +403,8 @@ static int realmode_power_off = 1; #else static int realmode_power_off; #endif -static int exit_kapmd; -static int kapmd_running; +static int exit_kapmd __read_mostly; +static int kapmd_running __read_mostly; #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else @@ -416,15 +416,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user * user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; +static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; -static char driver_version[] = "1.16ac"; /* no spaces */ +static const char driver_version[] = "1.16ac"; /* no spaces */ /* * APM event names taken from the APM 1.2 specification. These are * the message codes that the BIOS uses to tell us about events */ -static char * apm_event_name[] = { +static const char * const apm_event_name[] = { "system standby", "system suspend", "normal resume", @@ -616,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, * @ecx_in: ECX register value for BIOS call * @eax: EAX register on return from the BIOS call * - * Make a BIOS call that does only returns one value, or just status. + * Make a BIOS call that returns one value only, or just status. * If there is an error, then the error code is returned in AH * (bits 8-15 of eax) and this function returns non-zero. This is * used for simpler BIOS operations. This call may hold interrupts @@ -822,7 +822,7 @@ static void apm_do_busy(void) #define IDLE_CALC_LIMIT (HZ * 100) #define IDLE_LEAKY_MAX 16 -static void (*original_pm_idle)(void); +static void (*original_pm_idle)(void) __read_mostly; /** * apm_cpu_idle - cpu idling for APM capable Linux @@ -1063,7 +1063,8 @@ static int apm_engage_power_management(u_short device, int enable) static int apm_console_blank(int blank) { - int error, i; + int error = APM_NOT_ENGAGED; /* silence gcc */ + int i; u_short state; static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; @@ -1104,7 +1105,8 @@ static int queue_empty(struct apm_user *as) static apm_event_t get_queued_event(struct apm_user *as) { - as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; + if (++as->event_tail >= APM_MAX_EVENTS) + as->event_tail = 0; return as->events[as->event_tail]; } @@ -1118,13 +1120,16 @@ static void queue_event(apm_event_t event, struct apm_user *sender) for (as = user_list; as != NULL; as = as->next) { if ((as == sender) || (!as->reader)) continue; - as->event_head = (as->event_head + 1) % APM_MAX_EVENTS; + if (++as->event_head >= APM_MAX_EVENTS) + as->event_head = 0; + if (as->event_head == as->event_tail) { static int notified; if (notified++ == 0) printk(KERN_ERR "apm: an event queue overflowed\n"); - as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; + if (++as->event_tail >= APM_MAX_EVENTS) + as->event_tail = 0; } as->events[as->event_head] = event; if ((!as->suser) || (!as->writer)) @@ -1282,7 +1287,7 @@ static void standby(void) static apm_event_t get_event(void) { int error; - apm_event_t event; + apm_event_t event = APM_NO_EVENTS; /* silence gcc */ apm_eventinfo_t info; static int notified; diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index a06a49075f10..44f2c5f2dda1 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -11,6 +11,8 @@ #include <asm/msr.h> #include <asm/io.h> #include <asm/mmu_context.h> +#include <asm/mtrr.h> +#include <asm/mce.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 1a7bdcef1926..05668e3598c0 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -48,12 +48,13 @@ MODULE_LICENSE("GPL"); struct cpufreq_acpi_io { - struct acpi_processor_performance acpi_data; + struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; unsigned int resume; }; static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; +static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; static struct cpufreq_driver acpi_cpufreq_driver; @@ -104,64 +105,43 @@ acpi_processor_set_performance ( { u16 port = 0; u8 bit_width = 0; - int ret; - u32 value = 0; int i = 0; - struct cpufreq_freqs cpufreq_freqs; - cpumask_t saved_mask; + int ret = 0; + u32 value = 0; int retval; + struct acpi_processor_performance *perf; dprintk("acpi_processor_set_performance\n"); - /* - * TBD: Use something other than set_cpus_allowed. - * As set_cpus_allowed is a bit racy, - * with any other set_cpus_allowed for this process. - */ - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - return (-EAGAIN); - } - - if (state == data->acpi_data.state) { + retval = 0; + perf = data->acpi_data; + if (state == perf->state) { if (unlikely(data->resume)) { dprintk("Called after resume, resetting to P%d\n", state); data->resume = 0; } else { dprintk("Already at target state (P%d)\n", state); - retval = 0; - goto migrate_end; + return (retval); } } - dprintk("Transitioning from P%d to P%d\n", - data->acpi_data.state, state); - - /* cpufreq frequency struct */ - cpufreq_freqs.cpu = cpu; - cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency; - cpufreq_freqs.new = data->freq_table[state].frequency; - - /* notify cpufreq */ - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + dprintk("Transitioning from P%d to P%d\n", perf->state, state); /* * First we write the target state's 'control' value to the * control_register. */ - port = data->acpi_data.control_register.address; - bit_width = data->acpi_data.control_register.bit_width; - value = (u32) data->acpi_data.states[state].control; + port = perf->control_register.address; + bit_width = perf->control_register.bit_width; + value = (u32) perf->states[state].control; dprintk("Writing 0x%08x to port 0x%04x\n", value, port); ret = acpi_processor_write_port(port, bit_width, value); if (ret) { dprintk("Invalid port width 0x%04x\n", bit_width); - retval = ret; - goto migrate_end; + return (ret); } /* @@ -177,48 +157,35 @@ acpi_processor_set_performance ( * before giving up. */ - port = data->acpi_data.status_register.address; - bit_width = data->acpi_data.status_register.bit_width; + port = perf->status_register.address; + bit_width = perf->status_register.bit_width; dprintk("Looking for 0x%08x from port 0x%04x\n", - (u32) data->acpi_data.states[state].status, port); + (u32) perf->states[state].status, port); - for (i=0; i<100; i++) { + for (i = 0; i < 100; i++) { ret = acpi_processor_read_port(port, bit_width, &value); if (ret) { dprintk("Invalid port width 0x%04x\n", bit_width); - retval = ret; - goto migrate_end; + return (ret); } - if (value == (u32) data->acpi_data.states[state].status) + if (value == (u32) perf->states[state].status) break; udelay(10); } } else { - value = (u32) data->acpi_data.states[state].status; + value = (u32) perf->states[state].status; } - /* notify cpufreq */ - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); - - if (unlikely(value != (u32) data->acpi_data.states[state].status)) { - unsigned int tmp = cpufreq_freqs.new; - cpufreq_freqs.new = cpufreq_freqs.old; - cpufreq_freqs.old = tmp; - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + if (unlikely(value != (u32) perf->states[state].status)) { printk(KERN_WARNING "acpi-cpufreq: Transition failed\n"); retval = -ENODEV; - goto migrate_end; + return (retval); } dprintk("Transition successful after %d microseconds\n", i * 10); - data->acpi_data.state = state; - - retval = 0; -migrate_end: - set_cpus_allowed(current, saved_mask); + perf->state = state; return (retval); } @@ -230,8 +197,17 @@ acpi_cpufreq_target ( unsigned int relation) { struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + struct acpi_processor_performance *perf; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; + unsigned int cur_state = 0; unsigned int next_state = 0; unsigned int result = 0; + unsigned int j; + unsigned int tmp; dprintk("acpi_cpufreq_setpolicy\n"); @@ -240,11 +216,95 @@ acpi_cpufreq_target ( target_freq, relation, &next_state); - if (result) + if (unlikely(result)) return (result); - result = acpi_processor_set_performance (data, policy->cpu, next_state); + perf = data->acpi_data; + cur_state = perf->state; + freqs.old = data->freq_table[cur_state].frequency; + freqs.new = data->freq_table[next_state].frequency; + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* + * We need to call driver->target() on all or any CPU in + * policy->cpus, depending on policy->shared_type. + */ + saved_mask = current->cpus_allowed; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + result = -EAGAIN; + break; + } + + result = acpi_processor_set_performance (data, j, next_state); + if (result) { + result = -EAGAIN; + break; + } + + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + break; + + cpu_set(j, covered_cpus); + } + + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + if (unlikely(result)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to online_policy_cpus and + * acpi_processor_set_performance() has been called on + * coverd_cpus. Best effort undo.. + */ + + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + policy->cpu = j; + acpi_processor_set_performance (data, + j, + cur_state); + } + } + + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + } + + set_cpus_allowed(current, saved_mask); return (result); } @@ -270,30 +330,65 @@ acpi_cpufreq_guess_freq ( struct cpufreq_acpi_io *data, unsigned int cpu) { + struct acpi_processor_performance *perf = data->acpi_data; + if (cpu_khz) { /* search the closest match to cpu_khz */ unsigned int i; unsigned long freq; - unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000; + unsigned long freqn = perf->states[0].core_frequency * 1000; - for (i=0; i < (data->acpi_data.state_count - 1); i++) { + for (i = 0; i < (perf->state_count - 1); i++) { freq = freqn; - freqn = data->acpi_data.states[i+1].core_frequency * 1000; + freqn = perf->states[i+1].core_frequency * 1000; if ((2 * cpu_khz) > (freqn + freq)) { - data->acpi_data.state = i; + perf->state = i; return (freq); } } - data->acpi_data.state = data->acpi_data.state_count - 1; + perf->state = perf->state_count - 1; return (freqn); - } else + } else { /* assume CPU is at P0... */ - data->acpi_data.state = 0; - return data->acpi_data.states[0].core_frequency * 1000; - + perf->state = 0; + return perf->states[0].core_frequency * 1000; + } } +/* + * acpi_cpufreq_early_init - initialize ACPI P-States library + * + * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) + * in order to determine correct frequency and voltage pairings. We can + * do _PDC and _PSD and find out the processor dependency for the + * actual init that will happen later... + */ +static int acpi_cpufreq_early_init_acpi(void) +{ + struct acpi_processor_performance *data; + unsigned int i, j; + + dprintk("acpi_cpufreq_early_init\n"); + + for_each_cpu(i) { + data = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!data) { + for_each_cpu(j) { + kfree(acpi_perf_data[j]); + acpi_perf_data[j] = NULL; + } + return (-ENOMEM); + } + acpi_perf_data[i] = data; + } + + /* Do initialization in ACPI core */ + acpi_processor_preregister_performance(acpi_perf_data); + return 0; +} + static int acpi_cpufreq_cpu_init ( struct cpufreq_policy *policy) @@ -303,41 +398,51 @@ acpi_cpufreq_cpu_init ( struct cpufreq_acpi_io *data; unsigned int result = 0; struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); + if (!acpi_perf_data[cpu]) + return (-ENODEV); + data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) return (-ENOMEM); + data->acpi_data = acpi_perf_data[cpu]; acpi_io_data[cpu] = data; - result = acpi_processor_register_performance(&data->acpi_data, cpu); + result = acpi_processor_register_performance(data->acpi_data, cpu); if (result) goto err_free; + perf = data->acpi_data; + policy->cpus = perf->shared_cpu_map; + policy->shared_type = perf->shared_type; + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; } /* capability check */ - if (data->acpi_data.state_count <= 1) { + if (perf->state_count <= 1) { dprintk("No P-States\n"); result = -ENODEV; goto err_unreg; } - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { + + if ((perf->control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) || + (perf->status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { dprintk("Unsupported address space [%d, %d]\n", - (u32) (data->acpi_data.control_register.space_id), - (u32) (data->acpi_data.status_register.space_id)); + (u32) (perf->control_register.space_id), + (u32) (perf->status_register.space_id)); result = -ENODEV; goto err_unreg; } /* alloc freq_table */ - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL); + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (perf->state_count + 1), GFP_KERNEL); if (!data->freq_table) { result = -ENOMEM; goto err_unreg; @@ -345,9 +450,9 @@ acpi_cpufreq_cpu_init ( /* detect transition latency */ policy->cpuinfo.transition_latency = 0; - for (i=0; i<data->acpi_data.state_count; i++) { - if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000; + for (i=0; i<perf->state_count; i++) { + if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000; } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; @@ -355,11 +460,11 @@ acpi_cpufreq_cpu_init ( policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); /* table init */ - for (i=0; i<=data->acpi_data.state_count; i++) + for (i=0; i<=perf->state_count; i++) { data->freq_table[i].index = i; - if (i<data->acpi_data.state_count) - data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; + if (i<perf->state_count) + data->freq_table[i].frequency = perf->states[i].core_frequency * 1000; else data->freq_table[i].frequency = CPUFREQ_TABLE_END; } @@ -374,12 +479,12 @@ acpi_cpufreq_cpu_init ( printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n", cpu); - for (i = 0; i < data->acpi_data.state_count; i++) + for (i = 0; i < perf->state_count; i++) dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == data->acpi_data.state?'*':' '), i, - (u32) data->acpi_data.states[i].core_frequency, - (u32) data->acpi_data.states[i].power, - (u32) data->acpi_data.states[i].transition_latency); + (i == perf->state?'*':' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency); cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); @@ -394,7 +499,7 @@ acpi_cpufreq_cpu_init ( err_freqfree: kfree(data->freq_table); err_unreg: - acpi_processor_unregister_performance(&data->acpi_data, cpu); + acpi_processor_unregister_performance(perf, cpu); err_free: kfree(data); acpi_io_data[cpu] = NULL; @@ -415,7 +520,7 @@ acpi_cpufreq_cpu_exit ( if (data) { cpufreq_frequency_table_put_attr(policy->cpu); acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(&data->acpi_data, policy->cpu); + acpi_processor_unregister_performance(data->acpi_data, policy->cpu); kfree(data); } @@ -462,7 +567,10 @@ acpi_cpufreq_init (void) dprintk("acpi_cpufreq_init\n"); - result = cpufreq_register_driver(&acpi_cpufreq_driver); + result = acpi_cpufreq_early_init_acpi(); + + if (!result) + result = cpufreq_register_driver(&acpi_cpufreq_driver); return (result); } @@ -471,10 +579,15 @@ acpi_cpufreq_init (void) static void __exit acpi_cpufreq_exit (void) { + unsigned int i; dprintk("acpi_cpufreq_exit\n"); cpufreq_unregister_driver(&acpi_cpufreq_driver); + for_each_cpu(i) { + kfree(acpi_perf_data[i]); + acpi_perf_data[i] = NULL; + } return; } diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index b4277f58f40c..2d6491672559 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -120,7 +120,7 @@ static int pending_bit_stuck(void) { u32 lo, hi; - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) return 0; rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -136,7 +136,7 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 lo, hi; u32 i = 0; - if (cpu_family) { + if (cpu_family == CPU_HW_PSTATE) { rdmsr(MSR_PSTATE_STATUS, lo, hi); i = lo & HW_PSTATE_MASK; rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); @@ -598,7 +598,7 @@ static void print_basics(struct powernow_k8_data *data) int j; for (j = 0; j < data->numps; j++) { if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { - if (cpu_family) { + if (cpu_family == CPU_HW_PSTATE) { printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8, (data->powernow_table[j].index & 0xff0000) >> 16, data->powernow_table[j].frequency/1000); @@ -758,7 +758,7 @@ static int find_psb_table(struct powernow_k8_data *data) #ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { - if (!data->acpi_data.state_count || cpu_family) + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; @@ -801,7 +801,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) ret_val = fill_powernow_table_pstate(data, powernow_table); else ret_val = fill_powernow_table_fidvid(data, powernow_table); @@ -885,8 +885,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf u32 vid; if (data->exttype) { - fid = data->acpi_data.states[i].status & FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK; + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; } else { fid = data->acpi_data.states[i].control & FID_MASK; vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; @@ -1082,7 +1082,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) dprintk("targ: curr fid 0x%x, did 0x%x\n", data->currfid, data->currvid); else { @@ -1103,7 +1103,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi powernow_k8_acpi_pst_values(data, newstate); - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) ret = transition_frequency_pstate(data, newstate); else ret = transition_frequency_fidvid(data, newstate); @@ -1115,7 +1115,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi } mutex_unlock(&fidvid_mutex); - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); else pol->cur = find_khz_freq_from_fid(data->currfid); @@ -1163,7 +1163,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. */ - if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { + if (num_online_cpus() != 1) { printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); kfree(data); return -ENODEV; @@ -1197,14 +1197,14 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (query_current_values_with_pending_wait(data)) goto err_out; - if (!cpu_family) + if (cpu_family == CPU_OPTERON) fidvid_msr_init(); /* run on any CPU again */ set_cpus_allowed(current, oldmask); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); else pol->cpus = cpu_core_map[pol->cpu]; @@ -1215,7 +1215,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); else pol->cur = find_khz_freq_from_fid(data->currfid); @@ -1232,7 +1232,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - if (cpu_family) + if (cpu_family == CPU_HW_PSTATE) dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", data->currfid, data->currdid); else diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index bf8ad9e43da3..0fb2a3001ba5 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -169,7 +169,9 @@ struct powernow_k8_data { #define MVS_MASK 3 #define VST_MASK 0x7f #define VID_MASK 0x1f -#define FID_MASK 0x3f +#define FID_MASK 0x1f +#define EXT_VID_MASK 0x3f +#define EXT_FID_MASK 0x3f /* diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index ce54ff12c15d..31c3a5baaa7f 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -2,19 +2,15 @@ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium * M (part of the Centrino chipset). * + * Since the original Pentium M, most new Intel CPUs support Enhanced + * SpeedStep. + * * Despite the "SpeedStep" in the name, this is almost entirely unlike * traditional SpeedStep. * * Modelled on speedstep.c * * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> - * - * WARNING WARNING WARNING - * - * This driver manipulates the PERF_CTL MSR, which is only somewhat - * documented. While it seems to work on my laptop, it has not been - * tested anywhere else, and it may not work for you, do strange - * things or simply crash. */ #include <linux/kernel.h> @@ -36,7 +32,7 @@ #include <asm/cpufeature.h> #define PFX "speedstep-centrino: " -#define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>" +#define MAINTAINER "cpufreq@lists.linux.org.uk" #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) @@ -351,7 +347,36 @@ static unsigned int get_cur_freq(unsigned int cpu) #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI -static struct acpi_processor_performance p; +static struct acpi_processor_performance *acpi_perf_data[NR_CPUS]; + +/* + * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States + * library + * + * Before doing the actual init, we need to do _PSD related setup whenever + * supported by the BIOS. These are handled by this early_init routine. + */ +static int centrino_cpu_early_init_acpi(void) +{ + unsigned int i, j; + struct acpi_processor_performance *data; + + for_each_cpu(i) { + data = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!data) { + for_each_cpu(j) { + kfree(acpi_perf_data[j]); + acpi_perf_data[j] = NULL; + } + return (-ENOMEM); + } + acpi_perf_data[i] = data; + } + + acpi_processor_preregister_performance(acpi_perf_data); + return 0; +} /* * centrino_cpu_init_acpi - register with ACPI P-States library @@ -365,46 +390,51 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) unsigned long cur_freq; int result = 0, i; unsigned int cpu = policy->cpu; + struct acpi_processor_performance *p; + + p = acpi_perf_data[cpu]; /* register with ACPI core */ - if (acpi_processor_register_performance(&p, cpu)) { - dprintk("obtaining ACPI data failed\n"); + if (acpi_processor_register_performance(p, cpu)) { + dprintk(PFX "obtaining ACPI data failed\n"); return -EIO; } + policy->cpus = p->shared_cpu_map; + policy->shared_type = p->shared_type; /* verify the acpi_data */ - if (p.state_count <= 1) { + if (p->state_count <= 1) { dprintk("No P-States\n"); result = -ENODEV; goto err_unreg; } - if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - p.control_register.space_id, p.status_register.space_id); + p->control_register.space_id, p->status_register.space_id); result = -EIO; goto err_unreg; } - for (i=0; i<p.state_count; i++) { - if (p.states[i].control != p.states[i].status) { + for (i=0; i<p->state_count; i++) { + if (p->states[i].control != p->states[i].status) { dprintk("Different control (%llu) and status values (%llu)\n", - p.states[i].control, p.states[i].status); + p->states[i].control, p->states[i].status); result = -EINVAL; goto err_unreg; } - if (!p.states[i].core_frequency) { + if (!p->states[i].core_frequency) { dprintk("Zero core frequency for state %u\n", i); result = -EINVAL; goto err_unreg; } - if (p.states[i].core_frequency > p.states[0].core_frequency) { + if (p->states[i].core_frequency > p->states[0].core_frequency) { dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i, - p.states[i].core_frequency, p.states[0].core_frequency); - p.states[i].core_frequency = 0; + p->states[i].core_frequency, p->states[0].core_frequency); + p->states[i].core_frequency = 0; continue; } } @@ -416,26 +446,26 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } centrino_model[cpu]->model_name=NULL; - centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000; + centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000; centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) * - (p.state_count + 1), GFP_KERNEL); + (p->state_count + 1), GFP_KERNEL); if (!centrino_model[cpu]->op_points) { result = -ENOMEM; goto err_kfree; } - for (i=0; i<p.state_count; i++) { - centrino_model[cpu]->op_points[i].index = p.states[i].control; - centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000; + for (i=0; i<p->state_count; i++) { + centrino_model[cpu]->op_points[i].index = p->states[i].control; + centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000; dprintk("adding state %i with frequency %u and control value %04x\n", i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index); } - centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END; + centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END; cur_freq = get_cur_freq(cpu); - for (i=0; i<p.state_count; i++) { - if (!p.states[i].core_frequency) { + for (i=0; i<p->state_count; i++) { + if (!p->states[i].core_frequency) { dprintk("skipping state %u\n", i); centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID; continue; @@ -451,7 +481,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } if (cur_freq == centrino_model[cpu]->op_points[i].frequency) - p.state = i; + p->state = i; } /* notify BIOS that we exist */ @@ -464,12 +494,13 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) err_kfree: kfree(centrino_model[cpu]); err_unreg: - acpi_processor_unregister_performance(&p, cpu); - dprintk("invalid ACPI data\n"); + acpi_processor_unregister_performance(p, cpu); + dprintk(PFX "invalid ACPI data\n"); return (result); } #else static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; } +static inline int centrino_cpu_early_init_acpi(void) { return 0; } #endif static int centrino_cpu_init(struct cpufreq_policy *policy) @@ -555,10 +586,15 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI if (!centrino_model[cpu]->model_name) { - dprintk("unregistering and freeing ACPI data\n"); - acpi_processor_unregister_performance(&p, cpu); - kfree(centrino_model[cpu]->op_points); - kfree(centrino_model[cpu]); + static struct acpi_processor_performance *p; + + if (acpi_perf_data[cpu]) { + p = acpi_perf_data[cpu]; + dprintk("unregistering and freeing ACPI data\n"); + acpi_processor_unregister_performance(p, cpu); + kfree(centrino_model[cpu]->op_points); + kfree(centrino_model[cpu]); + } } #endif @@ -592,63 +628,128 @@ static int centrino_target (struct cpufreq_policy *policy, unsigned int relation) { unsigned int newstate = 0; - unsigned int msr, oldmsr, h, cpu = policy->cpu; + unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; cpumask_t saved_mask; - int retval; + cpumask_t set_mask; + cpumask_t covered_cpus; + int retval = 0; + unsigned int j, k, first_cpu, tmp; - if (centrino_model[cpu] == NULL) + if (unlikely(centrino_model[cpu] == NULL)) return -ENODEV; - /* - * Support for SMP systems. - * Make sure we are running on the CPU that wants to change frequency - */ - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, policy->cpus); - if (!cpu_isset(smp_processor_id(), policy->cpus)) { - dprintk("couldn't limit to CPUs in this domain\n"); - return(-EAGAIN); + if (unlikely(cpufreq_frequency_table_target(policy, + centrino_model[cpu]->op_points, + target_freq, + relation, + &newstate))) { + return -EINVAL; } - if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq, - relation, &newstate)) { - retval = -EINVAL; - goto migrate_end; - } +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif - msr = centrino_model[cpu]->op_points[newstate].index; - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + saved_mask = current->cpus_allowed; + first_cpu = 1; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + retval = -EAGAIN; + if (first_cpu) { + /* We haven't started the transition yet. */ + goto migrate_end; + } + break; + } - if (msr == (oldmsr & 0xffff)) { - retval = 0; - dprintk("no change needed - msr was and needs to be %x\n", oldmsr); - goto migrate_end; - } + msr = centrino_model[cpu]->op_points[newstate].index; + + if (first_cpu) { + rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (msr == (oldmsr & 0xffff)) { + dprintk("no change needed - msr was and needs " + "to be %x\n", oldmsr); + retval = 0; + goto migrate_end; + } + + freqs.old = extract_clock(oldmsr, cpu, 0); + freqs.new = extract_clock(msr, cpu, 0); + + dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + target_freq, freqs.old, freqs.new, msr); + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, + CPUFREQ_PRECHANGE); + } + + first_cpu = 0; + /* all but 16 LSB are reserved, treat them with care */ + oldmsr &= ~0xffff; + msr &= 0xffff; + oldmsr |= msr; + } - freqs.cpu = cpu; - freqs.old = extract_clock(oldmsr, cpu, 0); - freqs.new = extract_clock(msr, cpu, 0); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + break; - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", - target_freq, freqs.old, freqs.new, msr); + cpu_set(j, covered_cpus); + } - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } - /* all but 16 LSB are "reserved", so treat them with - care */ - oldmsr &= ~0xffff; - msr &= 0xffff; - oldmsr |= msr; + if (unlikely(retval)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to policy->cpus and + * MSRs have already been written on coverd_cpus. + * Best effort undo.. + */ - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + set_cpus_allowed(current, cpumask_of_cpu(j)); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + } + } - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + } - retval = 0; migrate_end: set_cpus_allowed(current, saved_mask); - return (retval); + return 0; } static struct freq_attr* centrino_attr[] = { @@ -690,12 +791,25 @@ static int __init centrino_init(void) if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; + centrino_cpu_early_init_acpi(); + return cpufreq_register_driver(¢rino_driver); } static void __exit centrino_exit(void) { +#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI + unsigned int j; +#endif + cpufreq_unregister_driver(¢rino_driver); + +#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI + for_each_cpu(j) { + kfree(acpi_perf_data[j]); + acpi_perf_data[j] = NULL; + } +#endif } MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index 00f2e058797c..fc32c8028e24 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -184,7 +184,7 @@ static void __init geode_configure(void) #ifdef CONFIG_PCI -static struct pci_device_id cyrix_55x0[] = { +static struct pci_device_id __initdata cyrix_55x0[] = { { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, { }, @@ -272,14 +272,15 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; -#endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ - + + /* * The 5510/5520 companion chips have a funky PIT. */ if (pci_dev_present(cyrix_55x0)) pit_latch_buggy = 1; +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index f94cdb7aca50..a19fcb262dbb 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -52,7 +52,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index d75524758daf..c4351972d9af 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -25,7 +25,7 @@ #define HAVE_HWFP 1 #endif -static unsigned long mxcsr_feature_mask = 0xffffffff; +static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff; void mxcsr_feature_mask_init(void) { diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 323ef8ab3244..b7636b96e104 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -271,8 +271,8 @@ static int i8259A_shutdown(struct sys_device *dev) * the kernel initialization code can get it * out of. */ - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-1 */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ return 0; } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index d70f2ade5cde..a62df3e764c5 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -267,7 +267,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # include <linux/slab.h> /* kmalloc() */ # include <linux/timer.h> /* time_after() */ -# ifdef CONFIG_BALANCED_IRQ_DEBUG +#ifdef CONFIG_BALANCED_IRQ_DEBUG # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) # define Dprintk(x...) do { TDprintk(x); } while (0) # else @@ -275,10 +275,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) # endif - #define IRQBALANCE_CHECK_ARCH -999 -static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; -static int physical_balance = 0; +#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) +#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) +#define BALANCED_IRQ_MORE_DELTA (HZ/10) +#define BALANCED_IRQ_LESS_DELTA (HZ) + +static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; +static int physical_balance __read_mostly; +static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; static struct irq_cpu_info { unsigned long * last_irq; @@ -297,12 +302,14 @@ static struct irq_cpu_info { #define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) +static cpumask_t balance_irq_affinity[NR_IRQS] = { + [0 ... NR_IRQS-1] = CPU_MASK_ALL +}; -static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; +void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) +{ + balance_irq_affinity[irq] = mask; +} static unsigned long move(int curr_cpu, cpumask_t allowed_mask, unsigned long now, int direction) @@ -340,7 +347,7 @@ static inline void balance_irq(int cpu, int irq) if (irqbalance_disabled) return; - cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); + cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { set_pending_irq(irq, cpumask_of_cpu(new_cpu)); @@ -529,7 +536,9 @@ tryanotherirq: } } - cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); + cpus_and(allowed_mask, + cpu_online_map, + balance_irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); cpus_and(tmp, target_cpu_mask, allowed_mask); diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index f3a9c78c4a24..248e922ee13a 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -42,8 +42,8 @@ union irq_ctx { u32 stack[THREAD_SIZE/sizeof(u32)]; }; -static union irq_ctx *hardirq_ctx[NR_CPUS]; -static union irq_ctx *softirq_ctx[NR_CPUS]; +static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif /* diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 38806f427849..395a9a6dff88 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -607,7 +607,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - if (args->regs && user_mode(args->regs)) + if (args->regs && user_mode_vm(args->regs)) return ret; switch (val) { diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index e7c138f66c5a..0a865889b2a9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -91,7 +91,10 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14" +static int verbose; +module_param(verbose, int, 0644); + +#define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ #define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ @@ -122,14 +125,15 @@ static unsigned int user_buffer_size; /* it's size */ typedef enum mc_error_code { MC_SUCCESS = 0, - MC_NOTFOUND = 1, - MC_MARKED = 2, - MC_ALLOCATED = 3, + MC_IGNORED = 1, + MC_NOTFOUND = 2, + MC_MARKED = 3, + MC_ALLOCATED = 4, } mc_error_code_t; static struct ucode_cpu_info { unsigned int sig; - unsigned int pf; + unsigned int pf, orig_pf; unsigned int rev; unsigned int cksum; mc_error_code_t err; @@ -164,6 +168,7 @@ static void collect_cpu_info (void *unused) rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); uci->pf = 1 << ((val[1] >> 18) & 7); } + uci->orig_pf = uci->pf; } wrmsr(MSR_IA32_UCODE_REV, 0, 0); @@ -197,21 +202,34 @@ static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_he pr_debug(" Checksum 0x%x\n", cksum); if (mc_header->rev < uci->rev) { - printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision" - " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); - goto out; + if (uci->err == MC_NOTFOUND) { + uci->err = MC_IGNORED; + uci->cksum = mc_header->rev; + } else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev) + uci->cksum = mc_header->rev; } else if (mc_header->rev == uci->rev) { - /* notify the caller of success on this cpu */ - uci->err = MC_SUCCESS; - goto out; + if (uci->err < MC_MARKED) { + /* notify the caller of success on this cpu */ + uci->err = MC_SUCCESS; + } + } else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) { + pr_debug("microcode: CPU%d found a matching microcode update with " + " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); + uci->cksum = cksum; + uci->pf = pf; /* keep the original mc pf for cksum calculation */ + uci->err = MC_MARKED; /* found the match */ + for_each_online_cpu(cpu_num) { + if (ucode_cpu_info + cpu_num != uci + && ucode_cpu_info[cpu_num].mc == uci->mc) { + uci->mc = NULL; + break; + } + } + if (uci->mc != NULL) { + vfree(uci->mc); + uci->mc = NULL; + } } - - pr_debug("microcode: CPU%d found a matching microcode update with " - " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); - uci->cksum = cksum; - uci->pf = pf; /* keep the original mc pf for cksum calculation */ - uci->err = MC_MARKED; /* found the match */ -out: return; } @@ -253,10 +271,8 @@ static int find_matching_ucodes (void) for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ - continue; - if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf)) + if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf)) mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum); } @@ -295,9 +311,8 @@ static int find_matching_ucodes (void) } for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ - continue; - if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) { + + if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) { mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum); } } @@ -368,6 +383,13 @@ static void do_update_one (void * unused) struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->mc == NULL) { + if (verbose) { + if (uci->err == MC_SUCCESS) + printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n", + cpu_num, uci->rev); + else + printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); + } return; } @@ -426,6 +448,9 @@ out_free: ucode_cpu_info[j].mc = NULL; } } + if (ucode_cpu_info[i].err == MC_IGNORED && verbose) + printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision" + " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev); } out: return error; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index dd6b0e3386ce..e6023970aa40 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -48,6 +48,7 @@ #include <linux/crash_dump.h> #include <linux/dmi.h> #include <linux/pfn.h> +#include <linux/suspend.h> #include <video/edid.h> @@ -1434,6 +1435,111 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif +#ifdef CONFIG_SOFTWARE_SUSPEND +static void __init mark_nosave_page_range(unsigned long start, unsigned long end) +{ + struct page *page; + while (start <= end) { + page = pfn_to_page(start); + SetPageNosave(page); + start++; + } +} + +static void __init e820_nosave_reserved_pages(void) +{ + int i; + unsigned long r_start = 0, r_end = 0; + + /* Assume e820 map is sorted */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long start, end; + + start = PFN_DOWN(ei->addr); + end = PFN_UP(ei->addr + ei->size); + if (start >= end) + continue; + if (ei->type == E820_RESERVED) + continue; + r_end = start; + /* + * Highmem 'Reserved' pages are marked as reserved, swsusp + * will not save/restore them, so we ignore these pages here. + */ + if (r_end > max_low_pfn) + r_end = max_low_pfn; + if (r_end > r_start) + mark_nosave_page_range(r_start, r_end-1); + if (r_end >= max_low_pfn) + break; + r_start = end; + } +} + +static void __init e820_save_acpi_pages(void) +{ + int i; + + /* Assume e820 map is sorted */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long start, end; + + start = ei->addr; + end = ei->addr + ei->size; + if (start >= end) + continue; + if (ei->type != E820_ACPI && ei->type != E820_NVS) + continue; + /* + * If the region is below max_low_pfn, it will be + * saved/restored by swsusp follow 'RAM' type. + */ + if (start < (max_low_pfn << PAGE_SHIFT)) + start = max_low_pfn << PAGE_SHIFT; + /* + * Highmem pages (ACPI NVS/Data) are reserved, but swsusp + * highmem save/restore will not save/restore them. We marked + * them as arch saveable pages here + */ + if (end > start) + swsusp_add_arch_pages(start, end); + } +} + +extern char __start_rodata, __end_rodata; +/* + * BIOS reserved region/hole - no save/restore + * ACPI NVS - save/restore + * ACPI Data - this is a little tricky, the mem could be used by OS after OS + * reads tables from the region, but anyway save/restore the memory hasn't any + * side effect and Linux runtime module load/unload might use it. + * kernel rodata - no save/restore (kernel rodata isn't changed) + */ +static int __init mark_nosave_pages(void) +{ + unsigned long pfn_start, pfn_end; + + /* FIXME: provide a version for efi BIOS */ + if (efi_enabled) + return 0; + /* BIOS reserved regions & holes */ + e820_nosave_reserved_pages(); + + /* kernel rodata */ + pfn_start = PFN_UP(virt_to_phys(&__start_rodata)); + pfn_end = PFN_DOWN(virt_to_phys(&__end_rodata)); + mark_nosave_page_range(pfn_start, pfn_end-1); + + /* record ACPI Data/NVS as saveable */ + e820_save_acpi_pages(); + + return 0; +} +core_initcall(mark_nosave_pages); +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 52b3ed5d2cb5..989c85255dbe 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -39,7 +39,6 @@ #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) -#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */ /* bitmap length; _PXM is at most 255 */ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ @@ -213,19 +212,11 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } -static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ - -int pxm_to_node(int pxm) -{ - return pxm_to_nid_map[pxm]; -} - /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ p = start; @@ -235,10 +226,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); memset(zholes_size, 0, sizeof(zholes_size)); - /* -1 in these maps means not available */ - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); - num_memory_chunks = 0; while (p < end) { switch (*p) { @@ -278,9 +265,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) nodes_clear(node_online_map); for (i = 0; i < MAX_PXM_DOMAINS; i++) { if (BMAP_TEST(pxm_bitmap, i)) { - nid = num_online_nodes(); - pxm_to_nid_map[i] = nid; - nid_to_pxm_map[nid] = i; + int nid = acpi_map_pxm_to_node(i); node_set_online(nid); } } @@ -288,7 +273,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) /* set cnode id in memory chunk structure */ for (i = 0; i < num_memory_chunks; i++) - node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm]; + node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); printk("pxm bitmap: "); for (i = 0; i < sizeof(pxm_bitmap); i++) { diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index af56987f69b0..dd63d4775398 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -316,3 +316,4 @@ ENTRY(sys_call_table) .long sys_sync_file_range .long sys_tee /* 315 */ .long sys_vmsplice + .long sys_move_pages diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0e498369f35e..dcc14477af1f 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -149,6 +149,12 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printed = print_addr_and_symbol(addr, log_lvl, printed); + /* + * break out of recursive entries (such as + * end_of_stack_stop_unwind_function): + */ + if (ebp == *(unsigned long *)ebp) + break; ebp = *(unsigned long *)ebp; } #else @@ -268,8 +274,9 @@ void show_registers(struct pt_regs *regs) regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)", - current->comm, current->pid, current_thread_info(), current); + printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", + TASK_COMM_LEN, current->comm, current->pid, + current_thread_info(), current, current->thread_info); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. |