summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/alternative.c12
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c39
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c42
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c7
-rw-r--r--arch/x86/kernel/hpet.c9
-rw-r--r--arch/x86/kernel/io_delay.c8
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/mfgpt_32.c1
-rw-r--r--arch/x86/kernel/pci-gart_64.c10
-rw-r--r--arch/x86/kernel/process_32.c47
-rw-r--r--arch/x86/kernel/process_64.c47
-rw-r--r--arch/x86/kernel/ptrace.c169
-rw-r--r--arch/x86/kernel/setup_32.c9
-rw-r--r--arch/x86/kernel/setup_64.c2
-rw-r--r--arch/x86/kernel/step.c11
-rw-r--r--arch/x86/kernel/tls.c4
-rw-r--r--arch/x86/kernel/tsc_32.c4
-rw-r--r--arch/x86/kernel/tsc_64.c4
-rw-r--r--arch/x86/lguest/boot.c108
-rw-r--r--arch/x86/lguest/i386_head.S15
-rw-r--r--arch/x86/mach-rdc321x/gpio.c199
-rw-r--r--arch/x86/mach-rdc321x/platform.c2
-rw-r--r--arch/x86/mm/discontig_32.c1
-rw-r--r--arch/x86/mm/fault.c10
-rw-r--r--arch/x86/mm/highmem_32.c6
-rw-r--r--arch/x86/mm/hugetlbpage.c2
-rw-r--r--arch/x86/mm/ioremap.c6
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/xen/enlighten.c74
-rw-r--r--arch/x86/xen/mmu.c7
-rw-r--r--arch/x86/xen/mmu.h7
-rw-r--r--arch/x86/xen/xen-asm.S9
33 files changed, 502 insertions, 380 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 45d79ea890ae..5fed98ca0e1f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -65,7 +65,8 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
- GENERIC_NOP7 GENERIC_NOP8);
+ GENERIC_NOP7 GENERIC_NOP8
+ "\t.previous");
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
NULL,
@@ -83,7 +84,8 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
+ K8_NOP7 K8_NOP8
+ "\t.previous");
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
NULL,
@@ -101,7 +103,8 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
+ K7_NOP7 K7_NOP8
+ "\t.previous");
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
NULL,
@@ -119,7 +122,8 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
- P6_NOP7 P6_NOP8);
+ P6_NOP7 P6_NOP8
+ "\t.previous");
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
NULL,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 027e5c003b16..170d2f5523b2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -143,14 +143,6 @@ static void __init check_config(void)
#endif
/*
- * If we configured ourselves for a TSC, we'd better have one!
- */
-#ifdef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- panic("Kernel compiled for Pentium+, requires TSC feature!");
-#endif
-
-/*
* If we were told we had a good local APIC, check for buggy Pentia,
* i.e. all B steppings and the C2 stepping of P54C when using their
* integrated APIC (see 11AP erratum in "Pentium Processor
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index f2b5a621d27b..8a85c93bd62a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -63,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
*/
static int speedstep_smi_ownership (void)
{
- u32 command, result, magic;
+ u32 command, result, magic, dummy;
u32 function = GET_SPEEDSTEP_OWNER;
unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation";
@@ -73,8 +73,11 @@ static int speedstep_smi_ownership (void)
dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port);
__asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=D" (result)
+ "pop %%ebp\n"
+ : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
+ "=S" (dummy)
: "a" (command), "b" (function), "c" (0), "d" (smi_port),
"D" (0), "S" (magic)
: "memory"
@@ -96,7 +99,7 @@ static int speedstep_smi_ownership (void)
*/
static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
{
- u32 command, result = 0, edi, high_mhz, low_mhz;
+ u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
u32 state=0;
u32 function = GET_SPEEDSTEP_FREQS;
@@ -109,10 +112,12 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port);
- __asm__ __volatile__("movl $0, %%edi\n"
+ __asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+ "pop %%ebp"
+ : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
);
dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz);
@@ -135,16 +140,18 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
static int speedstep_get_state (void)
{
u32 function=GET_SPEEDSTEP_STATE;
- u32 result, state, edi, command;
+ u32 result, state, edi, command, dummy;
command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port);
- __asm__ __volatile__("movl $0, %%edi\n"
+ __asm__ __volatile__(
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=a" (result), "=b" (state), "=D" (edi)
- : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0)
+ "pop %%ebp\n"
+ : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0)
);
dprintk("state is %x, result is %x\n", state, result);
@@ -160,7 +167,7 @@ static int speedstep_get_state (void)
*/
static void speedstep_set_state (unsigned int state)
{
- unsigned int result = 0, command, new_state;
+ unsigned int result = 0, command, new_state, dummy;
unsigned long flags;
unsigned int function=SET_SPEEDSTEP_STATE;
unsigned int retry = 0;
@@ -182,10 +189,12 @@ static void speedstep_set_state (unsigned int state)
}
retry++;
__asm__ __volatile__(
- "movl $0, %%edi\n"
+ "push %%ebp\n"
"out %%al, (%%dx)\n"
- : "=b" (new_state), "=D" (result)
- : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0)
+ "pop %%ebp"
+ : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy),
+ "=d" (dummy), "=S" (dummy)
+ : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0)
);
} while ((new_state != state) && (retry <= SMI_TRIES));
@@ -195,7 +204,7 @@ static void speedstep_set_state (unsigned int state)
if (new_state == state) {
dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result);
} else {
- printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result);
+ printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result);
}
return;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 103d61a59b19..3e18db4cefee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -176,12 +176,13 @@ static inline void k8_enable_fixed_iorrs(void)
}
/**
- * Checks and updates an fixed-range MTRR if it differs from the value it
- * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also.
- * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
- * \param msr MSR address of the MTTR which should be checked and updated
- * \param changed pointer which indicates whether the MTRR needed to be changed
- * \param msrwords pointer to the MSR values which the MSR should have
+ * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
+ * @msr: MSR address of the MTTR which should be checked and updated
+ * @changed: pointer which indicates whether the MTRR needed to be changed
+ * @msrwords: pointer to the MSR values which the MSR should have
+ *
+ * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
+ * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
*/
static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
{
@@ -199,12 +200,15 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
}
}
+/**
+ * generic_get_free_region - Get a free MTRR.
+ * @base: The starting (base) address of the region.
+ * @size: The size (in bytes) of the region.
+ * @replace_reg: mtrr index to be replaced; set to invalid value if none.
+ *
+ * Returns: The index of the region on success, else negative on error.
+ */
int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
{
int i, max;
mtrr_type ltype;
@@ -249,8 +253,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
}
/**
- * Checks and updates the fixed-range MTRRs if they differ from the saved set
- * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
+ * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
*/
static int set_fixed_ranges(mtrr_type * frs)
{
@@ -294,13 +298,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
static u32 deftype_lo, deftype_hi;
+/**
+ * set_mtrr_state - Set the MTRR state for this CPU.
+ *
+ * NOTE: The CPU must already be in a safe state for MTRR changes.
+ * RETURNS: 0 if no changes made, else a mask indicating what was changed.
+ */
static unsigned long set_mtrr_state(void)
-/* [SUMMARY] Set the MTRR state for this CPU.
- <state> The MTRR state information to read.
- <ctxt> Some relevant CPU context.
- [NOTE] The CPU must already be in a safe state for MTRR changes.
- [RETURNS] 0 if no changes made, else a mask indication what was changed.
-*/
{
unsigned int i;
unsigned long change_mask = 0;
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9b838324b818..b943e10ad814 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -652,9 +652,6 @@ static void probe_nmi_watchdog(void)
wd_ops = &p6_wd_ops;
break;
case 15:
- if (boot_cpu_data.x86_model > 0x4)
- return;
-
wd_ops = &p4_wd_ops;
break;
default:
@@ -670,8 +667,10 @@ int lapic_watchdog_init(unsigned nmi_hz)
{
if (!wd_ops) {
probe_nmi_watchdog();
- if (!wd_ops)
+ if (!wd_ops) {
+ printk(KERN_INFO "NMI watchdog: CPU not supported\n");
return -1;
+ }
if (!wd_ops->reserve()) {
printk(KERN_ERR
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 235fd6c77504..36652ea1a265 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -133,13 +133,16 @@ static void hpet_reserve_platform_timers(unsigned long id)
#ifdef CONFIG_HPET_EMULATE_RTC
hpet_reserve_timer(&hd, 1);
#endif
+
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
- for (i = 2; i < nrtimers; timer++, i++)
- hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
+ for (i = 2; i < nrtimers; timer++, i++)
+ hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
+ Tn_INT_ROUTE_CNF_SHIFT;
+
hpet_alloc(&hd);
+
}
#else
static void hpet_reserve_platform_timers(unsigned long id) { }
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index c706a3061553..5921e5f0a640 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -78,6 +78,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
},
{
.callback = dmi_io_delay_0xed_port,
+ .ident = "HP Pavilion dv6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B8")
+ }
+ },
+ {
+ .callback = dmi_io_delay_0xed_port,
.ident = "HP Pavilion tx1000",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 236d2f8f7ddc..576a03db4511 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -233,6 +233,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
+ VMCOREINFO_SYMBOL(phys_base);
VMCOREINFO_SYMBOL(init_level4_pgt);
#ifdef CONFIG_NUMA
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 027fc067b399..b402c0f3f192 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -30,6 +30,7 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
#include <asm/geode.h>
static struct mfgpt_timer_t {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index faf3229f8fb3..700e4647dd30 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -615,8 +615,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
nommu:
/* Should not happen anymore */
- printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
- KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
+ printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
+ KERN_WARNING "falling back to iommu=soft.\n");
return -1;
}
@@ -692,9 +692,9 @@ void __init gart_iommu_init(void)
!gart_iommu_aperture ||
(no_agp && init_k8_gatt(&info) < 0)) {
if (end_pfn > MAX_DMA32_PFN) {
- printk(KERN_ERR "WARNING more than 4GB of memory "
- "but GART IOMMU not available.\n"
- KERN_ERR "WARNING 32bit PCI may malfunction.\n");
+ printk(KERN_WARNING "More than 4GB of memory "
+ "but GART IOMMU not available.\n"
+ KERN_WARNING "falling back to iommu=soft.\n");
}
return;
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index be3c7a299f02..43930e73f657 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -82,7 +82,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
void disable_hlt(void)
{
@@ -190,9 +189,6 @@ void cpu_idle(void)
while (!need_resched()) {
void (*idle)(void);
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
check_pgt_cache();
rmb();
idle = pm_idle;
@@ -220,40 +216,19 @@ static void do_nothing(void *unused)
{
}
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
void cpu_idle_wait(void)
{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- /*
- * We waited 1 sec, if a CPU still did not call idle
- * it may be because it is in idle and not waking up
- * because it has nothing to do.
- * Give all the remaining CPUS a kick.
- */
- smp_call_function_mask(map, do_nothing, NULL, 0);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3baf9b9f4c87..46c4c546b499 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(boot_option_idle_override);
*/
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -173,9 +172,6 @@ void cpu_idle(void)
while (!need_resched()) {
void (*idle)(void);
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
rmb();
idle = pm_idle;
if (!idle)
@@ -207,40 +203,19 @@ static void do_nothing(void *unused)
{
}
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
void cpu_idle_wait(void)
{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map, tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- /*
- * We waited 1 sec, if a CPU still did not call idle
- * it may be because it is in idle and not waking up
- * because it has nothing to do.
- * Give all the remaining CPUS a kick.
- */
- smp_call_function_mask(map, do_nothing, 0, 0);
- } while (!cpus_empty(map));
-
- set_cpus_allowed(current, tmp);
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d5904eef1d31..eb92ccbb3502 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -600,21 +600,6 @@ static int ptrace_bts_read_record(struct task_struct *child,
return sizeof(ret);
}
-static int ptrace_bts_write_record(struct task_struct *child,
- const struct bts_struct *in)
-{
- int retval;
-
- if (!child->thread.ds_area_msr)
- return -ENXIO;
-
- retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
- if (retval)
- return retval;
-
- return sizeof(*in);
-}
-
static int ptrace_bts_clear(struct task_struct *child)
{
if (!child->thread.ds_area_msr)
@@ -657,75 +642,6 @@ static int ptrace_bts_drain(struct task_struct *child,
return end;
}
-static int ptrace_bts_realloc(struct task_struct *child,
- int size, int reduce_size)
-{
- unsigned long rlim, vm;
- int ret, old_size;
-
- if (size < 0)
- return -EINVAL;
-
- old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
- if (old_size < 0)
- return old_size;
-
- ret = ds_free((void **)&child->thread.ds_area_msr);
- if (ret < 0)
- goto out;
-
- size >>= PAGE_SHIFT;
- old_size >>= PAGE_SHIFT;
-
- current->mm->total_vm -= old_size;
- current->mm->locked_vm -= old_size;
-
- if (size == 0)
- goto out;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->total_vm;
- if (size <= 0)
- goto out;
- }
-
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + size;
- if (rlim < vm) {
- ret = -ENOMEM;
-
- if (!reduce_size)
- goto out;
-
- size = rlim - current->mm->locked_vm;
- if (size <= 0)
- goto out;
- }
-
- ret = ds_allocate((void **)&child->thread.ds_area_msr,
- size << PAGE_SHIFT);
- if (ret < 0)
- goto out;
-
- current->mm->total_vm += size;
- current->mm->locked_vm += size;
-
-out:
- if (child->thread.ds_area_msr)
- set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
- else
- clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
-
- return ret;
-}
-
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
@@ -828,6 +744,91 @@ static int ptrace_bts_status(struct task_struct *child,
return sizeof(cfg);
}
+
+static int ptrace_bts_write_record(struct task_struct *child,
+ const struct bts_struct *in)
+{
+ int retval;
+
+ if (!child->thread.ds_area_msr)
+ return -ENXIO;
+
+ retval = ds_write_bts((void *)child->thread.ds_area_msr, in);
+ if (retval)
+ return retval;
+
+ return sizeof(*in);
+}
+
+static int ptrace_bts_realloc(struct task_struct *child,
+ int size, int reduce_size)
+{
+ unsigned long rlim, vm;
+ int ret, old_size;
+
+ if (size < 0)
+ return -EINVAL;
+
+ old_size = ds_get_bts_size((void *)child->thread.ds_area_msr);
+ if (old_size < 0)
+ return old_size;
+
+ ret = ds_free((void **)&child->thread.ds_area_msr);
+ if (ret < 0)
+ goto out;
+
+ size >>= PAGE_SHIFT;
+ old_size >>= PAGE_SHIFT;
+
+ current->mm->total_vm -= old_size;
+ current->mm->locked_vm -= old_size;
+
+ if (size == 0)
+ goto out;
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->total_vm;
+ if (size <= 0)
+ goto out;
+ }
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + size;
+ if (rlim < vm) {
+ ret = -ENOMEM;
+
+ if (!reduce_size)
+ goto out;
+
+ size = rlim - current->mm->locked_vm;
+ if (size <= 0)
+ goto out;
+ }
+
+ ret = ds_allocate((void **)&child->thread.ds_area_msr,
+ size << PAGE_SHIFT);
+ if (ret < 0)
+ goto out;
+
+ current->mm->total_vm += size;
+ current->mm->locked_vm += size;
+
+out:
+ if (child->thread.ds_area_msr)
+ set_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+ else
+ clear_tsk_thread_flag(child, TIF_DS_AREA_MSR);
+
+ return ret;
+}
+
void ptrace_bts_take_timestamp(struct task_struct *tsk,
enum bts_qualifier qualifier)
{
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index a1d7071a51c9..2b3e5d45176b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -406,8 +406,6 @@ static unsigned long __init setup_memory(void)
*/
min_low_pfn = PFN_UP(init_pg_tables_end);
- find_max_pfn();
-
max_low_pfn = find_max_low_pfn();
#ifdef CONFIG_HIGHMEM
@@ -764,12 +762,13 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_init();
- max_low_pfn = setup_memory();
-
/* update e820 for memory not covered by WB MTRRs */
+ find_max_pfn();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
- max_low_pfn = setup_memory();
+ find_max_pfn();
+
+ max_low_pfn = setup_memory();
#ifdef CONFIG_VMI
/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 7637dc91c79b..f4f7ecfb898c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -801,7 +801,7 @@ static void __cpuinit srat_detect_node(void)
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
+ if (node == NUMA_NO_NODE || !node_online(node))
node = first_node(node_online_map);
numa_set_node(cpu, node);
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 9d406cdc847f..071ff4798236 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -140,6 +140,9 @@ static int enable_single_step(struct task_struct *child)
*/
static void write_debugctlmsr(struct task_struct *child, unsigned long val)
{
+ if (child->thread.debugctlmsr == val)
+ return;
+
child->thread.debugctlmsr = val;
if (child != current)
@@ -165,11 +168,11 @@ static void enable_step(struct task_struct *child, bool block)
write_debugctlmsr(child,
child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
} else {
- write_debugctlmsr(child,
- child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
+ write_debugctlmsr(child,
+ child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
}
}
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 022bcaa3b42e..ab6bf375a307 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -92,7 +92,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
asmlinkage int sys_set_thread_area(struct user_desc __user *u_info)
{
int ret = do_set_thread_area(current, -1, u_info, 1);
- prevent_tail_call(ret);
+ asmlinkage_protect(1, ret, u_info);
return ret;
}
@@ -142,7 +142,7 @@ int do_get_thread_area(struct task_struct *p, int idx,
asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
{
int ret = do_get_thread_area(current, -1, u_info);
- prevent_tail_call(ret);
+ asmlinkage_protect(1, ret, u_info);
return ret;
}
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index f14cfd9d1f94..c2241e04ea5f 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -256,9 +256,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
- preempt_disable();
- set_cyc2ns_scale(cpu_khz, smp_processor_id());
- preempt_enable();
+ set_cyc2ns_scale(cpu_khz, freq->cpu);
/*
* TSC based sched_clock turns
* to junk w/ cpufreq
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 947554ddabb6..d3bebaaad842 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -148,9 +148,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
mark_tsc_unstable("cpufreq changes");
}
- preempt_disable();
- set_cyc2ns_scale(tsc_khz_ref, smp_processor_id());
- preempt_enable();
+ set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
return 0;
}
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a104c532ff70..3335b4595efd 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -10,21 +10,19 @@
* (such as the example in Documentation/lguest/lguest.c) is called the
* Launcher.
*
- * Secondly, we only run specially modified Guests, not normal kernels. When
- * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets
- * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows
- * how to be a Guest. This means that you can use the same kernel you boot
- * normally (ie. as a Host) as a Guest.
+ * Secondly, we only run specially modified Guests, not normal kernels: setting
+ * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows
+ * how to be a Guest at boot time. This means that you can use the same kernel
+ * you boot normally (ie. as a Host) as a Guest.
*
* These Guests know that they cannot do privileged operations, such as disable
* interrupts, and that they have to ask the Host to do such things explicitly.
* This file consists of all the replacements for such low-level native
* hardware operations: these special Guest versions call the Host.
*
- * So how does the kernel know it's a Guest? The Guest starts at a special
- * entry point marked with a magic string, which sets up a few things then
- * calls here. We replace the native functions various "paravirt" structures
- * with our Guest versions, then boot like normal. :*/
+ * So how does the kernel know it's a Guest? We'll see that later, but let's
+ * just say that we end up here where we replace the native functions various
+ * "paravirt" structures with our Guest versions, then boot like normal. :*/
/*
* Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation.
@@ -134,7 +132,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
* lguest_leave_lazy_mode().
*
* So, when we're in lazy mode, we call async_hcall() to store the call for
- * future processing. */
+ * future processing: */
static void lazy_hcall(unsigned long call,
unsigned long arg1,
unsigned long arg2,
@@ -147,7 +145,7 @@ static void lazy_hcall(unsigned long call,
}
/* When lazy mode is turned off reset the per-cpu lazy mode variable and then
- * issue a hypercall to flush any stored calls. */
+ * issue the do-nothing hypercall to flush any stored calls. */
static void lguest_leave_lazy_mode(void)
{
paravirt_leave_lazy(paravirt_get_lazy_mode());
@@ -164,7 +162,7 @@ static void lguest_leave_lazy_mode(void)
*
* So instead we keep an "irq_enabled" field inside our "struct lguest_data",
* which the Guest can update with a single instruction. The Host knows to
- * check there when it wants to deliver an interrupt.
+ * check there before it tries to deliver an interrupt.
*/
/* save_flags() is expected to return the processor state (ie. "flags"). The
@@ -196,10 +194,15 @@ static void irq_enable(void)
/*M:003 Note that we don't check for outstanding interrupts when we re-enable
* them (or when we unmask an interrupt). This seems to work for the moment,
* since interrupts are rare and we'll just get the interrupt on the next timer
- * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way
+ * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way
* would be to put the "irq_enabled" field in a page by itself, and have the
* Host write-protect it when an interrupt comes in when irqs are disabled.
- * There will then be a page fault as soon as interrupts are re-enabled. :*/
+ * There will then be a page fault as soon as interrupts are re-enabled.
+ *
+ * A better method is to implement soft interrupt disable generally for x86:
+ * instead of disabling interrupts, we set a flag. If an interrupt does come
+ * in, we then disable them for real. This is uncommon, so we could simply use
+ * a hypercall for interrupt control and not worry about efficiency. :*/
/*G:034
* The Interrupt Descriptor Table (IDT).
@@ -212,6 +215,10 @@ static void irq_enable(void)
static void lguest_write_idt_entry(gate_desc *dt,
int entrynum, const gate_desc *g)
{
+ /* The gate_desc structure is 8 bytes long: we hand it to the Host in
+ * two 32-bit chunks. The whole 32-bit kernel used to hand descriptors
+ * around like this; typesafety wasn't a big concern in Linux's early
+ * years. */
u32 *desc = (u32 *)g;
/* Keep the local copy up to date. */
native_write_idt_entry(dt, entrynum, g);
@@ -243,7 +250,8 @@ static void lguest_load_idt(const struct desc_ptr *desc)
*
* This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
* hypercall and use that repeatedly to load a new IDT. I don't think it
- * really matters, but wouldn't it be nice if they were the same?
+ * really matters, but wouldn't it be nice if they were the same? Wouldn't
+ * it be even better if you were the one to send the patch to fix it?
*/
static void lguest_load_gdt(const struct desc_ptr *desc)
{
@@ -298,9 +306,9 @@ static void lguest_load_tr_desc(void)
/* The "cpuid" instruction is a way of querying both the CPU identity
* (manufacturer, model, etc) and its features. It was introduced before the
- * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you
- * might imagine, after a decade and a half this treatment, it is now a giant
- * ball of hair. Its entry in the current Intel manual runs to 28 pages.
+ * Pentium in 1993 and keeps getting extended by both Intel, AMD and others.
+ * As you might imagine, after a decade and a half this treatment, it is now a
+ * giant ball of hair. Its entry in the current Intel manual runs to 28 pages.
*
* This instruction even it has its own Wikipedia entry. The Wikipedia entry
* has been translated into 4 languages. I am not making this up!
@@ -594,17 +602,17 @@ static unsigned long lguest_get_wallclock(void)
return lguest_data.time.tv_sec;
}
-/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at,
- * or 0 if it's unusable as a reliable clock source. This matches what we want
- * here: if we return 0 from this function, the x86 TSC clock will not register
- * itself. */
+/* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us
+ * what speed it runs at, or 0 if it's unusable as a reliable clock source.
+ * This matches what we want here: if we return 0 from this function, the x86
+ * TSC clock will give up and not register itself. */
static unsigned long lguest_cpu_khz(void)
{
return lguest_data.tsc_khz;
}
-/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
- * we read the time value given to us by the Host. */
+/* If we can't use the TSC, the kernel falls back to our lower-priority
+ * "lguest_clock", where we read the time value given to us by the Host. */
static cycle_t lguest_clock_read(void)
{
unsigned long sec, nsec;
@@ -648,12 +656,16 @@ static struct clocksource lguest_clock = {
static int lguest_clockevent_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
+ /* FIXME: I don't think this can ever happen, but James tells me he had
+ * to put this code in. Maybe we should remove it now. Anyone? */
if (delta < LG_CLOCK_MIN_DELTA) {
if (printk_ratelimit())
printk(KERN_DEBUG "%s: small delta %lu ns\n",
__FUNCTION__, delta);
return -ETIME;
}
+
+ /* Please wake us this far in the future. */
hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0);
return 0;
}
@@ -738,7 +750,7 @@ static void lguest_time_init(void)
* will not tolerate us trying to use that), the stack pointer, and the number
* of pages in the stack. */
static void lguest_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+ struct thread_struct *thread)
{
lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0,
THREAD_SIZE/PAGE_SIZE);
@@ -786,9 +798,8 @@ static void lguest_safe_halt(void)
hcall(LHCALL_HALT, 0, 0, 0);
}
-/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a
- * message out when we're crashing as well as elegant termination like powering
- * off.
+/* The SHUTDOWN hypercall takes a string to describe what's happening, and
+ * an argument which says whether this to restart (reboot) the Guest or not.
*
* Note that the Host always prefers that the Guest speak in physical addresses
* rather than virtual addresses, so we use __pa() here. */
@@ -816,8 +827,9 @@ static struct notifier_block paniced = {
/* Setting up memory is fairly easy. */
static __init char *lguest_memory_setup(void)
{
- /* We do this here and not earlier because lockcheck barfs if we do it
- * before start_kernel() */
+ /* We do this here and not earlier because lockcheck used to barf if we
+ * did it before start_kernel(). I think we fixed that, so it'd be
+ * nice to move it back to lguest_init. Patch welcome... */
atomic_notifier_chain_register(&panic_notifier_list, &paniced);
/* The Linux bootloader header contains an "e820" memory map: the
@@ -850,12 +862,19 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
return len;
}
+/* Rebooting also tells the Host we're finished, but the RESTART flag tells the
+ * Launcher to reboot us. */
+static void lguest_restart(char *reason)
+{
+ hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+}
+
/*G:050
* Patching (Powerfully Placating Performance Pedants)
*
- * We have already seen that pv_ops structures let us replace simple
- * native instructions with calls to the appropriate back end all throughout
- * the kernel. This allows the same kernel to run as a Guest and as a native
+ * We have already seen that pv_ops structures let us replace simple native
+ * instructions with calls to the appropriate back end all throughout the
+ * kernel. This allows the same kernel to run as a Guest and as a native
* kernel, but it's slow because of all the indirect branches.
*
* Remember that David Wheeler quote about "Any problem in computer science can
@@ -908,14 +927,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
return insn_len;
}
-static void lguest_restart(char *reason)
-{
- hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
-}
-
-/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
- * structures in the kernel provide points for (almost) every routine we have
- * to override to avoid privileged instructions. */
+/*G:030 Once we get to lguest_init(), we know we're a Guest. The various
+ * pv_ops structures in the kernel provide points for (almost) every routine we
+ * have to override to avoid privileged instructions. */
__init void lguest_init(void)
{
/* We're under lguest, paravirt is enabled, and we're running at
@@ -1003,9 +1017,9 @@ __init void lguest_init(void)
* the normal data segment to get through booting. */
asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
- /* The Host uses the top of the Guest's virtual address space for the
- * Host<->Guest Switcher, and it tells us how big that is in
- * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */
+ /* The Host<->Guest Switcher lives at the top of our address space, and
+ * the Host told us how big it is when we made LGUEST_INIT hypercall:
+ * it put the answer in lguest_data.reserve_mem */
reserve_top_address(lguest_data.reserve_mem);
/* If we don't initialize the lock dependency checker now, it crashes
@@ -1027,6 +1041,7 @@ __init void lguest_init(void)
/* Math is always hard! */
new_cpu_data.hard_math = 1;
+ /* We don't have features. We have puppies! Puppies! */
#ifdef CONFIG_X86_MCE
mce_disabled = 1;
#endif
@@ -1044,10 +1059,11 @@ __init void lguest_init(void)
virtio_cons_early_init(early_put_chars);
/* Last of all, we set the power management poweroff hook to point to
- * the Guest routine to power off. */
+ * the Guest routine to power off, and the reboot hook to our restart
+ * routine. */
pm_power_off = lguest_power_off;
-
machine_ops.restart = lguest_restart;
+
/* Now we're set up, call start_kernel() in init/main.c and we proceed
* to boot as normal. It never returns. */
start_kernel();
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 95b6fbcded63..5c7cef34c9e7 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -5,13 +5,20 @@
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
-/*G:020 This is where we begin: head.S notes that the boot header's platform
- * type field is "1" (lguest), so calls us here.
+/*G:020 Our story starts with the kernel booting into startup_32 in
+ * arch/x86/kernel/head_32.S. It expects a boot header, which is created by
+ * the bootloader (the Launcher in our case).
+ *
+ * The startup_32 function does very little: it clears the uninitialized global
+ * C variables which we expect to be zero (ie. BSS) and then copies the boot
+ * header and kernel command line somewhere safe. Finally it checks the
+ * 'hardware_subarch' field. This was introduced in 2.6.24 for lguest and Xen:
+ * if it's set to '1' (lguest's assigned number), then it calls us here.
*
* WARNING: be very careful here! We're running at addresses equal to physical
* addesses (around 0), not above PAGE_OFFSET as most code expectes
* (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any
- * data.
+ * data without remembering to subtract __PAGE_OFFSET!
*
* The .section line puts this code in .init.text so it will be discarded after
* boot. */
@@ -24,7 +31,7 @@ ENTRY(lguest_entry)
int $LGUEST_TRAP_ENTRY
/* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl
- * instruction uses %esi implicitly as the source for the copy we'
+ * instruction uses %esi implicitly as the source for the copy we're
* about to do. */
movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
index 031269163bd6..247f33d3a407 100644
--- a/arch/x86/mach-rdc321x/gpio.c
+++ b/arch/x86/mach-rdc321x/gpio.c
@@ -1,91 +1,194 @@
/*
- * Copyright (C) 2007, OpenWrt.org, Florian Fainelli <florian@openwrt.org>
- * RDC321x architecture specific GPIO support
+ * GPIO support for RDC SoC R3210/R8610
+ *
+ * Copyright (C) 2007, Florian Fainelli <florian@openwrt.org>
+ * Copyright (C) 2008, Volker Weiss <dev@tintuc.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <linux/autoconf.h>
-#include <linux/init.h>
+
+#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/types.h>
#include <linux/module.h>
-#include <linux/delay.h>
+#include <asm/gpio.h>
#include <asm/mach-rdc321x/rdc321x_defs.h>
-static inline int rdc_gpio_is_valid(unsigned gpio)
+
+/* spin lock to protect our private copy of GPIO data register plus
+ the access to PCI conf registers. */
+static DEFINE_SPINLOCK(gpio_lock);
+
+/* copy of GPIO data registers */
+static u32 gpio_data_reg1;
+static u32 gpio_data_reg2;
+
+static u32 gpio_request_data[2];
+
+
+static inline void rdc321x_conf_write(unsigned addr, u32 value)
{
- return (gpio <= RDC_MAX_GPIO);
+ outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
+ outl(value, RDC3210_CFGREG_DATA);
}
-static unsigned int rdc_gpio_read(unsigned gpio)
+static inline void rdc321x_conf_or(unsigned addr, u32 value)
{
- unsigned int val;
-
- val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x84:0x48));
- outl(val, RDC3210_CFGREG_ADDR);
- udelay(10);
- val = inl(RDC3210_CFGREG_DATA);
- val |= (0x1 << (gpio & 0x1F));
- outl(val, RDC3210_CFGREG_DATA);
- udelay(10);
- val = 0x80000000 | (7 << 11) | ((gpio&0x20?0x88:0x4C));
- outl(val, RDC3210_CFGREG_ADDR);
- udelay(10);
- val = inl(RDC3210_CFGREG_DATA);
-
- return val;
+ outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
+ value |= inl(RDC3210_CFGREG_DATA);
+ outl(value, RDC3210_CFGREG_DATA);
}
-static void rdc_gpio_write(unsigned int val)
+static inline u32 rdc321x_conf_read(unsigned addr)
{
- if (val) {
- outl(val, RDC3210_CFGREG_DATA);
- udelay(10);
- }
+ outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
+
+ return inl(RDC3210_CFGREG_DATA);
}
-int rdc_gpio_get_value(unsigned gpio)
+/* configure pin as GPIO */
+static void rdc321x_configure_gpio(unsigned gpio)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ rdc321x_conf_or(gpio < 32
+ ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2,
+ 1 << (gpio & 0x1f));
+ spin_unlock_irqrestore(&gpio_lock, flags);
+}
+
+/* initially setup the 2 copies of the gpio data registers.
+ This function must be called by the platform setup code. */
+void __init rdc321x_gpio_setup()
+{
+ /* this might not be, what others (BIOS, bootloader, etc.)
+ wrote to these registers before, but it's a good guess. Still
+ better than just using 0xffffffff. */
+
+ gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1);
+ gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2);
+}
+
+/* determine, if gpio number is valid */
+static inline int rdc321x_is_gpio(unsigned gpio)
+{
+ return gpio <= RDC321X_MAX_GPIO;
+}
+
+/* request GPIO */
+int rdc_gpio_request(unsigned gpio, const char *label)
{
- if (rdc_gpio_is_valid(gpio))
- return (int)rdc_gpio_read(gpio);
- else
+ unsigned long flags;
+
+ if (!rdc321x_is_gpio(gpio))
return -EINVAL;
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f)))
+ goto inuse;
+ gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f));
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+ return 0;
+inuse:
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ return -EINVAL;
}
-EXPORT_SYMBOL(rdc_gpio_get_value);
+EXPORT_SYMBOL(rdc_gpio_request);
-void rdc_gpio_set_value(unsigned gpio, int value)
+/* release previously-claimed GPIO */
+void rdc_gpio_free(unsigned gpio)
{
- unsigned int val;
+ unsigned long flags;
- if (!rdc_gpio_is_valid(gpio))
+ if (!rdc321x_is_gpio(gpio))
return;
- val = rdc_gpio_read(gpio);
+ spin_lock_irqsave(&gpio_lock, flags);
+ gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f));
+ spin_unlock_irqrestore(&gpio_lock, flags);
+}
+EXPORT_SYMBOL(rdc_gpio_free);
+
+/* read GPIO pin */
+int rdc_gpio_get_value(unsigned gpio)
+{
+ u32 reg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ reg = rdc321x_conf_read(gpio < 32
+ ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2);
+ spin_unlock_irqrestore(&gpio_lock, flags);
- if (value)
- val &= ~(0x1 << (gpio & 0x1F));
- else
- val |= (0x1 << (gpio & 0x1F));
+ return (1 << (gpio & 0x1f)) & reg ? 1 : 0;
+}
+EXPORT_SYMBOL(rdc_gpio_get_value);
- rdc_gpio_write(val);
+/* set GPIO pin to value */
+void rdc_gpio_set_value(unsigned gpio, int value)
+{
+ unsigned long flags;
+ u32 reg;
+
+ reg = 1 << (gpio & 0x1f);
+ if (gpio < 32) {
+ spin_lock_irqsave(&gpio_lock, flags);
+ if (value)
+ gpio_data_reg1 |= reg;
+ else
+ gpio_data_reg1 &= ~reg;
+ rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1);
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ } else {
+ spin_lock_irqsave(&gpio_lock, flags);
+ if (value)
+ gpio_data_reg2 |= reg;
+ else
+ gpio_data_reg2 &= ~reg;
+ rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2);
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ }
}
EXPORT_SYMBOL(rdc_gpio_set_value);
+/* configure GPIO pin as input */
int rdc_gpio_direction_input(unsigned gpio)
{
+ if (!rdc321x_is_gpio(gpio))
+ return -EINVAL;
+
+ rdc321x_configure_gpio(gpio);
+
return 0;
}
EXPORT_SYMBOL(rdc_gpio_direction_input);
+/* configure GPIO pin as output and set value */
int rdc_gpio_direction_output(unsigned gpio, int value)
{
+ if (!rdc321x_is_gpio(gpio))
+ return -EINVAL;
+
+ gpio_set_value(gpio, value);
+ rdc321x_configure_gpio(gpio);
+
return 0;
}
EXPORT_SYMBOL(rdc_gpio_direction_output);
-
-
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
index dda6024a5862..a037041817c7 100644
--- a/arch/x86/mach-rdc321x/platform.c
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -62,6 +62,8 @@ static struct platform_device *rdc321x_devs[] = {
static int __init rdc_board_setup(void)
{
+ rdc321x_gpio_setup();
+
return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
}
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index c394ca0720b8..8e25e06ff730 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -324,7 +324,6 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundary between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
- find_max_pfn();
get_memcfg_numa();
kva_pages = calculate_numa_remap_pages();
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fdc667422df9..ec08d8389850 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -91,12 +91,10 @@ static int is_prefetch(struct pt_regs *regs, unsigned long addr,
int prefetch = 0;
unsigned char *max_instr;
-#ifdef CONFIG_X86_32
- if (!(__supported_pte_mask & _PAGE_NX))
- return 0;
-#endif
-
- /* If it was a exec fault on NX page, ignore */
+ /*
+ * If it was a exec (instruction fetch) fault on NX page, then
+ * do not ignore the fault:
+ */
if (error_code & PF_INSTR)
return 0;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 3d936f232704..9cf33d3ee5bc 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -73,15 +73,15 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
-
- debug_kmap_atomic_prot(type);
+ /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
+ debug_kmap_atomic_prot(type);
+
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 4fbafb4bc2f0..0b3d567e686d 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -178,7 +178,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
- WARN_ON(!PageCompound(page));
+ WARN_ON(!PageHead(page));
return page;
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 4afaba0ed722..794895c6dcc9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -137,7 +137,11 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
switch (mode) {
case IOR_MODE_UNCACHED:
default:
- prot = PAGE_KERNEL_NOCACHE;
+ /*
+ * FIXME: we will use UC MINUS for now, as video fb drivers
+ * depend on it. Upcoming ioremap_wc() will fix this behavior.
+ */
+ prot = PAGE_KERNEL_UC_MINUS;
break;
case IOR_MODE_CACHED:
prot = PAGE_KERNEL;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 14e48b5a94ba..7b79f6be4e7d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -771,7 +771,7 @@ static inline int change_page_attr_clear(unsigned long addr, int numpages,
int set_memory_uc(unsigned long addr, int numpages)
{
return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PCD | _PAGE_PWT));
+ __pgprot(_PAGE_PCD));
}
EXPORT_SYMBOL(set_memory_uc);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8b9ee27805fd..27ee26aedf94 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -95,7 +95,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info;
*
* 0: not available, 1: available
*/
-static int have_vcpu_info_placement = 0;
+static int have_vcpu_info_placement = 1;
static void __init xen_vcpu_setup(int cpu)
{
@@ -103,6 +103,7 @@ static void __init xen_vcpu_setup(int cpu)
int err;
struct vcpu_info *vcpup;
+ BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info);
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
if (!have_vcpu_info_placement)
@@ -666,10 +667,10 @@ static void xen_release_pt_init(u32 pfn)
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
- op.cmd = level;
+ op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
BUG();
@@ -686,7 +687,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
if (!PageHighMem(page)) {
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
- pin_pagetable_pfn(level, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
} else
/* make sure there are no stray mappings of
this page */
@@ -696,27 +698,39 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L1_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PTE);
}
static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
{
- xen_alloc_ptpage(mm, pfn, MMUEXT_PIN_L2_TABLE);
+ xen_alloc_ptpage(mm, pfn, PT_PMD);
}
/* This should never happen until we're OK to use struct page */
-static void xen_release_pt(u32 pfn)
+static void xen_release_ptpage(u32 pfn, unsigned level)
{
struct page *page = pfn_to_page(pfn);
if (PagePinned(page)) {
if (!PageHighMem(page)) {
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
+ if (level == PT_PTE)
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
+ ClearPagePinned(page);
}
}
+static void xen_release_pt(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PTE);
+}
+
+static void xen_release_pd(u32 pfn)
+{
+ xen_release_ptpage(pfn, PT_PMD);
+}
+
#ifdef CONFIG_HIGHPTE
static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
{
@@ -805,33 +819,43 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
PFN_DOWN(__pa(xen_start_info->pt_base)));
}
-static __init void xen_pagetable_setup_done(pgd_t *base)
+static __init void setup_shared_info(void)
{
- /* This will work as long as patching hasn't happened yet
- (which it hasn't) */
- pv_mmu_ops.alloc_pt = xen_alloc_pt;
- pv_mmu_ops.alloc_pd = xen_alloc_pd;
- pv_mmu_ops.release_pt = xen_release_pt;
- pv_mmu_ops.release_pd = xen_release_pt;
- pv_mmu_ops.set_pte = xen_set_pte;
-
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
+
/*
* Create a mapping for the shared info page.
* Should be set_fixmap(), but shared_info is a machine
* address with no corresponding pseudo-phys address.
*/
- set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP),
+ set_pte_mfn(addr,
PFN_DOWN(xen_start_info->shared_info),
PAGE_KERNEL);
- HYPERVISOR_shared_info =
- (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-
+ HYPERVISOR_shared_info = (struct shared_info *)addr;
} else
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);
+#ifndef CONFIG_SMP
+ /* In UP this is as good a place as any to set up shared info */
+ xen_setup_vcpu_info_placement();
+#endif
+}
+
+static __init void xen_pagetable_setup_done(pgd_t *base)
+{
+ /* This will work as long as patching hasn't happened yet
+ (which it hasn't) */
+ pv_mmu_ops.alloc_pt = xen_alloc_pt;
+ pv_mmu_ops.alloc_pd = xen_alloc_pd;
+ pv_mmu_ops.release_pt = xen_release_pt;
+ pv_mmu_ops.release_pd = xen_release_pd;
+ pv_mmu_ops.set_pte = xen_set_pte;
+
+ setup_shared_info();
+
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
{
@@ -1182,15 +1206,9 @@ asmlinkage void __init xen_start_kernel(void)
x86_write_percpu(xen_cr3, __pa(pgd));
x86_write_percpu(xen_current_cr3, __pa(pgd));
-#ifdef CONFIG_SMP
/* Don't do the full vcpu_info placement stuff until we have a
- possible map. */
+ possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
-#else
- /* May as well do it now, since there's no good time to call
- it later on UP. */
- xen_setup_vcpu_info_placement();
-#endif
pv_info.kernel_rpl = 1;
if (xen_feature(XENFEAT_supervisor_mode_kernel))
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 0144395448ae..2a054ef2a3da 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -310,13 +310,6 @@ pgd_t xen_make_pgd(unsigned long pgd)
}
#endif /* CONFIG_X86_PAE */
-enum pt_level {
- PT_PGD,
- PT_PUD,
- PT_PMD,
- PT_PTE
-};
-
/*
(Yet another) pagetable walker. This one is intended for pinning a
pagetable. This means that it walks a pagetable and calls the
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index c9ff27f3ac3a..b5e189b1519d 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -3,6 +3,13 @@
#include <linux/linkage.h>
#include <asm/page.h>
+enum pt_level {
+ PT_PGD,
+ PT_PUD,
+ PT_PMD,
+ PT_PTE
+};
+
/*
* Page-directory addresses above 4GB do not fit into architectural %cr3.
* When accessing %cr3, or equivalent field in vcpu_guest_context, guests
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1a43b60c0c62..6b7190449d07 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -33,12 +33,17 @@
events, then enter the hypervisor to get them handled.
*/
ENTRY(xen_irq_enable_direct)
- /* Clear mask and test pending */
- andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
+ /* Unmask events */
+ movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
+
/* Preempt here doesn't matter because that will deal with
any pending interrupts. The pending check may end up being
run on the wrong CPU, but that doesn't hurt. */
+
+ /* Test for pending */
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
jz 1f
+
2: call check_events
1:
ENDPATCH(xen_irq_enable_direct)