summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2005-09-08 07:45:47 +0200
committerLen Brown <len.brown@intel.com>2005-09-08 07:45:47 +0200
commit64e47488c913ac704d465a6af86a26786d1412a5 (patch)
treed3b0148592963dcde26e4bb35ddfec8b1eaf8e23 /arch/i386/kernel
parent[ACPI] revert bad processor_core.c patch for bug 5128 (diff)
parent[SCSI] Re-do "final klist fixes" (diff)
downloadlinux-64e47488c913ac704d465a6af86a26786d1412a5.tar.xz
linux-64e47488c913ac704d465a6af86a26786d1412a5.zip
Merge linux-2.6 with linux-acpi-2.6
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile2
-rw-r--r--arch/i386/kernel/acpi/boot.c3
-rw-r--r--arch/i386/kernel/cpu/common.c16
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c12
-rw-r--r--arch/i386/kernel/cpu/cyrix.c6
-rw-r--r--arch/i386/kernel/cpu/intel.c9
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c9
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/i386/kernel/crash.c2
-rw-r--r--arch/i386/kernel/dmi_scan.c231
-rw-r--r--arch/i386/kernel/doublefault.c2
-rw-r--r--arch/i386/kernel/efi.c110
-rw-r--r--arch/i386/kernel/entry.S22
-rw-r--r--arch/i386/kernel/head.S48
-rw-r--r--arch/i386/kernel/i8237.c67
-rw-r--r--arch/i386/kernel/io_apic.c65
-rw-r--r--arch/i386/kernel/ioport.c7
-rw-r--r--arch/i386/kernel/kprobes.c35
-rw-r--r--arch/i386/kernel/ldt.c7
-rw-r--r--arch/i386/kernel/machine_kexec.c18
-rw-r--r--arch/i386/kernel/microcode.c7
-rw-r--r--arch/i386/kernel/mpparse.c17
-rw-r--r--arch/i386/kernel/msr.c31
-rw-r--r--arch/i386/kernel/nmi.c10
-rw-r--r--arch/i386/kernel/process.c43
-rw-r--r--arch/i386/kernel/ptrace.c63
-rw-r--r--arch/i386/kernel/reboot.c9
-rw-r--r--arch/i386/kernel/semaphore.c162
-rw-r--r--arch/i386/kernel/setup.c24
-rw-r--r--arch/i386/kernel/signal.c8
-rw-r--r--arch/i386/kernel/smp.c2
-rw-r--r--arch/i386/kernel/smpboot.c7
-rw-r--r--arch/i386/kernel/time.c23
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c23
-rw-r--r--arch/i386/kernel/timers/timer_pit.c27
-rw-r--r--arch/i386/kernel/timers/timer_pm.c9
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c14
-rw-r--r--arch/i386/kernel/traps.c27
-rw-r--r--arch/i386/kernel/vm86.c10
-rw-r--r--arch/i386/kernel/vmlinux.lds.S1
-rw-r--r--arch/i386/kernel/vsyscall-sigreturn.S3
41 files changed, 630 insertions, 563 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index c52b4fad011b..f10de0f2c5e6 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o
+ doublefault.o quirks.o i8237.o
obj-y += cpu/
obj-y += timers/
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 0fb23c30eb98..a63351c085c6 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -814,6 +814,9 @@ static void __init acpi_process_madt(void)
if (!error) {
acpi_lapic = 1;
+#ifdef CONFIG_X86_GENERICARCH
+ generic_bigsmp_probe();
+#endif
/*
* Parse MADT IO-APIC entries
*/
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 4553ffd94b1f..46ce9b248f55 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -613,8 +613,8 @@ void __devinit cpu_init(void)
memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu),
GDT_ENTRY_TLS_ENTRIES * 8);
- __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu]));
- __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+ load_gdt(&cpu_gdt_descr[cpu]);
+ load_idt(&idt_descr);
/*
* Delete NT
@@ -642,12 +642,12 @@ void __devinit cpu_init(void)
asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
/* Clear all 6 debug registers: */
-
-#define CD(register) set_debugreg(0, register)
-
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+ set_debugreg(0, 0);
+ set_debugreg(0, 1);
+ set_debugreg(0, 2);
+ set_debugreg(0, 3);
+ set_debugreg(0, 6);
+ set_debugreg(0, 7);
/*
* Force FPU initialization:
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 04e3563da4fe..bf02b5026e62 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -64,8 +64,6 @@ static int dont_scale_voltage;
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
-#define __hlt() __asm__ __volatile__("hlt": : :"memory")
-
/* Clock ratios multiplied by 10 */
static int clock_ratio[32];
static int eblcr_table[32];
@@ -168,11 +166,9 @@ static void do_powersaver(union msr_longhaul *longhaul,
outb(0xFE,0x21); /* TMR0 only */
outb(0xFF,0x80); /* delay */
- local_irq_enable();
-
- __hlt();
+ safe_halt();
wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
- __hlt();
+ halt();
local_irq_disable();
@@ -251,9 +247,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
bcr2.bits.CLOCKMUL = clock_ratio_index;
local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- local_irq_enable();
-
- __hlt();
+ safe_halt();
/* Disable software clock multiplier */
rdmsrl (MSR_VIA_BCR2, bcr2.val);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ba4b01138c8f..ff87cc22b323 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -132,11 +132,7 @@ static void __init set_cx86_memwb(void)
setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
cr0 = 0x20000000;
- __asm__("movl %%cr0,%%eax\n\t"
- "orl %0,%%eax\n\t"
- "movl %%eax,%%cr0\n"
- : : "r" (cr0)
- :"ax");
+ write_cr0(read_cr0() | cr0);
/* CCR2 bit 2: lock NW bit and set WT1 */
setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
}
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index a2c33c1a46c5..43601de0f633 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -82,16 +82,13 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
*/
static int __devinit num_cpu_cores(struct cpuinfo_x86 *c)
{
- unsigned int eax;
+ unsigned int eax, ebx, ecx, edx;
if (c->cpuid_level < 4)
return 1;
- __asm__("cpuid"
- : "=a" (eax)
- : "0" (4), "c" (0)
- : "bx", "dx");
-
+ /* Intel has a non-standard dependency on %ecx for this CPUID level. */
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return ((eax >> 26) + 1);
else
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 6c55b50cf048..9e0d5f83cb9f 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -305,6 +305,9 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
unsigned long num_threads_sharing;
+#ifdef CONFIG_X86_HT
+ struct cpuinfo_x86 *c = cpu_data + cpu;
+#endif
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -314,10 +317,12 @@ static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
#ifdef CONFIG_X86_HT
else if (num_threads_sharing == smp_num_siblings)
this_leaf->shared_cpu_map = cpu_sibling_map[cpu];
-#endif
+ else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings))
+ this_leaf->shared_cpu_map = cpu_core_map[cpu];
else
- printk(KERN_INFO "Number of CPUs sharing cache didn't match "
+ printk(KERN_DEBUG "Number of CPUs sharing cache didn't match "
"any known set of CPUs\n");
+#endif
}
#else
static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 764cac64e211..dd4ebd6af7e4 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -561,7 +561,7 @@ struct mtrr_value {
static struct mtrr_value * mtrr_state;
-static int mtrr_save(struct sys_device * sysdev, u32 state)
+static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
int i;
int size = num_var_ranges * sizeof(struct mtrr_value);
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index e5fab12f7926..913be77bb844 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -153,7 +153,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
disable_local_APIC();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
- __asm__("hlt");
+ halt();
for(;;);
return 1;
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c
index a3cdf894302b..58516e2ac172 100644
--- a/arch/i386/kernel/dmi_scan.c
+++ b/arch/i386/kernel/dmi_scan.c
@@ -6,32 +6,28 @@
#include <linux/bootmem.h>
-struct dmi_header {
- u8 type;
- u8 length;
- u16 handle;
-};
-
-#undef DMI_DEBUG
-
-#ifdef DMI_DEBUG
-#define dmi_printk(x) printk x
-#else
-#define dmi_printk(x)
-#endif
-
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
u8 *bp = ((u8 *) dm) + dm->length;
+ char *str = "";
- if (!s)
- return "";
- s--;
- while (s > 0 && *bp) {
- bp += strlen(bp) + 1;
+ if (s) {
s--;
- }
- return bp;
+ while (s > 0 && *bp) {
+ bp += strlen(bp) + 1;
+ s--;
+ }
+
+ if (*bp != 0) {
+ str = alloc_bootmem(strlen(bp) + 1);
+ if (str != NULL)
+ strcpy(str, bp);
+ else
+ printk(KERN_ERR "dmi_string: out of memory.\n");
+ }
+ }
+
+ return str;
}
/*
@@ -84,69 +80,76 @@ static int __init dmi_checksum(u8 *buf)
return sum == 0;
}
-static int __init dmi_iterate(void (*decode)(struct dmi_header *))
+static char *dmi_ident[DMI_STRING_MAX];
+static LIST_HEAD(dmi_devices);
+
+/*
+ * Save a DMI string
+ */
+static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
{
- u8 buf[15];
- char __iomem *p, *q;
+ char *p, *d = (char*) dm;
- /*
- * no iounmap() for that ioremap(); it would be a no-op, but it's
- * so early in setup that sucker gets confused into doing what
- * it shouldn't if we actually call it.
- */
- p = ioremap(0xF0000, 0x10000);
+ if (dmi_ident[slot])
+ return;
+
+ p = dmi_string(dm, d[string]);
if (p == NULL)
- return -1;
+ return;
- for (q = p; q < p + 0x10000; q += 16) {
- memcpy_fromio(buf, q, 15);
- if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
- u16 num = (buf[13] << 8) | buf[12];
- u16 len = (buf[7] << 8) | buf[6];
- u32 base = (buf[11] << 24) | (buf[10] << 16) |
- (buf[9] << 8) | buf[8];
+ dmi_ident[slot] = p;
+}
- /*
- * DMI version 0.0 means that the real version is taken from
- * the SMBIOS version, which we don't know at this point.
- */
- if (buf[14] != 0)
- printk(KERN_INFO "DMI %d.%d present.\n",
- buf[14] >> 4, buf[14] & 0xF);
- else
- printk(KERN_INFO "DMI present.\n");
+static void __init dmi_save_devices(struct dmi_header *dm)
+{
+ int i, count = (dm->length - sizeof(struct dmi_header)) / 2;
+ struct dmi_device *dev;
+
+ for (i = 0; i < count; i++) {
+ char *d = ((char *) dm) + (i * 2);
- dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n",
- num, len));
- dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base));
+ /* Skip disabled device */
+ if ((*d & 0x80) == 0)
+ continue;
- if (dmi_table(base,len, num, decode) == 0)
- return 0;
+ dev = alloc_bootmem(sizeof(*dev));
+ if (!dev) {
+ printk(KERN_ERR "dmi_save_devices: out of memory.\n");
+ break;
}
+
+ dev->type = *d++ & 0x7f;
+ dev->name = dmi_string(dm, *d);
+ dev->device_data = NULL;
+
+ list_add(&dev->list, &dmi_devices);
}
- return -1;
}
-static char *dmi_ident[DMI_STRING_MAX];
-
-/*
- * Save a DMI string
- */
-static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
+static void __init dmi_save_ipmi_device(struct dmi_header *dm)
{
- char *d = (char*)dm;
- char *p = dmi_string(dm, d[string]);
+ struct dmi_device *dev;
+ void * data;
- if (p == NULL || *p == 0)
+ data = alloc_bootmem(dm->length);
+ if (data == NULL) {
+ printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
return;
- if (dmi_ident[slot])
+ }
+
+ memcpy(data, dm, dm->length);
+
+ dev = alloc_bootmem(sizeof(*dev));
+ if (!dev) {
+ printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
return;
+ }
- dmi_ident[slot] = alloc_bootmem(strlen(p) + 1);
- if(dmi_ident[slot])
- strcpy(dmi_ident[slot], p);
- else
- printk(KERN_ERR "dmi_save_ident: out of memory.\n");
+ dev->type = DMI_DEV_TYPE_IPMI;
+ dev->name = "IPMI controller";
+ dev->device_data = data;
+
+ list_add(&dev->list, &dmi_devices);
}
/*
@@ -156,42 +159,69 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string)
*/
static void __init dmi_decode(struct dmi_header *dm)
{
- u8 *data __attribute__((__unused__)) = (u8 *)dm;
-
switch(dm->type) {
- case 0:
- dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4])));
+ case 0: /* BIOS Information */
dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
- dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5])));
dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
- dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8])));
dmi_save_ident(dm, DMI_BIOS_DATE, 8);
break;
- case 1:
- dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4])));
+ case 1: /* System Information */
dmi_save_ident(dm, DMI_SYS_VENDOR, 4);
- dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5])));
dmi_save_ident(dm, DMI_PRODUCT_NAME, 5);
- dmi_printk(("Version: %s\n", dmi_string(dm, data[6])));
dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6);
- dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7])));
dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7);
break;
- case 2:
- dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4])));
+ case 2: /* Base Board Information */
dmi_save_ident(dm, DMI_BOARD_VENDOR, 4);
- dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5])));
dmi_save_ident(dm, DMI_BOARD_NAME, 5);
- dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6])));
dmi_save_ident(dm, DMI_BOARD_VERSION, 6);
break;
+ case 10: /* Onboard Devices Information */
+ dmi_save_devices(dm);
+ break;
+ case 38: /* IPMI Device Information */
+ dmi_save_ipmi_device(dm);
}
}
void __init dmi_scan_machine(void)
{
- if (dmi_iterate(dmi_decode))
- printk(KERN_INFO "DMI not present.\n");
+ u8 buf[15];
+ char __iomem *p, *q;
+
+ /*
+ * no iounmap() for that ioremap(); it would be a no-op, but it's
+ * so early in setup that sucker gets confused into doing what
+ * it shouldn't if we actually call it.
+ */
+ p = ioremap(0xF0000, 0x10000);
+ if (p == NULL)
+ goto out;
+
+ for (q = p; q < p + 0x10000; q += 16) {
+ memcpy_fromio(buf, q, 15);
+ if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) {
+ u16 num = (buf[13] << 8) | buf[12];
+ u16 len = (buf[7] << 8) | buf[6];
+ u32 base = (buf[11] << 24) | (buf[10] << 16) |
+ (buf[9] << 8) | buf[8];
+
+ /*
+ * DMI version 0.0 means that the real version is taken from
+ * the SMBIOS version, which we don't know at this point.
+ */
+ if (buf[14] != 0)
+ printk(KERN_INFO "DMI %d.%d present.\n",
+ buf[14] >> 4, buf[14] & 0xF);
+ else
+ printk(KERN_INFO "DMI present.\n");
+
+ if (dmi_table(base,len, num, dmi_decode) == 0)
+ return;
+ }
+ }
+
+out: printk(KERN_INFO "DMI not present.\n");
}
@@ -218,9 +248,9 @@ int dmi_check_system(struct dmi_system_id *list)
/* No match */
goto fail;
}
+ count++;
if (d->callback && d->callback(d))
break;
- count++;
fail: d++;
}
@@ -240,3 +270,32 @@ char *dmi_get_system_info(int field)
return dmi_ident[field];
}
EXPORT_SYMBOL(dmi_get_system_info);
+
+/**
+ * dmi_find_device - find onboard device by type/name
+ * @type: device type or %DMI_DEV_TYPE_ANY to match all device types
+ * @desc: device name string or %NULL to match all
+ * @from: previous device found in search, or %NULL for new search.
+ *
+ * Iterates through the list of known onboard devices. If a device is
+ * found with a matching @vendor and @device, a pointer to its device
+ * structure is returned. Otherwise, %NULL is returned.
+ * A new search is initiated by passing %NULL to the @from argument.
+ * If @from is not %NULL, searches continue from next device.
+ */
+struct dmi_device * dmi_find_device(int type, const char *name,
+ struct dmi_device *from)
+{
+ struct list_head *d, *head = from ? &from->list : &dmi_devices;
+
+ for(d = head->next; d != &dmi_devices; d = d->next) {
+ struct dmi_device *dev = list_entry(d, struct dmi_device, list);
+
+ if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) &&
+ ((name == NULL) || (strcmp(dev->name, name) == 0)))
+ return dev;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(dmi_find_device);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 789af3e9fb1f..5edb1d379add 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -20,7 +20,7 @@ static void doublefault_fn(void)
struct Xgt_desc_struct gdt_desc = {0, 0};
unsigned long gdt, tss;
- __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory");
+ store_gdt(&gdt_desc);
gdt = gdt_desc.address;
printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 385883ea8c19..ecad519fd395 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -79,7 +79,7 @@ static void efi_call_phys_prelog(void)
* directory. If I have PSE, I just need to duplicate one entry in
* page directory.
*/
- __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+ cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) {
efi_bak_pg_dir_pointer[0].pgd =
@@ -104,8 +104,7 @@ static void efi_call_phys_prelog(void)
local_flush_tlb();
cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address);
- __asm__ __volatile__("lgdt %0":"=m"
- (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])));
+ load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]));
}
static void efi_call_phys_epilog(void)
@@ -114,8 +113,8 @@ static void efi_call_phys_epilog(void)
cpu_gdt_descr[0].address =
(unsigned long) __va(cpu_gdt_descr[0].address);
- __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr));
- __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4));
+ load_gdt(&cpu_gdt_descr[0]);
+ cr4 = read_cr4();
if (cr4 & X86_CR4_PSE) {
swapper_pg_dir[pgd_index(0)].pgd =
@@ -233,22 +232,23 @@ void __init efi_map_memmap(void)
{
memmap.map = NULL;
- memmap.map = (efi_memory_desc_t *)
- bt_ioremap((unsigned long) memmap.phys_map,
- (memmap.nr_map * sizeof(efi_memory_desc_t)));
-
+ memmap.map = bt_ioremap((unsigned long) memmap.phys_map,
+ (memmap.nr_map * memmap.desc_size));
if (memmap.map == NULL)
printk(KERN_ERR PFX "Could not remap the EFI memmap!\n");
+
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
}
#if EFI_DEBUG
static void __init print_efi_memmap(void)
{
efi_memory_desc_t *md;
+ void *p;
int i;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+ md = p;
printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, "
"range=[0x%016llx-0x%016llx) (%lluMB)\n",
i, md->type, md->attribute, md->phys_addr,
@@ -271,10 +271,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg)
} prev, curr;
efi_memory_desc_t *md;
unsigned long start, end;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->num_pages == 0) || (!is_available_memory(md)))
continue;
@@ -325,6 +325,7 @@ void __init efi_init(void)
memmap.phys_map = EFI_MEMMAP;
memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE;
memmap.desc_version = EFI_MEMDESC_VERSION;
+ memmap.desc_size = EFI_MEMDESC_SIZE;
efi.systab = (efi_system_table_t *)
boot_ioremap((unsigned long) efi_phys.systab,
@@ -428,22 +429,30 @@ void __init efi_init(void)
printk(KERN_ERR PFX "Could not map the runtime service table!\n");
/* Map the EFI memory map for use until paging_init() */
-
- memmap.map = (efi_memory_desc_t *)
- boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
-
+ memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE);
if (memmap.map == NULL)
printk(KERN_ERR PFX "Could not map the EFI memory map!\n");
- if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) {
- printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't "
- "match the one from EFI!\n");
- }
+ memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
#if EFI_DEBUG
print_efi_memmap();
#endif
}
+static inline void __init check_range_for_systab(efi_memory_desc_t *md)
+{
+ if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) &&
+ ((unsigned long)efi_phys.systab < md->phys_addr +
+ ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) {
+ unsigned long addr;
+
+ addr = md->virt_addr - md->phys_addr +
+ (unsigned long)efi_phys.systab;
+ efi.systab = (efi_system_table_t *)addr;
+ }
+}
+
/*
* This function will switch the EFI runtime services to virtual mode.
* Essentially, look through the EFI memmap and map every region that
@@ -457,43 +466,32 @@ void __init efi_enter_virtual_mode(void)
{
efi_memory_desc_t *md;
efi_status_t status;
- int i;
+ void *p;
efi.systab = NULL;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
- if (md->attribute & EFI_MEMORY_RUNTIME) {
- md->virt_addr =
- (unsigned long)ioremap(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT);
- if (!(unsigned long)md->virt_addr) {
- printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
- (unsigned long)md->phys_addr);
- }
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
- if (((unsigned long)md->phys_addr <=
- (unsigned long)efi_phys.systab) &&
- ((unsigned long)efi_phys.systab <
- md->phys_addr +
- ((unsigned long)md->num_pages <<
- EFI_PAGE_SHIFT))) {
- unsigned long addr;
-
- addr = md->virt_addr - md->phys_addr +
- (unsigned long)efi_phys.systab;
- efi.systab = (efi_system_table_t *)addr;
- }
+ md->virt_addr = (unsigned long)ioremap(md->phys_addr,
+ md->num_pages << EFI_PAGE_SHIFT);
+ if (!(unsigned long)md->virt_addr) {
+ printk(KERN_ERR PFX "ioremap of 0x%lX failed\n",
+ (unsigned long)md->phys_addr);
}
+ /* update the virtual address of the EFI system table */
+ check_range_for_systab(md);
}
if (!efi.systab)
BUG();
status = phys_efi_set_virtual_address_map(
- sizeof(efi_memory_desc_t) * memmap.nr_map,
- sizeof(efi_memory_desc_t),
+ memmap.desc_size * memmap.nr_map,
+ memmap.desc_size,
memmap.desc_version,
memmap.phys_map);
@@ -533,10 +531,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
{
struct resource *res;
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >
0x100000000ULL)
@@ -613,10 +611,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
u32 efi_mem_type(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr <= phys_addr) && (phys_addr <
(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
return md->type;
@@ -627,10 +625,10 @@ u32 efi_mem_type(unsigned long phys_addr)
u64 efi_mem_attributes(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- int i;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if ((md->phys_addr <= phys_addr) && (phys_addr <
(md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) ))
return md->attribute;
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index a991d4e5edd2..3aad03839660 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -203,7 +203,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -226,9 +226,9 @@ ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
- # system call tracing in operation
+ # system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
- testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
+ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@@ -338,6 +338,9 @@ syscall_trace_entry:
movl %esp, %eax
xorl %edx,%edx
call do_syscall_trace
+ cmpl $0, %eax
+ jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
+ # so must skip actual syscall
movl ORIG_EAX(%esp), %eax
cmpl $(nr_syscalls), %eax
jnae syscall_call
@@ -504,7 +507,7 @@ label: \
pushl $__KERNEL_CS; \
pushl $sysenter_past_esp
-ENTRY(debug)
+KPROBE_ENTRY(debug)
cmpl $sysenter_entry,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
@@ -515,7 +518,7 @@ debug_stack_correct:
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
-
+ .previous .text
/*
* NMI is doubly nasty. It can happen _while_ we're handling
* a debug fault, and the debug fault hasn't yet been able to
@@ -588,13 +591,14 @@ nmi_16bit_stack:
.long 1b,iret_exc
.previous
-ENTRY(int3)
+KPROBE_ENTRY(int3)
pushl $-1 # mark this as an int
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
+ .previous .text
ENTRY(overflow)
pushl $0
@@ -628,17 +632,19 @@ ENTRY(stack_segment)
pushl $do_stack_segment
jmp error_code
-ENTRY(general_protection)
+KPROBE_ENTRY(general_protection)
pushl $do_general_protection
jmp error_code
+ .previous .text
ENTRY(alignment_check)
pushl $do_alignment_check
jmp error_code
-ENTRY(page_fault)
+KPROBE_ENTRY(page_fault)
pushl $do_page_fault
jmp error_code
+ .previous .text
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 4477bb107098..0480ca9e9e57 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -77,6 +77,32 @@ ENTRY(startup_32)
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
+/*
+ * Copy bootup parameters out of the way.
+ * Note: %esi still has the pointer to the real-mode data.
+ * With the kexec as boot loader, parameter segment might be loaded beyond
+ * kernel image and might not even be addressable by early boot page tables.
+ * (kexec on panic case). Hence copy out the parameters before initializing
+ * page tables.
+ */
+ movl $(boot_params - __PAGE_OFFSET),%edi
+ movl $(PARAM_SIZE/4),%ecx
+ cld
+ rep
+ movsl
+ movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+ andl %esi,%esi
+ jnz 2f # New command line protocol
+ cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
+ jne 1f
+ movzwl OLD_CL_OFFSET,%esi
+ addl $(OLD_CL_BASE_ADDR),%esi
+2:
+ movl $(saved_command_line - __PAGE_OFFSET),%edi
+ movl $(COMMAND_LINE_SIZE/4),%ecx
+ rep
+ movsl
+1:
/*
* Initialize page tables. This creates a PDE and a set of page
@@ -214,28 +240,6 @@ ENTRY(startup_32_smp)
*/
call setup_idt
-/*
- * Copy bootup parameters out of the way.
- * Note: %esi still has the pointer to the real-mode data.
- */
- movl $boot_params,%edi
- movl $(PARAM_SIZE/4),%ecx
- cld
- rep
- movsl
- movl boot_params+NEW_CL_POINTER,%esi
- andl %esi,%esi
- jnz 2f # New command line protocol
- cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR
- jne 1f
- movzwl OLD_CL_OFFSET,%esi
- addl $(OLD_CL_BASE_ADDR),%esi
-2:
- movl $saved_command_line,%edi
- movl $(COMMAND_LINE_SIZE/4),%ecx
- rep
- movsl
-1:
checkCPUtype:
movl $-1,X86_CPUID # -1 for no CPUID initially
diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c
new file mode 100644
index 000000000000..c36d1c006c2f
--- /dev/null
+++ b/arch/i386/kernel/i8237.c
@@ -0,0 +1,67 @@
+/*
+ * i8237.c: 8237A DMA controller suspend functions.
+ *
+ * Written by Pierre Ossman, 2005.
+ */
+
+#include <linux/init.h>
+#include <linux/sysdev.h>
+
+#include <asm/dma.h>
+
+/*
+ * This module just handles suspend/resume issues with the
+ * 8237A DMA controller (used for ISA and LPC).
+ * Allocation is handled in kernel/dma.c and normal usage is
+ * in asm/dma.h.
+ */
+
+static int i8237A_resume(struct sys_device *dev)
+{
+ unsigned long flags;
+ int i;
+
+ flags = claim_dma_lock();
+
+ dma_outb(DMA1_RESET_REG, 0);
+ dma_outb(DMA2_RESET_REG, 0);
+
+ for (i = 0;i < 8;i++) {
+ set_dma_addr(i, 0x000000);
+ /* DMA count is a bit weird so this is not 0 */
+ set_dma_count(i, 1);
+ }
+
+ /* Enable cascade DMA or channel 0-3 won't work */
+ enable_dma(4);
+
+ release_dma_lock(flags);
+
+ return 0;
+}
+
+static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
+{
+ return 0;
+}
+
+static struct sysdev_class i8237_sysdev_class = {
+ set_kset_name("i8237"),
+ .suspend = i8237A_suspend,
+ .resume = i8237A_resume,
+};
+
+static struct sys_device device_i8237A = {
+ .id = 0,
+ .cls = &i8237_sysdev_class,
+};
+
+static int __init i8237A_init_sysfs(void)
+{
+ int error = sysdev_class_register(&i8237_sysdev_class);
+ if (!error)
+ error = sysdev_register(&device_i8237A);
+ return error;
+}
+
+device_initcall(i8237A_init_sysfs);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index ebedd2e21670..889eda2d7b17 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -33,6 +33,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/sysdev.h>
+
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
@@ -77,7 +78,7 @@ static struct irq_pin_list {
int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
-int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
#ifdef CONFIG_PCI_MSI
#define vector_to_irq(vector) \
(platform_legacy_irq(vector) ? vector : vector_irq[vector])
@@ -222,13 +223,21 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
+#ifdef CONFIG_SMP
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
{
unsigned long flags;
int pin;
struct irq_pin_list *entry = irq_2_pin + irq;
unsigned int apicid_value;
+ cpumask_t tmp;
+ cpus_and(tmp, cpumask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
apicid_value = cpu_mask_to_apicid(cpumask);
/* Prepare to do the io_apic_write */
apicid_value = apicid_value << 24;
@@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
break;
entry = irq_2_pin + entry->next;
}
+ set_irq_info(irq, cpumask);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# define Dprintk(x...)
# endif
-cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
#define IRQBALANCE_CHECK_ARCH -999
static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
@@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq)
cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
new_cpu = move(cpu, allowed_mask, now, 1);
if (cpu != new_cpu) {
- irq_desc_t *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&desc->lock, flags);
- pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
- spin_unlock_irqrestore(&desc->lock, flags);
+ set_pending_irq(irq, cpumask_of_cpu(new_cpu));
}
}
@@ -528,16 +532,12 @@ tryanotherirq:
cpus_and(tmp, target_cpu_mask, allowed_mask);
if (!cpus_empty(tmp)) {
- irq_desc_t *desc = irq_desc + selected_irq;
- unsigned long flags;
Dprintk("irq = %d moved to cpu = %d\n",
selected_irq, min_loaded);
/* mark for change destination */
- spin_lock_irqsave(&desc->lock, flags);
- pending_irq_balance_cpumask[selected_irq] =
- cpumask_of_cpu(min_loaded);
- spin_unlock_irqrestore(&desc->lock, flags);
+ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
/* Since we made a change, come back sooner to
* check for more variation.
*/
@@ -568,7 +568,8 @@ static int balanced_irq(void *unused)
/* push everything to CPU 0 to give us a starting point. */
for (i = 0 ; i < NR_IRQS ; i++) {
- pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
+ pending_irq_cpumask[i] = cpumask_of_cpu(0);
+ set_pending_irq(i, cpumask_of_cpu(0));
}
for ( ; ; ) {
@@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str)
__setup("noirqbalance", irqbalance_disable);
-static inline void move_irq(int irq)
-{
- /* note - we hold the desc->lock */
- if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
- set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
- cpus_clear(pending_irq_balance_cpumask[irq]);
- }
-}
-
late_initcall(balanced_irq_init);
-
-#else /* !CONFIG_IRQBALANCE */
-static inline void move_irq(int irq) { }
#endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
#ifndef CONFIG_SMP
void fastcall send_IPI_self(int vector)
@@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
* we need to reprogram the ioredtbls to cater for the cpus which have come online
* so mask in all cases should simply be TARGET_CPUS
*/
+#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
@@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void)
}
}
+#endif
/*
* EISA Edge/Level control register, ELCR
@@ -1127,7 +1119,7 @@ static inline int IO_APIC_irq_trigger(int irq)
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
@@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void)
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
}
@@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
{
int irq = vector_to_irq(vector);
+ move_irq(vector);
ack_edge_ioapic_irq(irq);
}
@@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector)
{
int irq = vector_to_irq(vector);
+ move_irq(vector);
end_level_ioapic_irq(irq);
}
@@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
unmask_IO_APIC_irq(irq);
}
+#ifdef CONFIG_SMP
static void set_ioapic_affinity_vector (unsigned int vector,
cpumask_t cpu_mask)
{
int irq = vector_to_irq(vector);
+ set_native_irq_info(vector, cpu_mask);
set_ioapic_affinity_irq(irq, cpu_mask);
}
#endif
+#endif
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -1992,7 +1990,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
* edge-triggered handler, without risking IRQ storms and other ugly
* races.
*/
-static struct hw_interrupt_type ioapic_edge_type = {
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
.typename = "IO-APIC-edge",
.startup = startup_edge_ioapic,
.shutdown = shutdown_edge_ioapic,
@@ -2000,10 +1998,12 @@ static struct hw_interrupt_type ioapic_edge_type = {
.disable = disable_edge_ioapic,
.ack = ack_edge_ioapic,
.end = end_edge_ioapic,
+#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity,
+#endif
};
-static struct hw_interrupt_type ioapic_level_type = {
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
.typename = "IO-APIC-level",
.startup = startup_level_ioapic,
.shutdown = shutdown_level_ioapic,
@@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = {
.disable = disable_level_ioapic,
.ack = mask_and_ack_level_ioapic,
.end = end_level_ioapic,
+#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity,
+#endif
};
static inline void init_IO_APIC_traps(void)
@@ -2074,7 +2076,7 @@ static void ack_lapic_irq (unsigned int irq)
static void end_lapic_irq (unsigned int i) { /* nothing */ }
-static struct hw_interrupt_type lapic_irq_type = {
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
.typename = "local-APIC-edge",
.startup = NULL, /* startup_irq() not used for IRQ0 */
.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
@@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c1..f2b37654777f 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
volatile struct pt_regs * regs = (struct pt_regs *) &unused;
unsigned int level = regs->ebx;
unsigned int old = (regs->eflags >> 12) & 3;
+ struct thread_struct *t = &current->thread;
if (level > 3)
return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
}
- regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
- /* Make sure we return the long way (not sysenter) */
- set_thread_flag(TIF_IRET);
+ t->iopl = level << 12;
+ regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+ set_iopl_mask(t->iopl);
return 0;
}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index a6d8c45961d3..6345b430b105 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -62,32 +62,32 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode)
return 0;
}
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
return 0;
}
-void arch_copy_kprobe(struct kprobe *p)
+void __kprobes arch_copy_kprobe(struct kprobe *p)
{
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
}
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
{
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
*p->addr = p->opcode;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
{
}
@@ -127,7 +127,8 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->eip = (unsigned long)&p->ainsn.insn;
}
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+ struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)&regs->esp;
struct kretprobe_instance *ri;
@@ -150,7 +151,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled thorough out this function.
*/
-static int kprobe_handler(struct pt_regs *regs)
+static int __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
@@ -176,7 +177,8 @@ static int kprobe_handler(struct pt_regs *regs)
Disarm the probe we just hit, and ignore it. */
p = get_kprobe(addr);
if (p) {
- if (kprobe_status == KPROBE_HIT_SS) {
+ if (kprobe_status == KPROBE_HIT_SS &&
+ *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
regs->eflags &= ~TF_MASK;
regs->eflags |= kprobe_saved_eflags;
unlock_kprobes();
@@ -220,7 +222,10 @@ static int kprobe_handler(struct pt_regs *regs)
* either a probepoint or a debugger breakpoint
* at this address. In either case, no further
* handling of this interrupt is appropriate.
+ * Back up over the (now missing) int3 and run
+ * the original instruction.
*/
+ regs->eip -= sizeof(kprobe_opcode_t);
ret = 1;
}
/* Not one of ours: let kernel handle it */
@@ -259,7 +264,7 @@ no_kprobe:
/*
* Called when we hit the probe point at kretprobe_trampoline
*/
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
@@ -338,7 +343,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*/
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
unsigned long *tos = (unsigned long *)&regs->esp;
unsigned long next_eip = 0;
@@ -444,8 +449,8 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
/*
* Wrapper routine to for handling exceptions.
*/
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
- void *data)
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
switch (val) {
@@ -473,7 +478,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
return NOTIFY_DONE;
}
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr;
@@ -495,7 +500,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
-void jprobe_return(void)
+void __kprobes jprobe_return(void)
{
preempt_enable_no_resched();
asm volatile (" xchgl %%ebx,%%esp \n"
@@ -506,7 +511,7 @@ void jprobe_return(void)
(jprobe_saved_esp):"memory");
}
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
u8 *addr = (u8 *) (regs->eip - 1);
unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index bb50afbee921..fe1ffa55587d 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -177,7 +177,7 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct * mm = current->mm;
- __u32 entry_1, entry_2, *lp;
+ __u32 entry_1, entry_2;
int error;
struct user_desc ldt_info;
@@ -205,8 +205,6 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
goto out_unlock;
}
- lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
-
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
@@ -223,8 +221,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
/* Install the new entry ... */
install:
- *lp = entry_1;
- *(lp+1) = entry_2;
+ write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2);
error = 0;
out_unlock:
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index cb699a2aa1f8..a912fed48482 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -17,13 +17,7 @@
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
-
-static inline unsigned long read_cr3(void)
-{
- unsigned long cr3;
- asm volatile("movl %%cr3,%0": "=r"(cr3));
- return cr3;
-}
+#include <asm/system.h>
#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
@@ -99,10 +93,7 @@ static void set_idt(void *newidt, __u16 limit)
curidt.size = limit;
curidt.address = (unsigned long)newidt;
- __asm__ __volatile__ (
- "lidtl %0\n"
- : : "m" (curidt)
- );
+ load_idt(&curidt);
};
@@ -114,10 +105,7 @@ static void set_gdt(void *newgdt, __u16 limit)
curgdt.size = limit;
curgdt.address = (unsigned long)newgdt;
- __asm__ __volatile__ (
- "lgdtl %0\n"
- : : "m" (curgdt)
- );
+ load_gdt(&curgdt);
};
static void load_segments(void)
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index a77c612aad00..165f13158c60 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -164,7 +164,8 @@ static void collect_cpu_info (void *unused)
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+ /* see notes above for revision 1.07. Apparent chip bug */
+ serialize_cpu();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
@@ -377,7 +378,9 @@ static void do_update_one (void * unused)
(unsigned long) uci->mc->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
- __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
+ /* see notes above for revision 1.07. Apparent chip bug */
+ serialize_cpu();
+
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 97dbf289dbd5..cafaeffe3818 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -65,6 +65,8 @@ int nr_ioapics;
int pic_mode;
unsigned long mp_lapic_addr;
+unsigned int def_to_bigsmp = 0;
+
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
@@ -120,7 +122,7 @@ static int MP_valid_apicid(int apicid, int version)
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, apicid;
+ int ver, apicid, cpu, found_bsp = 0;
physid_mask_t tmp;
if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -179,6 +181,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
Dprintk(" Bootup CPU\n");
boot_cpu_physical_apicid = m->mpc_apicid;
+ found_bsp = 1;
}
if (num_processors >= NR_CPUS) {
@@ -202,6 +205,11 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
return;
}
+ if (found_bsp)
+ cpu = 0;
+ else
+ cpu = num_processors - 1;
+ cpu_set(cpu, cpu_possible_map);
tmp = apicid_to_cpu_present(apicid);
physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp);
@@ -213,6 +221,13 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
+ if ((num_processors > 8) &&
+ APIC_XAPIC(ver) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+ def_to_bigsmp = 1;
+ else
+ def_to_bigsmp = 0;
+
bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
}
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index b2f03c39a6fe..03100d6fc5d6 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -46,23 +46,13 @@
static struct class *msr_class;
-/* Note: "err" is handled in a funny way below. Otherwise one version
- of gcc or another breaks. */
-
static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
{
int err;
- asm volatile ("1: wrmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err)
- :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0));
-
+ err = wrmsr_safe(reg, eax, edx);
+ if (err)
+ err = -EIO;
return err;
}
@@ -70,18 +60,9 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
{
int err;
- asm volatile ("1: rdmsr\n"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl %4,%0\n"
- " jmp 2b\n"
- ".previous\n"
- ".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 1b,3b\n"
- ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx)
- :"c"(reg), "i"(-EIO), "0"(0));
-
+ err = rdmsr_safe(reg, eax, edx);
+ if (err)
+ err = -EIO;
return err;
}
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 8c242bb1ef45..0178457db721 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -478,6 +478,11 @@ void touch_nmi_watchdog (void)
*/
for (i = 0; i < NR_CPUS; i++)
alert_counter[i] = 0;
+
+ /*
+ * Tickle the softlockup detector too:
+ */
+ touch_softlockup_watchdog();
}
extern void die_nmi(struct pt_regs *, const char *msg);
@@ -501,8 +506,11 @@ void nmi_watchdog_tick (struct pt_regs * regs)
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*nmi_hz)
+ /*
+ * die_nmi will return ONLY if NOTIFY_STOP happens..
+ */
die_nmi(regs, "NMI Watchdog detected LOCKUP");
- } else {
+
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index e3f362e8af5b..b45cbf93d439 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -164,7 +164,7 @@ static inline void play_dead(void)
*/
local_irq_disable();
while (1)
- __asm__ __volatile__("hlt":::"memory");
+ halt();
}
#else
static inline void play_dead(void)
@@ -313,16 +313,12 @@ void show_regs(struct pt_regs * regs)
printk(" DS: %04x ES: %04x\n",
0xffff & regs->xds,0xffff & regs->xes);
- __asm__("movl %%cr0, %0": "=r" (cr0));
- __asm__("movl %%cr2, %0": "=r" (cr2));
- __asm__("movl %%cr3, %0": "=r" (cr3));
- /* This could fault if %cr4 does not exist */
- __asm__("1: movl %%cr4, %0 \n"
- "2: \n"
- ".section __ex_table,\"a\" \n"
- ".long 1b,2b \n"
- ".previous \n"
- : "=r" (cr4): "0" (0));
+ cr0 = read_cr0();
+ cr2 = read_cr2();
+ cr3 = read_cr3();
+ if (current_cpu_data.x86 > 4) {
+ cr4 = read_cr4();
+ }
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
show_trace(NULL, &regs->esp);
}
@@ -682,21 +678,26 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
__unlazy_fpu(prev_p);
/*
- * Reload esp0, LDT and the page table pointer:
+ * Reload esp0.
*/
load_esp0(tss, next);
/*
- * Load the per-thread Thread-Local Storage descriptor.
+ * Save away %fs and %gs. No need to save %es and %ds, as
+ * those are always kernel segments while inside the kernel.
+ * Doing this before setting the new TLS descriptors avoids
+ * the situation where we temporarily have non-reloadable
+ * segments in %fs and %gs. This could be an issue if the
+ * NMI handler ever used %fs or %gs (it does not today), or
+ * if the kernel is running inside of a hypervisor layer.
*/
- load_TLS(next, cpu);
+ savesegment(fs, prev->fs);
+ savesegment(gs, prev->gs);
/*
- * Save away %fs and %gs. No need to save %es and %ds, as
- * those are always kernel segments while inside the kernel.
+ * Load the per-thread Thread-Local Storage descriptor.
*/
- asm volatile("mov %%fs,%0":"=m" (prev->fs));
- asm volatile("mov %%gs,%0":"=m" (prev->gs));
+ load_TLS(next, cpu);
/*
* Restore %fs and %gs if needed.
@@ -711,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
loadsegment(gs, next->gs);
/*
+ * Restore IOPL if needed.
+ */
+ if (unlikely(prev->iopl != next->iopl))
+ set_iopl_mask(next->iopl);
+
+ /*
* Now maybe reload the debug registers
*/
if (unlikely(next->debugreg[7])) {
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 0da59b42843c..340980203b09 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -271,6 +271,8 @@ static void clear_singlestep(struct task_struct *child)
void ptrace_disable(struct task_struct *child)
{
clear_singlestep(child);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
/*
@@ -509,15 +511,20 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
}
break;
+ case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */
case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
case PTRACE_CONT: /* restart after signal. */
ret = -EIO;
if (!valid_signal(data))
break;
- if (request == PTRACE_SYSCALL) {
+ if (request == PTRACE_SYSEMU) {
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ } else if (request == PTRACE_SYSCALL) {
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- }
- else {
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ } else {
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
child->exit_code = data;
@@ -542,10 +549,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
wake_up_process(child);
break;
+ case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */
case PTRACE_SINGLESTEP: /* set the trap flag. */
ret = -EIO;
if (!valid_signal(data))
break;
+
+ if (request == PTRACE_SYSEMU_SINGLESTEP)
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
set_singlestep(child);
child->exit_code = data;
@@ -678,26 +692,52 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
* - triggered by current->work.syscall_trace
*/
__attribute__((regparm(3)))
-void do_syscall_trace(struct pt_regs *regs, int entryexit)
+int do_syscall_trace(struct pt_regs *regs, int entryexit)
{
+ int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU), ret = 0;
+ /* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall
+ * interception. */
+ int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP);
+
/* do the secure computing check first */
secure_computing(regs->orig_eax);
- if (unlikely(current->audit_context) && entryexit)
- audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ if (unlikely(current->audit_context)) {
+ if (entryexit)
+ audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
+ * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
+ * not used, entry.S will call us only on syscall exit, not
+ * entry; so when TIF_SYSCALL_AUDIT is used we must avoid
+ * calling send_sigtrap() on syscall entry.
+ *
+ * Note that when PTRACE_SYSEMU_SINGLESTEP is used,
+ * is_singlestep is false, despite his name, so we will still do
+ * the correct thing.
+ */
+ else if (is_singlestep)
+ goto out;
+ }
if (!(current->ptrace & PT_PTRACED))
goto out;
+ /* If a process stops on the 1st tracepoint with SYSCALL_TRACE
+ * and then is resumed with SYSEMU_SINGLESTEP, it will come in
+ * here. We have to check this and return */
+ if (is_sysemu && entryexit)
+ return 0;
+
/* Fake a debug trap */
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (is_singlestep)
send_sigtrap(current, regs, 0);
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
+ if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu)
goto out;
/* the 0x80 provides a way for the tracing parent to distinguish
between a syscall stop and SIGTRAP delivery */
+ /* Note that the debugger could change the result of test_thread_flag!*/
ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
/*
@@ -709,9 +749,16 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit)
send_sig(current->exit_code, current, 1);
current->exit_code = 0;
}
+ ret = is_sysemu;
out:
if (unlikely(current->audit_context) && !entryexit)
audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
regs->ebx, regs->ecx, regs->edx, regs->esi);
+ if (ret == 0)
+ return 0;
+ regs->orig_eax = -1; /* force skip of syscall restarting */
+ if (unlikely(current->audit_context))
+ audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax);
+ return 1;
}
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index c71fef31dc47..1cbb9c0f4704 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -13,6 +13,7 @@
#include <linux/dmi.h>
#include <asm/uaccess.h>
#include <asm/apic.h>
+#include <asm/desc.h>
#include "mach_reboot.h"
#include <linux/reboot_fixups.h>
@@ -242,13 +243,13 @@ void machine_real_restart(unsigned char *code, int length)
/* Set up the IDT for real mode. */
- __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
+ load_idt(&real_mode_idt);
/* Set up a GDT from which we can load segment descriptors for real
mode. The GDT is not used in real mode; it is just needed here to
prepare the descriptors. */
- __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
+ load_gdt(&real_mode_gdt);
/* Load the data segment registers, and thus the descriptors ready for
real mode. The base address of each segment is 0x100, 16 times the
@@ -316,7 +317,7 @@ void machine_emergency_restart(void)
if (!reboot_thru_bios) {
if (efi_enabled) {
efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL);
- __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ load_idt(&no_idt);
__asm__ __volatile__("int3");
}
/* rebooting needs to touch the page at absolute addr 0 */
@@ -325,7 +326,7 @@ void machine_emergency_restart(void)
mach_reboot_fixups(); /* for board specific fixups */
mach_reboot();
/* That didn't work - force a triple fault.. */
- __asm__ __volatile__("lidt %0": :"m" (no_idt));
+ load_idt(&no_idt);
__asm__ __volatile__("int3");
}
}
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
index 469f496e55c0..7455ab643943 100644
--- a/arch/i386/kernel/semaphore.c
+++ b/arch/i386/kernel/semaphore.c
@@ -13,171 +13,9 @@
* rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
*/
#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
#include <asm/semaphore.h>
/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-static fastcall void __attribute_used__ __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-static fastcall void __attribute_used__ __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
-
-
-/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
* need to convert that sequence back into the C sequence when
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index d52eda399a7a..a659d274914c 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(efi_enabled);
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
/* common cpu data for all cpus */
-struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
unsigned long mmu_cr4_features;
@@ -370,12 +370,16 @@ static void __init limit_regions(unsigned long long size)
int i;
if (efi_enabled) {
- for (i = 0; i < memmap.nr_map; i++) {
- current_addr = memmap.map[i].phys_addr +
- (memmap.map[i].num_pages << 12);
- if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
+ efi_memory_desc_t *md;
+ void *p;
+
+ for (p = memmap.map, i = 0; p < memmap.map_end;
+ p += memmap.desc_size, i++) {
+ md = p;
+ current_addr = md->phys_addr + (md->num_pages << 12);
+ if (md->type == EFI_CONVENTIONAL_MEMORY) {
if (current_addr >= size) {
- memmap.map[i].num_pages -=
+ md->num_pages -=
(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
memmap.nr_map = i + 1;
return;
@@ -1581,8 +1585,14 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_table_init();
acpi_boot_init();
-#endif
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
+ if (def_to_bigsmp)
+ printk(KERN_WARNING "More than 8 CPUs detected and "
+ "CONFIG_X86_PC cannot handle it.\nUse "
+ "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
+#endif
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
if (smp_found_config)
get_smp_config();
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 140e340569c6..61eb0c8a6e47 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -278,9 +278,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
int tmp, err = 0;
tmp = 0;
- __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(gs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
- __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+ savesegment(fs, tmp);
err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
@@ -604,7 +604,9 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset)
* We want the common case to go fast, which
* is why we may in certain cases get here from
* kernel mode. Just return without doing anything
- * if so.
+ * if so. vm86 regs switched out by assembly code
+ * before reaching here, so testing against kernel
+ * CS suffices.
*/
if (!user_mode(regs))
return 1;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index cec4bde67161..48b55db3680f 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -576,7 +576,7 @@ static void stop_this_cpu (void * dummy)
local_irq_disable();
disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) __asm__("hlt");
+ for(;;) halt();
for (;;);
}
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8ac8e9fd5614..5e4893d2b9f2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -88,6 +88,8 @@ EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
@@ -1017,8 +1019,8 @@ int __devinit smp_prepare_cpu(int cpu)
tsc_sync_disabled = 1;
/* init low mem mapping */
- memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ KERNEL_PGD_PTRS);
flush_tlb_all();
schedule_work(&task);
wait_for_completion(&done);
@@ -1265,6 +1267,7 @@ void __devinit smp_prepare_boot_cpu(void)
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_callout_map);
cpu_set(smp_processor_id(), cpu_present_map);
+ cpu_set(smp_processor_id(), cpu_possible_map);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 0ee9dee8af06..eefea7c55008 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv)
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
+ ntp_clear();
write_sequnlock_irq(&xtime_lock);
clock_was_set();
return 0;
@@ -252,8 +249,7 @@ EXPORT_SYMBOL(profile_pc);
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
*/
-static inline void do_timer_interrupt(int irq, void *dev_id,
- struct pt_regs *regs)
+static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
{
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
@@ -307,7 +303,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
cur_timer->mark_offset();
- do_timer_interrupt(irq, NULL, regs);
+ do_timer_interrupt(irq, regs);
write_sequnlock(&xtime_lock);
return IRQ_HANDLED;
@@ -348,7 +344,7 @@ static void sync_cmos_clock(unsigned long dummy)
* This code is run on a timer. If the clock is set, that timer
* may not expire at the correct time. Thus, we adjust...
*/
- if ((time_status & STA_UNSYNC) != 0)
+ if (!ntp_synced())
/*
* Not synced, exit, do not restart a timer (if one is
* running, let it run out).
@@ -383,6 +379,7 @@ void notify_arch_cmos_timer(void)
static long clock_cmos_diff, sleep_start;
+static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -391,6 +388,10 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
+ last_timer = cur_timer;
+ cur_timer = &timer_none;
+ if (last_timer->suspend)
+ last_timer->suspend(state);
return 0;
}
@@ -404,6 +405,7 @@ static int timer_resume(struct sys_device *dev)
if (is_hpet_enabled())
hpet_reenable();
#endif
+ setup_pit_timer();
sec = get_cmos_time() + clock_cmos_diff;
sleep_length = (get_cmos_time() - sleep_start) * HZ;
write_seqlock_irqsave(&xtime_lock, flags);
@@ -412,6 +414,11 @@ static int timer_resume(struct sys_device *dev)
write_sequnlock_irqrestore(&xtime_lock, flags);
jiffies += sleep_length;
wall_jiffies += sleep_length;
+ if (last_timer->resume)
+ last_timer->resume();
+ cur_timer = last_timer;
+ last_timer = NULL;
+ touch_softlockup_watchdog();
return 0;
}
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index ef8dac5dd33b..d973a8b681fd 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -18,8 +18,8 @@
#include "mach_timer.h"
#include <asm/hpet.h>
-static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */
+static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
+static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
static unsigned long hpet_last; /* hpet counter value at last tick*/
static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
@@ -136,6 +136,8 @@ static void delay_hpet(unsigned long loops)
} while ((hpet_end - hpet_start) < (loops));
}
+static struct timer_opts timer_hpet;
+
static int __init init_hpet(char* override)
{
unsigned long result, remain;
@@ -163,6 +165,8 @@ static int __init init_hpet(char* override)
}
set_cyc2ns_scale(cpu_khz/1000);
}
+ /* set this only when cpu_has_tsc */
+ timer_hpet.read_timer = read_timer_tsc;
}
/*
@@ -177,6 +181,19 @@ static int __init init_hpet(char* override)
return 0;
}
+static int hpet_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ rdtsc(last_tsc_low, last_tsc_high);
+
+ if (hpet_use_timer)
+ hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ else
+ hpet_last = hpet_readl(HPET_COUNTER);
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
/************************************************************/
/* tsc timer_opts struct */
@@ -186,7 +203,7 @@ static struct timer_opts timer_hpet __read_mostly = {
.get_offset = get_offset_hpet,
.monotonic_clock = monotonic_clock_hpet,
.delay = delay_hpet,
- .read_timer = read_timer_tsc,
+ .resume = hpet_resume,
};
struct init_timer_opts __initdata timer_hpet_init = {
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 06de036a820c..eddb64038234 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -175,30 +175,3 @@ void setup_pit_timer(void)
outb(LATCH >> 8 , PIT_CH0); /* MSB */
spin_unlock_irqrestore(&i8253_lock, flags);
}
-
-static int timer_resume(struct sys_device *dev)
-{
- setup_pit_timer();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- set_kset_name("timer_pit"),
- .resume = timer_resume,
-};
-
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int __init init_timer_sysfs(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(init_timer_sysfs);
-
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
index 4ef20e663498..264edaaac315 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ b/arch/i386/kernel/timers/timer_pm.c
@@ -186,6 +186,14 @@ static void mark_offset_pmtmr(void)
}
}
+static int pmtmr_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ offset_tick = read_pmtmr();
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
static unsigned long long monotonic_clock_pmtmr(void)
{
@@ -247,6 +255,7 @@ static struct timer_opts timer_pmtmr = {
.monotonic_clock = monotonic_clock_pmtmr,
.delay = delay_pmtmr,
.read_timer = read_timer_tsc,
+ .resume = pmtmr_resume,
};
struct init_timer_opts __initdata timer_pmtmr_init = {
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 8f4e4d5bc560..6dd470cc9f72 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -543,6 +543,19 @@ static int __init init_tsc(char* override)
return -ENODEV;
}
+static int tsc_resume(void)
+{
+ write_seqlock(&monotonic_lock);
+ /* Assume this is the last mark offset time */
+ rdtsc(last_tsc_low, last_tsc_high);
+#ifdef CONFIG_HPET_TIMER
+ if (is_hpet_enabled() && hpet_use_timer)
+ hpet_last = hpet_readl(HPET_COUNTER);
+#endif
+ write_sequnlock(&monotonic_lock);
+ return 0;
+}
+
#ifndef CONFIG_X86_TSC
/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
* in cpu/common.c */
@@ -573,6 +586,7 @@ static struct timer_opts timer_tsc = {
.monotonic_clock = monotonic_clock_tsc,
.delay = delay_tsc,
.read_timer = read_timer_tsc,
+ .resume = tsc_resume,
};
struct init_timer_opts __initdata timer_tsc_init = {
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index cd2d5d5514fe..09a58cb6daa7 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -210,7 +210,7 @@ void show_registers(struct pt_regs *regs)
unsigned short ss;
esp = (unsigned long) (&regs->esp);
- ss = __KERNEL_DS;
+ savesegment(ss, ss);
if (user_mode(regs)) {
in_kernel = 0;
esp = regs->esp;
@@ -267,9 +267,6 @@ static void handle_BUG(struct pt_regs *regs)
char c;
unsigned long eip;
- if (user_mode(regs))
- goto no_bug; /* Not in kernel */
-
eip = regs->eip;
if (eip < PAGE_OFFSET)
@@ -366,8 +363,9 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e
die(str, regs, err);
}
-static void do_trap(int trapnr, int signr, char *str, int vm86,
- struct pt_regs * regs, long error_code, siginfo_t *info)
+static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
+ struct pt_regs * regs, long error_code,
+ siginfo_t *info)
{
struct task_struct *tsk = current;
tsk->thread.error_code = error_code;
@@ -463,7 +461,8 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
-fastcall void do_general_protection(struct pt_regs * regs, long error_code)
+fastcall void __kprobes do_general_protection(struct pt_regs * regs,
+ long error_code)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
@@ -568,6 +567,10 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void die_nmi (struct pt_regs *regs, const char *msg)
{
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) ==
+ NOTIFY_STOP)
+ return;
+
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
@@ -656,7 +659,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
++nmi_count(cpu);
- if (!nmi_callback(regs, cpu))
+ if (!rcu_dereference(nmi_callback)(regs, cpu))
default_do_nmi(regs);
nmi_exit();
@@ -664,7 +667,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
void set_nmi_callback(nmi_callback_t callback)
{
- nmi_callback = callback;
+ rcu_assign_pointer(nmi_callback, callback);
}
EXPORT_SYMBOL_GPL(set_nmi_callback);
@@ -675,7 +678,7 @@ void unset_nmi_callback(void)
EXPORT_SYMBOL_GPL(unset_nmi_callback);
#ifdef CONFIG_KPROBES
-fastcall void do_int3(struct pt_regs *regs, long error_code)
+fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
@@ -709,7 +712,7 @@ fastcall void do_int3(struct pt_regs *regs, long error_code)
* find every occurrence of the TF bit that could be saved away even
* by user code)
*/
-fastcall void do_debug(struct pt_regs * regs, long error_code)
+fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code)
{
unsigned int condition;
struct task_struct *tsk = current;
@@ -1008,7 +1011,7 @@ void __init trap_init_f00f_bug(void)
* it uses the read-only mapped virtual address.
*/
idt_descr.address = fix_to_virt(FIX_F00F_IDT);
- __asm__ __volatile__("lidt %0" : : "m" (idt_descr));
+ load_idt(&idt_descr);
}
#endif
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index ec0f68ce6886..16b485009622 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -294,8 +294,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
*/
info->regs32->eax = 0;
tsk->thread.saved_esp0 = tsk->thread.esp0;
- asm volatile("mov %%fs,%0":"=m" (tsk->thread.saved_fs));
- asm volatile("mov %%gs,%0":"=m" (tsk->thread.saved_gs));
+ savesegment(fs, tsk->thread.saved_fs);
+ savesegment(gs, tsk->thread.saved_gs);
tss = &per_cpu(init_tss, get_cpu());
tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -542,7 +542,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
unsigned char opcode;
unsigned char __user *csp;
unsigned char __user *ssp;
- unsigned short ip, sp;
+ unsigned short ip, sp, orig_flags;
int data32, pref_done;
#define CHECK_IF_IN_TRAP \
@@ -551,8 +551,12 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
#define VM86_FAULT_RETURN do { \
if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
return_to_32bit(regs, VM86_PICRETURN); \
+ if (orig_flags & TF_MASK) \
+ handle_vm86_trap(regs, 0, 1); \
return; } while (0)
+ orig_flags = *(unsigned short *)&regs->eflags;
+
csp = (unsigned char __user *) (regs->cs << 4);
ssp = (unsigned char __user *) (regs->ss << 4);
sp = SP(regs);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 761972f8cb6c..13b9c62cbbb4 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -22,6 +22,7 @@ SECTIONS
*(.text)
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
} = 0x9090
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
index c8fcf75b9be3..68afa50dd7cf 100644
--- a/arch/i386/kernel/vsyscall-sigreturn.S
+++ b/arch/i386/kernel/vsyscall-sigreturn.S
@@ -15,7 +15,7 @@
*/
.text
- .org __kernel_vsyscall+32
+ .org __kernel_vsyscall+32,0x90
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
__kernel_sigreturn:
@@ -35,6 +35,7 @@ __kernel_rt_sigreturn:
int $0x80
.LEND_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+ .balign 32
.previous
.section .eh_frame,"a",@progbits