summaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig43
-rw-r--r--arch/x86_64/Makefile8
-rw-r--r--arch/x86_64/boot/setup.S5
-rw-r--r--arch/x86_64/defconfig27
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c4
-rw-r--r--arch/x86_64/ia32/ia32_signal.c5
-rw-r--r--arch/x86_64/ia32/ptrace32.c2
-rw-r--r--arch/x86_64/ia32/syscall32.c2
-rw-r--r--arch/x86_64/kernel/apic.c104
-rw-r--r--arch/x86_64/kernel/crash.c69
-rw-r--r--arch/x86_64/kernel/e820.c4
-rw-r--r--arch/x86_64/kernel/early-quirks.c26
-rw-r--r--arch/x86_64/kernel/early_printk.c2
-rw-r--r--arch/x86_64/kernel/entry.S36
-rw-r--r--arch/x86_64/kernel/genapic.c9
-rw-r--r--arch/x86_64/kernel/head64.c6
-rw-r--r--arch/x86_64/kernel/i387.c7
-rw-r--r--arch/x86_64/kernel/i8259.c3
-rw-r--r--arch/x86_64/kernel/io_apic.c365
-rw-r--r--arch/x86_64/kernel/irq.c2
-rw-r--r--arch/x86_64/kernel/kprobes.c2
-rw-r--r--arch/x86_64/kernel/mce.c9
-rw-r--r--arch/x86_64/kernel/mce_amd.c4
-rw-r--r--arch/x86_64/kernel/mpparse.c2
-rw-r--r--arch/x86_64/kernel/nmi.c29
-rw-r--r--arch/x86_64/kernel/pci-calgary.c218
-rw-r--r--arch/x86_64/kernel/pci-dma.c5
-rw-r--r--arch/x86_64/kernel/pci-gart.c3
-rw-r--r--arch/x86_64/kernel/process.c52
-rw-r--r--arch/x86_64/kernel/setup.c24
-rw-r--r--arch/x86_64/kernel/smp.c8
-rw-r--r--arch/x86_64/kernel/smpboot.c20
-rw-r--r--arch/x86_64/kernel/time.c15
-rw-r--r--arch/x86_64/kernel/traps.c59
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S10
-rw-r--r--arch/x86_64/kernel/vsyscall.c47
-rw-r--r--arch/x86_64/lib/csum-partial.c11
-rw-r--r--arch/x86_64/lib/csum-wrappers.c37
-rw-r--r--arch/x86_64/lib/delay.c4
-rw-r--r--arch/x86_64/mm/fault.c10
-rw-r--r--arch/x86_64/mm/init.c31
-rw-r--r--arch/x86_64/mm/pageattr.c58
-rw-r--r--arch/x86_64/pci/mmconfig.c32
43 files changed, 856 insertions, 563 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 010d2265f1cf..bfbb9bcae123 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -122,7 +122,7 @@ endchoice
choice
prompt "Processor family"
- default MK8
+ default GENERIC_CPU
config MK8
bool "AMD-Opteron/Athlon64"
@@ -130,16 +130,31 @@ config MK8
Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
config MPSC
- bool "Intel EM64T"
+ bool "Intel P4 / older Netburst based Xeon"
help
- Optimize for Intel Pentium 4 and Xeon CPUs with Intel
- Extended Memory 64 Technology(EM64T). For details see
+ Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
+ with Intel Extended Memory 64 Technology(EM64T). For details see
<http://www.intel.com/technology/64bitextensions/>.
+ Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the
+ Netburst core and shouldn't use this option. You can distingush them
+ using the cpu family field
+ in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one
+ (this rule only applies to system that support EM64T)
+
+config MCORE2
+ bool "Intel Core2 / newer Xeon"
+ help
+ Optimize for Intel Core2 and newer Xeons (51xx)
+ You can distingush the newer Xeons from the older ones using
+ the cpu family field in /proc/cpuinfo. 15 is a older Xeon
+ (use CONFIG_MPSC then), 6 is a newer one. This rule only
+ applies to CPUs that support EM64T.
config GENERIC_CPU
bool "Generic-x86-64"
help
Generic x86-64 CPU.
+ Run equally well on all x86-64 CPUs.
endchoice
@@ -149,12 +164,12 @@ endchoice
config X86_L1_CACHE_BYTES
int
default "128" if GENERIC_CPU || MPSC
- default "64" if MK8
+ default "64" if MK8 || MCORE2
config X86_L1_CACHE_SHIFT
int
default "7" if GENERIC_CPU || MPSC
- default "6" if MK8
+ default "6" if MK8 || MCORE2
config X86_INTERNODE_CACHE_BYTES
int
@@ -344,11 +359,6 @@ config ARCH_DISCONTIGMEM_ENABLE
depends on NUMA
default y
-
-config ARCH_DISCONTIGMEM_ENABLE
- def_bool y
- depends on NUMA
-
config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
depends on NUMA
@@ -455,6 +465,17 @@ config CALGARY_IOMMU
Normally the kernel will make the right choice by itself.
If unsure, say Y.
+config CALGARY_IOMMU_ENABLED_BY_DEFAULT
+ bool "Should Calgary be enabled by default?"
+ default y
+ depends on CALGARY_IOMMU
+ help
+ Should Calgary be enabled by default? if you choose 'y', Calgary
+ will be used (if it exists). If you choose 'n', Calgary will not be
+ used even if it exists. If you choose 'n' and would like to use
+ Calgary anyway, pass 'iommu=calgary' on the kernel command line.
+ If unsure, say Y.
+
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 13972148058d..b471b8550d03 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -30,6 +30,10 @@ cflags-y :=
cflags-kernel-y :=
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
+# will eventually. Use -mtune=generic as fallback
+cflags-$(CONFIG_MCORE2) += \
+ $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
cflags-y += -m64
@@ -66,8 +70,8 @@ AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
-cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector )
-cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector-all )
+cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
+cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
CFLAGS += $(cflags-y)
CFLAGS_KERNEL += $(cflags-kernel-y)
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index c3bfd223ab49..770940cc0108 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -836,13 +836,12 @@ gdt:
.word 0x9200 # data read/write
.word 0x00CF # granularity = 4096, 386
# (+5th nibble of limit)
+gdt_end:
idt_48:
.word 0 # idt limit = 0
.word 0, 0 # idt base = 0L
gdt_48:
- .word 0x8000 # gdt limit=2048,
- # 256 GDT entries
-
+ .word gdt_end-gdt-1 # gdt limit
.word 0, 0 # gdt base (filled in later)
# Include video setup & detection code
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 0f5d44e86be5..96f226cfb339 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.19-rc2-git4
-# Sat Oct 21 03:38:52 2006
+# Linux kernel version: 2.6.19-git7
+# Wed Dec 6 23:50:47 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -47,13 +47,14 @@ CONFIG_POSIX_MQUEUE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_CPUSETS is not set
+CONFIG_SYSFS_DEPRECATED=y
# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SYSCTL=y
# CONFIG_EMBEDDED is not set
CONFIG_UID16=y
-# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_SYSCTL_SYSCALL=y
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -87,9 +88,7 @@ CONFIG_STOP_MACHINE=y
# Block layer
#
CONFIG_BLOCK=y
-CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
#
# IO Schedulers
@@ -111,10 +110,11 @@ CONFIG_X86_PC=y
# CONFIG_X86_VSMP is not set
# CONFIG_MK8 is not set
# CONFIG_MPSC is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_L1_CACHE_SHIFT=7
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
# CONFIG_MICROCODE is not set
@@ -322,6 +322,7 @@ CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
CONFIG_TCP_CONG_CUBIC=y
CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
CONFIG_IPV6=y
# CONFIG_IPV6_PRIVACY is not set
# CONFIG_IPV6_ROUTER_PREF is not set
@@ -624,6 +625,7 @@ CONFIG_SATA_INTEL_COMBINED=y
# CONFIG_PATA_IT821X is not set
# CONFIG_PATA_JMICRON is not set
# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
# CONFIG_PATA_MPIIX is not set
# CONFIG_PATA_OLDPIIX is not set
# CONFIG_PATA_NETCELL is not set
@@ -795,6 +797,7 @@ CONFIG_BNX2=y
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
# CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
#
# Token Ring devices
@@ -927,10 +930,6 @@ CONFIG_RTC=y
# CONFIG_DTLK is not set
# CONFIG_R3964 is not set
# CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
@@ -1135,6 +1134,7 @@ CONFIG_USB_DEVICEFS=y
# CONFIG_USB_BANDWIDTH is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_MULTITHREAD_PROBE is not set
# CONFIG_USB_OTG is not set
#
@@ -1212,6 +1212,7 @@ CONFIG_USB_HIDINPUT=y
# CONFIG_USB_KAWETH is not set
# CONFIG_USB_PEGASUS is not set
# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET_MII is not set
# CONFIG_USB_USBNET is not set
CONFIG_USB_MON=y
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 82ef182de6ae..543ef4f405e9 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -305,8 +305,6 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
#undef MODULE_DESCRIPTION
#undef MODULE_AUTHOR
-#define elf_addr_t __u32
-
static void elf32_init(struct pt_regs *);
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
@@ -351,7 +349,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
bprm->loader += stack_base;
bprm->exec += stack_base;
- mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!mpnt)
return -ENOMEM;
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 0e0a266d976f..ff499ef2a1ba 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -584,6 +584,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->rdx = (unsigned long) &frame->info;
regs->rcx = (unsigned long) &frame->uc;
+ /* Make -mregparm=3 work */
+ regs->rax = sig;
+ regs->rdx = (unsigned long) &frame->info;
+ regs->rcx = (unsigned long) &frame->uc;
+
asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 3a7561d4703e..04566fe5de49 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -244,6 +244,8 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
case PTRACE_DETACH:
case PTRACE_SYSCALL:
case PTRACE_SETOPTIONS:
+ case PTRACE_SET_THREAD_AREA:
+ case PTRACE_GET_THREAD_AREA:
return sys_ptrace(request, pid, addr, data);
default:
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3a01329473ab..3e5ed20cba45 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
struct mm_struct *mm = current->mm;
int ret;
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!vma)
return -ENOMEM;
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed942b2..124b2d27b4ac 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
#include <linux/module.h>
+#include <linux/ioport.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -45,6 +46,12 @@ int apic_calibrate_pmtmr __initdata;
int disable_apic_timer __initdata;
+static struct resource *ioapic_resources;
+static struct resource lapic_resource = {
+ .name = "Local APIC",
+ .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
/*
* cpu_mask that denotes the CPUs that needs timer interrupt coming in as
* IPIs in place of local APIC timers
@@ -133,7 +140,6 @@ void clear_local_APIC(void)
apic_write(APIC_LVTERR, APIC_LVT_MASKED);
if (maxlvt >= 4)
apic_write(APIC_LVTPC, APIC_LVT_MASKED);
- v = GET_APIC_VERSION(apic_read(APIC_LVR));
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
}
@@ -452,23 +458,30 @@ static struct {
static int lapic_suspend(struct sys_device *dev, pm_message_t state)
{
unsigned long flags;
+ int maxlvt;
if (!apic_pm_state.active)
return 0;
+ maxlvt = get_maxlvt();
+
apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+ if (maxlvt >= 4)
+ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 5)
+ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
@@ -479,10 +492,13 @@ static int lapic_resume(struct sys_device *dev)
{
unsigned int l, h;
unsigned long flags;
+ int maxlvt;
if (!apic_pm_state.active)
return 0;
+ maxlvt = get_maxlvt();
+
local_irq_save(flags);
rdmsr(MSR_IA32_APICBASE, l, h);
l &= ~MSR_IA32_APICBASE_BASE;
@@ -496,8 +512,12 @@ static int lapic_resume(struct sys_device *dev)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_INTEL
+ if (maxlvt >= 5)
+ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+ if (maxlvt >= 4)
+ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
@@ -585,6 +605,64 @@ static int __init detect_init_APIC (void)
return 0;
}
+#ifdef CONFIG_X86_IO_APIC
+static struct resource * __init ioapic_setup_resources(void)
+{
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+ unsigned long n;
+ struct resource *res;
+ char *mem;
+ int i;
+
+ if (nr_ioapics <= 0)
+ return NULL;
+
+ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+ n *= nr_ioapics;
+
+ mem = alloc_bootmem(n);
+ res = (void *)mem;
+
+ if (mem != NULL) {
+ memset(mem, 0, n);
+ mem += sizeof(struct resource) * nr_ioapics;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ sprintf(mem, "IOAPIC %u", i);
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
+ }
+ }
+
+ ioapic_resources = res;
+
+ return res;
+}
+
+static int __init ioapic_insert_resources(void)
+{
+ int i;
+ struct resource *r = ioapic_resources;
+
+ if (!r) {
+ printk("IO APIC resources could be not be allocated.\n");
+ return -1;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ insert_resource(&iomem_resource, r);
+ r++;
+ }
+
+ return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occured to handle
+ * IO APICS that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
+
void __init init_apic_mappings(void)
{
unsigned long apic_phys;
@@ -604,6 +682,11 @@ void __init init_apic_mappings(void)
apic_mapped = 1;
apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
+ /* Put local APIC into the resource map. */
+ lapic_resource.start = apic_phys;
+ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ insert_resource(&iomem_resource, &lapic_resource);
+
/*
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
@@ -613,7 +696,9 @@ void __init init_apic_mappings(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
int i;
+ struct resource *ioapic_res;
+ ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +710,12 @@ void __init init_apic_mappings(void)
apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
__fix_to_virt(idx), ioapic_phys);
idx++;
+
+ if (ioapic_res != NULL) {
+ ioapic_res->start = ioapic_phys;
+ ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res++;
+ }
}
}
}
@@ -644,10 +735,9 @@ void __init init_apic_mappings(void)
static void __setup_APIC_LVTT(unsigned int clocks)
{
- unsigned int lvtt_value, tmp_value, ver;
+ unsigned int lvtt_value, tmp_value;
int cpu = smp_processor_id();
- ver = GET_APIC_VERSION(apic_read(APIC_LVR));
lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 3525f884af82..95a7a2c13131 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -28,71 +28,6 @@
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type,
- void *data, size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) +3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void final_note(u32 *buf)
-{
- struct elf_note note;
-
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
-{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= NR_CPUS))
- return;
-
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that, no need to invent something new.
- */
-
- buf = (u32*)per_cpu_ptr(crash_notes, cpu);
-
- if (!buf)
- return;
-
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
- sizeof(prstatus));
- final_note(buf);
-}
-
-static void crash_save_self(struct pt_regs *regs)
-{
- int cpu;
-
- cpu = smp_processor_id();
- crash_save_this_cpu(regs, cpu);
-}
-
#ifdef CONFIG_SMP
static atomic_t waiting_for_crash_ipi;
@@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self,
return NOTIFY_STOP;
local_irq_disable();
- crash_save_this_cpu(regs, cpu);
+ crash_save_cpu(regs, cpu);
disable_local_APIC();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs)
disable_IO_APIC();
- crash_save_self(regs);
+ crash_save_cpu(regs, smp_processor_id());
}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a75c829c2b02..6fe191c58084 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -278,7 +278,7 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
>> PAGE_SHIFT;
/* Skip map entries smaller than a page */
- if (ei_startpfn > ei_endpfn)
+ if (ei_startpfn >= ei_endpfn)
continue;
/* Check if end_pfn_map should be updated */
@@ -594,7 +594,9 @@ static int __init parse_memmap_opt(char *p)
* size before original memory map is
* reset.
*/
+ e820_register_active_regions(0, 0, -1UL);
saved_max_pfn = e820_end_of_ram();
+ remove_all_active_ranges();
#endif
end_pfn_map = 0;
e820.nr_map = 0;
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 2b1245d86258..829698f6d049 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -45,7 +45,13 @@ static void nvidia_bugs(void)
/*
* All timer overrides on Nvidia are
* wrong unless HPET is enabled.
+ * Unfortunately that's not true on many Asus boards.
+ * We don't know yet how to detect this automatically, but
+ * at least allow a command line override.
*/
+ if (acpi_use_timer_override)
+ return;
+
nvidia_hpet_detected = 0;
acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
if (nvidia_hpet_detected == 0) {
@@ -53,6 +59,8 @@ static void nvidia_bugs(void)
printk(KERN_INFO "Nvidia board "
"detected. Ignoring ACPI "
"timer override.\n");
+ printk(KERN_INFO "If you got timer trouble "
+ "try acpi_use_timer_override\n");
}
#endif
/* RED-PEN skip them on mptables too? */
@@ -61,11 +69,18 @@ static void nvidia_bugs(void)
static void ati_bugs(void)
{
- if (timer_over_8254 == 1) {
- timer_over_8254 = 0;
- printk(KERN_INFO
- "ATI board detected. Disabling timer routing over 8254.\n");
- }
+}
+
+static void intel_bugs(void)
+{
+ u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+
+#ifdef CONFIG_SMP
+ if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+ device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+ device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+ quirk_intel_irqbalance();
+#endif
}
struct chipset {
@@ -77,6 +92,7 @@ static struct chipset early_qrk[] = {
{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
{ PCI_VENDOR_ID_VIA, via_bugs },
{ PCI_VENDOR_ID_ATI, ati_bugs },
+ { PCI_VENDOR_ID_INTEL, intel_bugs},
{}
};
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
index e22ecd54870d..47b6d90349da 100644
--- a/arch/x86_64/kernel/early_printk.c
+++ b/arch/x86_64/kernel/early_printk.c
@@ -224,7 +224,7 @@ static int __init setup_early_printk(char *buf)
return 0;
early_console_initialized = 1;
- if (!strcmp(buf,"keep"))
+ if (strstr(buf, "keep"))
keep_early = 1;
if (!strncmp(buf, "serial", 6)) {
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7d401b00d822..601d332c4b79 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@ ENTRY(system_call)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
@@ -241,7 +240,6 @@ ENTRY(system_call)
* Syscall return path ending with SYSRET (fast path)
* Has incomplete stack frame and undefined top of stack.
*/
- .globl ret_from_sys_call
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
@@ -251,8 +249,8 @@ sysret_check:
TRACE_IRQS_OFF
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
- CFI_REMEMBER_STATE
jnz sysret_careful
+ CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
*/
@@ -265,10 +263,10 @@ sysret_check:
swapgs
sysretq
+ CFI_RESTORE_STATE
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
- CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
@@ -306,7 +304,6 @@ badsys:
/* Do syscall tracing */
tracesys:
- CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
@@ -322,32 +319,13 @@ tracesys:
call *sys_call_table(,%rax,8)
1: movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(system_call)
/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
- */
-ENTRY(int_ret_from_sys_call)
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
- CFI_REL_OFFSET r8,R8-ARGOFFSET
- CFI_REL_OFFSET r9,R9-ARGOFFSET
- CFI_REL_OFFSET r10,R10-ARGOFFSET
- CFI_REL_OFFSET r11,R11-ARGOFFSET
+ */
+ .globl int_ret_from_sys_call
+int_ret_from_sys_call:
cli
TRACE_IRQS_OFF
testl $3,CS-ARGOFFSET(%rsp)
@@ -394,8 +372,6 @@ int_very_careful:
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
- cli
- TRACE_IRQS_OFF
jmp int_restore_rest
int_signal:
@@ -411,7 +387,7 @@ int_restore_rest:
TRACE_IRQS_OFF
jmp int_with_check
CFI_ENDPROC
-END(int_ret_from_sys_call)
+END(system_call)
/*
* Certain special system calls that need to save a complete full stack frame.
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75d1866..b007433f96bb 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@ extern struct genapic apic_flat;
extern struct genapic apic_physflat;
struct genapic *genapic = &apic_flat;
-
+struct genapic *genapic_force;
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@ void __init clustered_apic_check(void)
u8 cluster_cnt[NUM_APIC_CLUSTERS];
int max_apic = 0;
+ /* genapic selection can be forced because of certain quirks.
+ */
+ if (genapic_force) {
+ genapic = genapic_force;
+ goto print;
+ }
+
#if defined(CONFIG_ACPI)
/*
* Some x86_64 machines use physical APIC mode regardless of how many
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3c5b5c..cc230b93cd1c 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
- for (i = 0; i < 256; i++)
+ /* clear bss before set_intr_gate with early_idt_handler */
+ clear_bss();
+
+ for (i = 0; i < IDT_ENTRIES; i++)
set_intr_gate(i, early_idt_handler);
asm volatile("lidt %0" :: "m" (idt_descr));
- clear_bss();
early_printk("Kernel alive\n");
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9bb781d..1d58c13bc6bc 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@ int save_i387(struct _fpstate __user *buf)
struct task_struct *tsk = current;
int err = 0;
- {
- extern void bad_user_i387_struct(void);
- if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
- bad_user_i387_struct();
- }
+ BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+ sizeof(tsk->thread.i387.fxsave));
if ((unsigned long)buf % 16)
printk("save_i387: bad fpstate %p\n",buf);
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c4ef801b765b..d73c79e821f1 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-void (*interrupt[NR_IRQS])(void) = {
+/* for the irq vectors */
+static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index fe429e5d6b29..2a1dcd5f69c2 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,10 +55,6 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
static int no_timer_check;
-static int disable_timer_pin_1 __initdata;
-
-int timer_over_8254 __initdata = 1;
-
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
@@ -88,6 +84,52 @@ static struct irq_pin_list {
short apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(value, &io_apic->data);
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ readl(&io_apic->data);
+}
+
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
@@ -126,11 +168,39 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
return eu.entry;
}
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- unsigned long flags;
union entry_union eu;
eu.entry = e;
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __ioapic_write_entry(apic, pin, e);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
@@ -256,9 +326,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
/*
* Disable it in the IO-APIC irq-routing table:
*/
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- ioapic_write_entry(apic, pin, entry);
+ ioapic_mask_entry(apic, pin);
}
static void clear_IO_APIC (void)
@@ -282,29 +350,6 @@ static int __init disable_ioapic_setup(char *str)
}
early_param("noapic", disable_ioapic_setup);
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
- disable_timer_pin_1 = 1;
- return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
-static int __init setup_disable_8254_timer(char *s)
-{
- timer_over_8254 = -1;
- return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
- timer_over_8254 = 2;
- return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
-
/*
* Find the IRQ entry number of a certain pin.
*/
@@ -684,14 +729,28 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
return vector;
}
+static void __clear_irq_vector(int irq)
+{
+ cpumask_t mask;
+ int cpu, vector;
+
+ BUG_ON(!irq_vector[irq]);
+
+ vector = irq_vector[irq];
+ cpus_and(mask, irq_domain[irq], cpu_online_map);
+ for_each_cpu_mask(cpu, mask)
+ per_cpu(vector_irq, cpu)[vector] = -1;
+
+ irq_vector[irq] = 0;
+ irq_domain[irq] = CPU_MASK_NONE;
+}
+
void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
/* This function must be called with vector_lock held */
- unsigned long flags;
int irq, vector;
-
/* Mark the inuse vectors */
for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
if (!cpu_isset(cpu, irq_domain[irq]))
@@ -724,31 +783,71 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
trigger == IOAPIC_LEVEL)
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_fasteoi_irq, "fasteoi");
- else
+ else {
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
set_irq_chip_and_handler_name(irq, &ioapic_chip,
handle_edge_irq, "edge");
+ }
}
-
-static void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
{
struct IO_APIC_route_entry entry;
- int apic, pin, idx, irq, first_notcon = 1, vector;
+ int vector;
unsigned long flags;
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ /*
+ * add it to the IO-APIC irq-routing table:
+ */
+ memset(&entry,0,sizeof(entry));
- /*
- * add it to the IO-APIC irq-routing table:
- */
- memset(&entry,0,sizeof(entry));
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+ entry.mask = 0; /* enable IRQ */
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+ entry.trigger = irq_trigger(idx);
+ entry.polarity = irq_polarity(idx);
- entry.delivery_mode = INT_DELIVERY_MODE;
- entry.dest_mode = INT_DEST_MODE;
- entry.mask = 0; /* enable IRQ */
+ if (irq_trigger(idx)) {
+ entry.trigger = 1;
+ entry.mask = 1;
entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ }
+
+ if (!apic && !IO_APIC_IRQ(irq))
+ return;
+
+ if (IO_APIC_IRQ(irq)) {
+ cpumask_t mask;
+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
+ if (vector < 0)
+ return;
+
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+ entry.vector = vector;
+
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+
+ ioapic_write_entry(apic, pin, entry);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+ int apic, pin, idx, irq, first_notcon = 1;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
@@ -760,39 +859,11 @@ static void __init setup_IO_APIC_irqs(void)
continue;
}
- entry.trigger = irq_trigger(idx);
- entry.polarity = irq_polarity(idx);
-
- if (irq_trigger(idx)) {
- entry.trigger = 1;
- entry.mask = 1;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
- }
-
irq = pin_2_irq(idx, apic, pin);
add_pin_to_irq(irq, apic, pin);
- if (!apic && !IO_APIC_IRQ(irq))
- continue;
-
- if (IO_APIC_IRQ(irq)) {
- cpumask_t mask;
- vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
- if (vector < 0)
- continue;
-
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
- entry.vector = vector;
-
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
- if (!apic && (irq < 16))
- disable_8259A_irq(irq);
- }
- ioapic_write_entry(apic, pin, entry);
+ setup_IO_APIC_irq(apic, pin, idx, irq);
- spin_lock_irqsave(&ioapic_lock, flags);
- set_native_irq_info(irq, TARGET_CPUS);
- spin_unlock_irqrestore(&ioapic_lock, flags);
}
}
@@ -1497,10 +1568,33 @@ static inline void unlock_ExtINT_logic(void)
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
* fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for modern platforms only.
*/
-static inline void check_timer(void)
+
+static int try_apic_pin(int apic, int pin, char *msg)
+{
+ apic_printk(APIC_VERBOSE, KERN_INFO
+ "..TIMER: trying IO-APIC=%d PIN=%d %s",
+ apic, pin, msg);
+
+ /*
+ * Ok, does IRQ0 through the IOAPIC work?
+ */
+ if (!no_timer_check && timer_irq_works()) {
+ nmi_watchdog_default();
+ if (nmi_watchdog == NMI_IO_APIC) {
+ disable_8259A_irq(0);
+ setup_nmi();
+ enable_8259A_irq(0);
+ }
+ return 1;
+ }
+ clear_IO_APIC_pin(apic, pin);
+ apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
+ return 0;
+}
+
+/* The function from hell */
+static void check_timer(void)
{
int apic1, pin1, apic2, pin2;
int vector;
@@ -1521,61 +1615,43 @@ static inline void check_timer(void)
*/
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
- if (timer_over_8254 > 0)
- enable_8259A_irq(0);
pin1 = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
+ /* Do this first, otherwise we get double interrupts on ATI boards */
+ if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled"))
+ return;
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- unmask_IO_APIC_irq(0);
- if (!no_timer_check && timer_irq_works()) {
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- return;
- }
- clear_IO_APIC_pin(apic1, pin1);
- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
- "connected to IO-APIC\n");
- }
+ /* Now try again with IRQ0 8259A enabled.
+ Assumes timer is on IO-APIC 0 ?!? */
+ enable_8259A_irq(0);
+ unmask_IO_APIC_irq(0);
+ if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
+ return;
+ disable_8259A_irq(0);
+
+ /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
+ on Nvidia boards */
+ if (!(apic1 == 0 && pin1 == 0) &&
+ try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
+ return;
+ if (!(apic1 == 0 && pin1 == 2) &&
+ try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
+ return;
- apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
- "through the 8259A ... ");
+ /* Then try pure 8259A routing on the 8259 as reported by BIOS*/
+ enable_8259A_irq(0);
if (pin2 != -1) {
- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
- apic2, pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
- if (timer_irq_works()) {
- apic_printk(APIC_VERBOSE," works.\n");
- nmi_watchdog_default();
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- }
+ if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS"))
return;
- }
- /*
- * Cleanup, just in case ...
- */
- clear_IO_APIC_pin(apic2, pin2);
}
- apic_printk(APIC_VERBOSE," failed.\n");
+
+ /* Tried all possibilities to go through the IO-APIC. Now come the
+ really cheesy fallbacks. */
if (nmi_watchdog == NMI_IO_APIC) {
printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
@@ -1771,7 +1847,7 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq);
spin_lock_irqsave(&vector_lock, flags);
- irq_vector[irq] = 0;
+ __clear_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
}
@@ -1889,18 +1965,16 @@ void arch_teardown_msi_irq(unsigned int irq)
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
- u32 low, high;
- low = read_ht_irq_low(irq);
- high = read_ht_irq_high(irq);
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
- low &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
- high &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
- low |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
- high |= HT_IRQ_HIGH_DEST_ID(dest);
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
- write_ht_irq_low(irq, low);
- write_ht_irq_high(irq, high);
+ write_ht_irq_msg(irq, &msg);
}
static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
@@ -1921,7 +1995,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
dest = cpu_mask_to_apicid(tmp);
- target_ht_irq(irq, dest, vector & 0xff);
+ target_ht_irq(irq, dest, vector);
set_native_irq_info(irq, mask);
}
#endif
@@ -1944,14 +2018,15 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
if (vector >= 0) {
- u32 low, high;
+ struct ht_irq_msg msg;
unsigned dest;
dest = cpu_mask_to_apicid(tmp);
- high = HT_IRQ_HIGH_DEST_ID(dest);
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
- low = HT_IRQ_LOW_BASE |
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
HT_IRQ_LOW_DEST_ID(dest) |
HT_IRQ_LOW_VECTOR(vector) |
((INT_DEST_MODE == 0) ?
@@ -1960,10 +2035,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
HT_IRQ_LOW_RQEOI_EDGE |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED);
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
- write_ht_irq_low(irq, low);
- write_ht_irq_high(irq, high);
+ write_ht_irq_msg(irq, &msg);
set_irq_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
@@ -2074,7 +2149,15 @@ void __init setup_ioapic_dest(void)
if (irq_entry == -1)
continue;
irq = pin_2_irq(irq_entry, ioapic, pin);
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
+
+ /* setup_IO_APIC_irqs could fail to get vector for some device
+ * when you have too many devices, because at that time only boot
+ * cpu is online.
+ */
+ if(!irq_vector[irq])
+ setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
+ else
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
}
}
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index e46c55856d40..0c06af6c13bc 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
if (likely(irq < NR_IRQS))
generic_handle_irq(irq);
- else
+ else if (printk_ratelimit())
printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(), vector);
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index ac241567e682..209c8c0bec71 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn);
+ free_insn_slot(p->ainsn.insn, 0);
mutex_unlock(&kprobe_mutex);
}
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bbea88801d88..ac085038af29 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
*/
static int check_interval = 5 * 60; /* 5 minutes */
-static void mcheck_timer(void *data);
-static DECLARE_WORK(mcheck_work, mcheck_timer, NULL);
+static void mcheck_timer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
static void mcheck_check_cpu(void *info)
{
@@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info)
do_machine_check(NULL, 0);
}
-static void mcheck_timer(void *data)
+static void mcheck_timer(struct work_struct *work)
{
on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
schedule_delayed_work(&mcheck_work, check_interval * HZ);
@@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return err;
}
-#ifdef CONFIG_HOTPLUG_CPU
static void mce_remove_device(unsigned int cpu)
{
int i;
@@ -652,6 +651,7 @@ static void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
sysdev_unregister(&per_cpu(device_mce,cpu));
+ memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -674,7 +674,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
static struct notifier_block mce_cpu_notifier = {
.notifier_call = mce_cpu_callback,
};
-#endif
static __init int mce_init_device(void)
{
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index 883fe747f64c..fa09debad4b7 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -551,7 +551,6 @@ out:
return err;
}
-#ifdef CONFIG_HOTPLUG_CPU
/*
* let's be hotplug friendly.
* in case of multiple core processors, the first core always takes ownership
@@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
sprintf(name, "threshold_bank%i", bank);
+#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
}
+#endif
/* remove all sibling symlinks before unregistering */
for_each_cpu_mask(i, b->cpus) {
@@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
static struct notifier_block threshold_cpu_notifier = {
.notifier_call = threshold_cpu_callback,
};
-#endif /* CONFIG_HOTPLUG_CPU */
static __init int threshold_init_device(void)
{
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab19fbd4..08072568847d 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
int smp_found_config;
unsigned int __initdata maxcpus = NR_CPUS;
-int acpi_found_madt;
-
/*
* Various Linux-internal data structures created from the
* MP-table.
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..27e95e7922c1 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
* Mikael Pettersson : PM converted to driver model. Disable/enable API.
*/
+#include <linux/nmi.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/sysdev.h>
-#include <linux/nmi.h>
#include <linux/sysctl.h>
#include <linux/kprobes.h>
+#include <linux/cpumask.h>
#include <asm/smp.h>
#include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
*/
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
int sum;
int touched = 0;
+ int cpu = smp_processor_id();
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
u64 dummy;
int rc=0;
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
touched = 1;
}
+ if (cpu_isset(cpu, backtrace_mask)) {
+ static DEFINE_SPINLOCK(lock); /* Serialise the printks */
+
+ spin_lock(&lock);
+ printk("NMI backtrace for cpu %d\n", cpu);
+ dump_stack();
+ spin_unlock(&lock);
+ cpu_clear(cpu, backtrace_mask);
+ }
+
#ifdef CONFIG_X86_MCE
/* Could check oops_in_progress here too, but it's safer
not too */
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
#endif
+void __trigger_all_cpu_backtrace(void)
+{
+ int i;
+
+ backtrace_mask = cpu_online_map;
+ /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+ for (i = 0; i < 10 * 1000; i++) {
+ if (cpus_empty(backtrace_mask))
+ break;
+ mdelay(1);
+ }
+}
+
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 37a770859e71..3215675ab128 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,13 @@
#include <asm/pci-direct.h>
#include <asm/system.h>
#include <asm/dma.h>
+#include <asm/rio.h>
+
+#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
+int use_calgary __read_mostly = 1;
+#else
+int use_calgary __read_mostly = 0;
+#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
#define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -115,14 +122,35 @@ static const unsigned long phb_offsets[] = {
0xB000 /* PHB3 */
};
+/* PHB debug registers */
+
+static const unsigned long phb_debug_offsets[] = {
+ 0x4000 /* PHB 0 DEBUG */,
+ 0x5000 /* PHB 1 DEBUG */,
+ 0x6000 /* PHB 2 DEBUG */,
+ 0x7000 /* PHB 3 DEBUG */
+};
+
+/*
+ * STUFF register for each debug PHB,
+ * byte 1 = start bus number, byte 2 = end bus number
+ */
+
+#define PHB_DEBUG_STUFF_OFFSET 0x0020
+
unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
static int translate_empty_slots __read_mostly = 0;
static int calgary_detected __read_mostly = 0;
+static struct rio_table_hdr *rio_table_hdr __initdata;
+static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata;
+
struct calgary_bus_info {
void *tce_space;
unsigned char translation_disabled;
signed char phbid;
+ void __iomem *bbar;
};
static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -475,6 +503,11 @@ static struct dma_mapping_ops calgary_dma_ops = {
.unmap_sg = calgary_unmap_sg,
};
+static inline void __iomem * busno_to_bbar(unsigned char num)
+{
+ return bus_info[num].bbar;
+}
+
static inline int busno_to_phbid(unsigned char num)
{
return bus_info[num].phbid;
@@ -620,14 +653,9 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
static void __init calgary_reserve_regions(struct pci_dev *dev)
{
unsigned int npages;
- void __iomem *bbar;
- unsigned char busnum;
u64 start;
struct iommu_table *tbl = dev->sysdata;
- bbar = tbl->bbar;
- busnum = dev->bus->number;
-
/* reserve bad_dma_address in case it's a legal address */
iommu_range_reserve(tbl, bad_dma_address, 1);
@@ -740,7 +768,7 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
{
u64 val64;
void __iomem *target;
- unsigned long phb_shift = -1;
+ unsigned int phb_shift = ~0; /* silence gcc */
u64 mask;
switch (busno_to_phbid(busnum)) {
@@ -828,33 +856,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
del_timer_sync(&tbl->watchdog_timer);
}
-static inline unsigned int __init locate_register_space(struct pci_dev *dev)
-{
- int rionodeid;
- u32 address;
-
- /*
- * Each Calgary has four busses. The first four busses (first Calgary)
- * have RIO node ID 2, then the next four (second Calgary) have RIO
- * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
- * We use a gross hack - relying on the dev->bus->number ordering,
- * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
- * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
- * second (id 3), and then it repeats modulo 14.
- */
- rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
- /*
- * register space address calculation as follows:
- * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
- * ChassisBase is always zero for x366/x260/x460
- * RioNodeId is 2 for first Calgary, 3 for second Calgary
- */
- address = START_ADDRESS -
- (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
- (0x100000) * (rionodeid - CHASSIS_BASE);
- return address;
-}
-
static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
pci_dev_get(dev);
@@ -864,23 +865,15 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
static int __init calgary_init_one(struct pci_dev *dev)
{
- u32 address;
void __iomem *bbar;
int ret;
BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
- address = locate_register_space(dev);
- /* map entire 1MB of Calgary config space */
- bbar = ioremap_nocache(address, 1024 * 1024);
- if (!bbar) {
- ret = -ENODATA;
- goto done;
- }
-
+ bbar = busno_to_bbar(dev->bus->number);
ret = calgary_setup_tar(dev, bbar);
if (ret)
- goto iounmap;
+ goto done;
pci_dev_get(dev);
dev->bus->self = dev;
@@ -888,17 +881,66 @@ static int __init calgary_init_one(struct pci_dev *dev)
return 0;
-iounmap:
- iounmap(bbar);
done:
return ret;
}
+static int __init calgary_locate_bbars(void)
+{
+ int ret;
+ int rioidx, phb, bus;
+ void __iomem *bbar;
+ void __iomem *target;
+ unsigned long offset;
+ u8 start_bus, end_bus;
+ u32 val;
+
+ ret = -ENODATA;
+ for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
+ struct rio_detail *rio = rio_devs[rioidx];
+
+ if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
+ continue;
+
+ /* map entire 1MB of Calgary config space */
+ bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
+ if (!bbar)
+ goto error;
+
+ for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
+ offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
+ target = calgary_reg(bbar, offset);
+
+ val = be32_to_cpu(readl(target));
+ start_bus = (u8)((val & 0x00FF0000) >> 16);
+ end_bus = (u8)((val & 0x0000FF00) >> 8);
+ for (bus = start_bus; bus <= end_bus; bus++) {
+ bus_info[bus].bbar = bbar;
+ bus_info[bus].phbid = phb;
+ }
+ }
+ }
+
+ return 0;
+
+error:
+ /* scan bus_info and iounmap any bbars we previously ioremap'd */
+ for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
+ if (bus_info[bus].bbar)
+ iounmap(bus_info[bus].bbar);
+
+ return ret;
+}
+
static int __init calgary_init(void)
{
- int ret = -ENODEV;
+ int ret;
struct pci_dev *dev = NULL;
+ ret = calgary_locate_bbars();
+ if (ret)
+ return ret;
+
do {
dev = pci_get_device(PCI_VENDOR_ID_IBM,
PCI_DEVICE_ID_IBM_CALGARY,
@@ -921,7 +963,7 @@ static int __init calgary_init(void)
error:
do {
- dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+ dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
PCI_DEVICE_ID_IBM_CALGARY,
dev);
if (!dev)
@@ -962,13 +1004,56 @@ static inline int __init determine_tce_table_size(u64 ram)
return ret;
}
+static int __init build_detail_arrays(void)
+{
+ unsigned long ptr;
+ int i, scal_detail_size, rio_detail_size;
+
+ if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
+ printk(KERN_WARNING
+ "Calgary: MAX_NUMNODES too low! Defined as %d, "
+ "but system has %d nodes.\n",
+ MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ return -ENODEV;
+ }
+
+ switch (rio_table_hdr->version){
+ case 2:
+ scal_detail_size = 11;
+ rio_detail_size = 13;
+ break;
+ case 3:
+ scal_detail_size = 12;
+ rio_detail_size = 15;
+ break;
+ default:
+ printk(KERN_WARNING
+ "Calgary: Invalid Rio Grande Table Version: %d\n",
+ rio_table_hdr->version);
+ return -EPROTO;
+ }
+
+ ptr = ((unsigned long)rio_table_hdr) + 3;
+ for (i = 0; i < rio_table_hdr->num_scal_dev;
+ i++, ptr += scal_detail_size)
+ scal_devs[i] = (struct scal_detail *)ptr;
+
+ for (i = 0; i < rio_table_hdr->num_rio_dev;
+ i++, ptr += rio_detail_size)
+ rio_devs[i] = (struct rio_detail *)ptr;
+
+ return 0;
+}
+
void __init detect_calgary(void)
{
u32 val;
int bus;
void *tbl;
int calgary_found = 0;
- int phb = -1;
+ unsigned long ptr;
+ int offset;
+ int ret;
/*
* if the user specified iommu=off or iommu=soft or we found
@@ -977,25 +1062,47 @@ void __init detect_calgary(void)
if (swiotlb || no_iommu || iommu_detected)
return;
+ if (!use_calgary)
+ return;
+
if (!early_pci_allowed())
return;
+ ptr = (unsigned long)phys_to_virt(get_bios_ebda());
+
+ rio_table_hdr = NULL;
+ offset = 0x180;
+ while (offset) {
+ /* The block id is stored in the 2nd word */
+ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
+ /* set the pointer past the offset & block id */
+ rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+ break;
+ }
+ /* The next offset is stored in the 1st word. 0 means no more */
+ offset = *((unsigned short *)(ptr + offset));
+ }
+ if (!rio_table_hdr) {
+ printk(KERN_ERR "Calgary: Unable to locate "
+ "Rio Grande Table in EBDA - bailing!\n");
+ return;
+ }
+
+ ret = build_detail_arrays();
+ if (ret) {
+ printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret);
+ return;
+ }
+
specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
int dev;
struct calgary_bus_info *info = &bus_info[bus];
- info->phbid = -1;
if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
continue;
- /*
- * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3)
- * it is connected to releative to the clagary chip.
- */
- phb = (phb + 1) % PHBS_PER_CALGARY;
-
if (info->translation_disabled)
continue;
@@ -1010,7 +1117,6 @@ void __init detect_calgary(void)
if (!tbl)
goto cleanup;
info->tce_space = tbl;
- info->phbid = phb;
calgary_found = 1;
break;
}
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d857453f8a..683b7a5c1ab3 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@ __init int iommu_setup(char *p)
gart_parse_options(p);
#endif
+#ifdef CONFIG_CALGARY_IOMMU
+ if (!strncmp(p, "calgary", 7))
+ use_calgary = 1;
+#endif /* CONFIG_CALGARY_IOMMU */
+
p += strcspn(p, ",");
if (*p == ',')
++p;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8a3303..fc1960f1f243 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@ void __init gart_iommu_init(void)
(!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
!iommu_aperture ||
(no_agp && init_k8_gatt(&info) < 0)) {
- printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
if (end_pfn > MAX_DMA32_PFN) {
printk(KERN_ERR "WARNING more than 4GB of memory "
- "but IOMMU not available.\n"
+ "but GART IOMMU not available.\n"
KERN_ERR "WARNING 32bit PCI may malfunction.\n");
}
return;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 49f7fac6229e..a418ee4c8c62 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -88,9 +88,8 @@ void enter_idle(void)
static void __exit_idle(void)
{
- if (read_pda(isidle) == 0)
+ if (test_and_clear_bit_pda(0, isidle) == 0)
return;
- write_pda(isidle, 0);
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
@@ -109,17 +108,15 @@ void exit_idle(void)
*/
static void default_idle(void)
{
- local_irq_enable();
-
current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
- while (!need_resched()) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
- local_irq_enable();
- }
+ local_irq_disable();
+ if (!need_resched()) {
+ /* Enables interrupts one instruction before HLT.
+ x86 special cases this so there is no race. */
+ safe_halt();
+ } else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
@@ -131,21 +128,13 @@ static void default_idle(void)
static void poll_idle (void)
{
local_irq_enable();
-
- asm volatile(
- "2:"
- "testl %0,%1;"
- "rep; nop;"
- "je 2b;"
- : :
- "i" (_TIF_NEED_RESCHED),
- "m" (current_thread_info()->flags));
+ cpu_relax();
}
void cpu_idle_wait(void)
{
unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map;
+ cpumask_t map, tmp = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
put_cpu();
@@ -168,6 +157,8 @@ void cpu_idle_wait(void)
}
cpus_and(map, map, cpu_online_map);
} while (!cpus_empty(map));
+
+ set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -218,6 +209,12 @@ void cpu_idle (void)
idle = default_idle;
if (cpu_is_offline(smp_processor_id()))
play_dead();
+ /*
+ * Idle routines should keep interrupts disabled
+ * from here on, until they go to idle.
+ * Otherwise, idle callbacks can misfire.
+ */
+ local_irq_disable();
enter_idle();
idle();
/* In many cases the interrupt that ended idle
@@ -255,9 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
- local_irq_enable();
- while (!need_resched())
- mwait_idle_with_hints(0,0);
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ }
}
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index fc944b5e8f4a..af425a8049fb 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -471,8 +471,7 @@ void __init setup_arch(char **cmdline_p)
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+ initrd_start = INITRD_START + PAGE_OFFSET;
initrd_end = initrd_start+INITRD_SIZE;
}
else {
@@ -732,11 +731,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* Fix cpuid4 emulation for more */
num_cache_leaves = 3;
- /* When there is only one core no need to synchronize RDTSC */
- if (num_possible_cpus() == 1)
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
- else
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ /* RDTSC can be speculated around */
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
}
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -835,6 +831,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
}
+ if (cpu_has_ds) {
+ unsigned int l1, l2;
+ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+ if (!(l1 & (1<<11)))
+ set_bit(X86_FEATURE_BTS, c->x86_capability);
+ if (!(l1 & (1<<12)))
+ set_bit(X86_FEATURE_PEBS, c->x86_capability);
+ }
+
n = c->extended_cpuid_level;
if (n >= 0x80000008) {
unsigned eax = cpuid_eax(0x80000008);
@@ -854,7 +859,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
if (c->x86 == 6)
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ if (c->x86 == 15)
+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ else
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
c->x86_max_cores = intel_num_cpu_cores(c);
srat_detect_node();
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4f67697f5036..af1ec4d23cf8 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -376,16 +376,20 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
/* prevent preemption and reschedule on another processor */
int me = get_cpu();
if (cpu == me) {
- WARN_ON(1);
put_cpu();
- return -EBUSY;
+ return 0;
}
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
spin_lock_bh(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait);
spin_unlock_bh(&call_lock);
put_cpu();
return 0;
}
+EXPORT_SYMBOL(smp_call_function_single);
/*
* this function sends a 'generic call function' IPI to all other CPUs
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 62c2e747af58..daf19332f0dd 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/numa.h>
+#include <asm/genapic.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -753,14 +754,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
}
struct create_idle {
+ struct work_struct work;
struct task_struct *idle;
struct completion done;
int cpu;
};
-void do_fork_idle(void *_c_idle)
+void do_fork_idle(struct work_struct *work)
{
- struct create_idle *c_idle = _c_idle;
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
c_idle->idle = fork_idle(c_idle->cpu);
complete(&c_idle->done);
@@ -775,10 +778,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
int timeout;
unsigned long start_rip;
struct create_idle c_idle = {
+ .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
.cpu = cpu,
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- DECLARE_WORK(work, do_fork_idle, &c_idle);
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
@@ -825,9 +828,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
* thread.
*/
if (!keventd_up() || current_is_keventd())
- work.func(work.data);
+ c_idle.work.func(&c_idle.work);
else {
- schedule_work(&work);
+ schedule_work(&c_idle.work);
wait_for_completion(&c_idle.done);
}
@@ -1167,6 +1170,13 @@ int __cpuinit __cpu_up(unsigned int cpu)
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
+
+ if (num_online_cpus() > 8 && genapic == &apic_flat) {
+ printk(KERN_WARNING
+ "flat APIC routing can't be used with > 8 cpus\n");
+ BUG();
+ }
+
err = 0;
return err;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 88722f11ca13..9f05bc9b2dad 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0;
static unsigned int cpufreq_init = 0;
static struct work_struct cpufreq_delayed_get_work;
-static void handle_cpufreq_delayed_get(void *v)
+static void handle_cpufreq_delayed_get(struct work_struct *v)
{
unsigned int cpu;
for_each_online_cpu(cpu) {
@@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = {
static int __init cpufreq_tsc(void)
{
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER))
cpufreq_init = 1;
@@ -876,15 +876,6 @@ static struct irqaction irq0 = {
timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};
-static int __cpuinit
-time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
- unsigned cpu = (unsigned long) hcpu;
- if (action == CPU_ONLINE)
- vsyscall_set_cpu(cpu);
- return NOTIFY_DONE;
-}
-
void __init time_init(void)
{
if (nohpet)
@@ -925,8 +916,6 @@ void __init time_init(void)
vxtime.last_tsc = get_cycles_sync();
set_cyc2ns_scale(cpu_khz);
setup_irq(0, &irq0);
- hotcpu_notifier(time_cpu_notifier, 0);
- time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
#ifndef CONFIG_SMP
time_init_gtod();
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 7819022a8db5..a1641ffdffcf 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,9 @@
#include <linux/kprobes.h>
#include <linux/kexec.h>
#include <linux/unwind.h>
+#include <linux/uaccess.h>
#include <asm/system.h>
-#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/debugreg.h>
@@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
preempt_enable_no_resched();
}
-static int kstack_depth_to_print = 12;
+int kstack_depth_to_print = 12;
#ifdef CONFIG_STACK_UNWIND
static int call_trace = 1;
#else
@@ -225,16 +225,25 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
{
struct ops_and_data *oad = (struct ops_and_data *)context;
int n = 0;
+ unsigned long sp = UNW_SP(info);
+ if (arch_unw_user_mode(info))
+ return -1;
while (unwind(info) == 0 && UNW_PC(info)) {
n++;
oad->ops->address(oad->data, UNW_PC(info));
if (arch_unw_user_mode(info))
break;
+ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+ && sp > UNW_SP(info))
+ break;
+ sp = UNW_SP(info);
}
return n;
}
+#define MSG(txt) ops->warning(data, txt)
+
/*
* x86-64 can have upto three kernel stacks:
* process stack
@@ -242,12 +251,20 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+ void *t = (void *)tinfo;
+ return p > t && p < t + THREAD_SIZE - 3;
+}
+
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+ unsigned long *stack,
struct stacktrace_ops *ops, void *data)
{
- const unsigned cpu = smp_processor_id();
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ const unsigned cpu = get_cpu();
+ unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
+ struct thread_info *tinfo;
if (!tsk)
tsk = current;
@@ -261,28 +278,30 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
if (unwind_init_frame_info(&info, tsk, regs) == 0)
unw_ret = dump_trace_unwind(&info, &oad);
} else if (tsk == current)
- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+ unw_ret = unwind_init_running(&info, dump_trace_unwind,
+ &oad);
else {
if (unwind_init_blocked(&info, tsk) == 0)
unw_ret = dump_trace_unwind(&info, &oad);
}
if (unw_ret > 0) {
if (call_trace == 1 && !arch_unw_user_mode(&info)) {
- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+ ops->warning_symbol(data,
+ "DWARF2 unwinder stuck at %s",
UNW_PC(&info));
if ((long)UNW_SP(&info) < 0) {
- ops->warning(data, "Leftover inexact backtrace:\n");
+ MSG("Leftover inexact backtrace:");
stack = (unsigned long *)UNW_SP(&info);
if (!stack)
- return;
+ goto out;
} else
- ops->warning(data, "Full inexact backtrace again:\n");
+ MSG("Full inexact backtrace again:");
} else if (call_trace >= 1)
- return;
+ goto out;
else
- ops->warning(data, "Full inexact backtrace again:\n");
+ MSG("Full inexact backtrace again:");
} else
- ops->warning(data, "Inexact backtrace:\n");
+ MSG("Inexact backtrace:");
}
if (!stack) {
unsigned long dummy;
@@ -299,9 +318,9 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
#define HANDLE_STACK(cond) \
do while (cond) { \
unsigned long addr = *stack++; \
- if (oops_in_progress ? \
- __kernel_text_address(addr) : \
- kernel_text_address(addr)) { \
+ /* Use unlocked access here because except for NMIs \
+ we should be already protected against module unloads */ \
+ if (__kernel_text_address(addr)) { \
/* \
* If the address is either in the text segment of the \
* kernel, or in the region which contains vmalloc'ed \
@@ -364,8 +383,11 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
/*
* This handles the process stack:
*/
- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
+ tinfo = current_thread_info();
+ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
#undef HANDLE_STACK
+out:
+ put_cpu();
}
EXPORT_SYMBOL(dump_trace);
@@ -772,8 +794,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
- printk(KERN_EMERG "You probably have a hardware problem with your "
- "RAM chips\n");
+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index edb24aa714b4..6a1f8f491e5d 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -51,15 +51,7 @@ SECTIONS
RODATA
-#ifdef CONFIG_STACK_UNWIND
- . = ALIGN(8);
- .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
- __start_unwind = .;
- *(.eh_frame)
- __end_unwind = .;
- }
-#endif
-
+ . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
*(.data)
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index a98b460af6a1..4a673f5397a0 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -27,6 +27,9 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/getcpu.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
@@ -39,6 +42,7 @@
#include <asm/topology.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __syscall_clobber "r11","rcx","memory"
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
@@ -243,32 +247,17 @@ static ctl_table kernel_root_table2[] = {
#endif
-static void __cpuinit write_rdtscp_cb(void *info)
-{
- write_rdtscp_aux((unsigned long)info);
-}
-
-void __cpuinit vsyscall_set_cpu(int cpu)
+/* Assume __initcall executes before all user space. Hopefully kmod
+ doesn't violate that. We'll find out if it does. */
+static void __cpuinit vsyscall_set_cpu(int cpu)
{
unsigned long *d;
unsigned long node = 0;
#ifdef CONFIG_NUMA
node = cpu_to_node[cpu];
#endif
- if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
- void *info = (void *)((node << 12) | cpu);
- /* Can happen on preemptive kernel */
- if (get_cpu() == cpu)
- write_rdtscp_cb(info);
-#ifdef CONFIG_SMP
- else {
- /* the notifier is unfortunately not executed on the
- target CPU */
- smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
- }
-#endif
- put_cpu();
- }
+ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+ write_rdtscp_aux((node << 12) | cpu);
/* Store cpu number in limit so that it can be loaded quickly
in user space in vgetcpu.
@@ -280,11 +269,27 @@ void __cpuinit vsyscall_set_cpu(int cpu)
*d |= (node >> 4) << 48;
}
+static void __cpuinit cpu_vsyscall_init(void *arg)
+{
+ /* preemption should be already off */
+ vsyscall_set_cpu(raw_smp_processor_id());
+}
+
+static int __cpuinit
+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+ long cpu = (long)arg;
+ if (action == CPU_ONLINE)
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ return NOTIFY_DONE;
+}
+
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+ /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
@@ -299,6 +304,8 @@ static int __init vsyscall_init(void)
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2, 0);
#endif
+ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ hotcpu_notifier(cpu_vsyscall_notifier, 0);
return 0;
}
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index c493735218da..bc503f506903 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -9,8 +9,6 @@
#include <linux/module.h>
#include <asm/checksum.h>
-#define __force_inline inline __attribute__((always_inline))
-
static inline unsigned short from32to16(unsigned a)
{
unsigned short b = a >> 16;
@@ -33,7 +31,7 @@ static inline unsigned short from32to16(unsigned a)
* Unrolling to an 128 bytes inner loop.
* Using interleaving with more registers to break the carry chains.
*/
-static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
+static unsigned do_csum(const unsigned char *buff, unsigned len)
{
unsigned odd, count;
unsigned long result = 0;
@@ -132,9 +130,10 @@ static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
*
* it's best to have buff aligned on a 64-bit boundary
*/
-unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum)
+__wsum csum_partial(const void *buff, int len, __wsum sum)
{
- return add32_with_carry(do_csum(buff, len), sum);
+ return (__force __wsum)add32_with_carry(do_csum(buff, len),
+ (__force u32)sum);
}
EXPORT_SYMBOL(csum_partial);
@@ -143,7 +142,7 @@ EXPORT_SYMBOL(csum_partial);
* this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c
*/
-unsigned short ip_compute_csum(unsigned char * buff, int len)
+__sum16 ip_compute_csum(const void *buff, int len)
{
return csum_fold(csum_partial(buff,len,0));
}
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index b1320ec58428..fd42a4a095fc 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -18,9 +18,9 @@
* Returns an 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
-unsigned int
-csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst,
- int len, unsigned int isum, int *errp)
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+ int len, __wsum isum, int *errp)
{
might_sleep();
*errp = 0;
@@ -34,17 +34,19 @@ csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst,
if (unlikely((unsigned long)src & 6)) {
while (((unsigned long)src & 6) && len >= 2) {
__u16 val16;
- *errp = __get_user(val16, (__u16 __user *)src);
+ *errp = __get_user(val16, (const __u16 __user *)src);
if (*errp)
return isum;
*(__u16 *)dst = val16;
- isum = add32_with_carry(isum, val16);
+ isum = (__force __wsum)add32_with_carry(
+ (__force unsigned)isum, val16);
src += 2;
dst += 2;
len -= 2;
}
}
- isum = csum_partial_copy_generic((__force void *)src,dst,len,isum,errp,NULL);
+ isum = csum_partial_copy_generic((__force const void *)src,
+ dst, len, isum, errp, NULL);
if (likely(*errp == 0))
return isum;
}
@@ -66,9 +68,9 @@ EXPORT_SYMBOL(csum_partial_copy_from_user);
* Returns an 32bit unfolded checksum of the buffer.
* src and dst are best aligned to 64bits.
*/
-unsigned int
-csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst,
- int len, unsigned int isum, int *errp)
+__wsum
+csum_partial_copy_to_user(const void *src, void __user *dst,
+ int len, __wsum isum, int *errp)
{
might_sleep();
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
@@ -79,7 +81,8 @@ csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst,
if (unlikely((unsigned long)dst & 6)) {
while (((unsigned long)dst & 6) && len >= 2) {
__u16 val16 = *(__u16 *)src;
- isum = add32_with_carry(isum, val16);
+ isum = (__force __wsum)add32_with_carry(
+ (__force unsigned)isum, val16);
*errp = __put_user(val16, (__u16 __user *)dst);
if (*errp)
return isum;
@@ -104,19 +107,21 @@ EXPORT_SYMBOL(csum_partial_copy_to_user);
*
* Returns an 32bit unfolded checksum of the buffer.
*/
-unsigned int
-csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, unsigned int sum)
+__wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
{
return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
-unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
- __u32 len, unsigned short proto, unsigned int sum)
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, unsigned short proto, __wsum sum)
{
__u64 rest, sum64;
- rest = (__u64)htonl(len) + (__u64)htons(proto) + (__u64)sum;
+ rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
+ (__force __u64)sum;
asm(" addq (%[saddr]),%[sum]\n"
" adcq 8(%[saddr]),%[sum]\n"
" adcq (%[daddr]),%[sum]\n"
@@ -124,7 +129,7 @@ unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
" adcq $0,%[sum]\n"
: [sum] "=r" (sum64)
: "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
- return csum_fold(add32_with_carry(sum64 & 0xffffffff, sum64>>32));
+ return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 50be90975d04..2dbebd308347 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay);
inline void __const_udelay(unsigned long xloops)
{
- __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
+ __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
}
EXPORT_SYMBOL(__const_udelay);
void __udelay(unsigned long usecs)
{
- __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
EXPORT_SYMBOL(__udelay);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 3751b4788e28..a65fc6f1dcaf 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,9 +23,9 @@
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/kprobes.h>
+#include <linux/uaccess.h>
#include <asm/system.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
unsigned long error_code)
{
- unsigned char __user *instr;
+ unsigned char *instr;
int scan_more = 1;
int prefetch = 0;
unsigned char *max_instr;
@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
unsigned char instr_hi;
unsigned char instr_lo;
- if (__get_user(opcode, (char __user *)instr))
+ if (probe_kernel_address(instr, opcode))
break;
instr_hi = opcode & 0xf0;
@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
case 0x00:
/* Prefetch instruction is 0x0F0D or 0x0F18 */
scan_more = 0;
- if (__get_user(opcode, (char __user *)instr))
+ if (probe_kernel_address(instr, opcode))
break;
prefetch = (instr_lo == 0xF) &&
(opcode == 0x0D || opcode == 0x18);
@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
static int bad_address(void *p)
{
unsigned long dummy;
- return __get_user(dummy, (unsigned long __user *)p);
+ return probe_kernel_address((unsigned long *)p, dummy);
}
void dump_pagetable(unsigned long address)
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 971dc1181e69..2968b90ef8ad 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -496,7 +496,7 @@ int remove_memory(u64 start, u64 size)
}
EXPORT_SYMBOL_GPL(remove_memory);
-#ifndef CONFIG_ACPI_NUMA
+#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
return 0;
@@ -504,13 +504,6 @@ int memory_add_physaddr_to_nid(u64 start)
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
-#ifndef CONFIG_ACPI_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
- return 0;
-}
-#endif
-
#endif /* CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
@@ -655,9 +648,22 @@ void free_initrd_mem(unsigned long start, unsigned long end)
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
- /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
int nid = phys_to_nid(phys);
+#endif
+ unsigned long pfn = phys >> PAGE_SHIFT;
+ if (pfn >= end_pfn) {
+ /* This can happen with kdump kernels when accessing firmware
+ tables. */
+ if (pfn < end_pfn_map)
+ return;
+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
+ phys, len);
+ return;
+ }
+
+ /* Should check here against the e820 map to avoid double free */
+#ifdef CONFIG_NUMA
reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
reserve_bootmem(phys, len);
@@ -724,14 +730,15 @@ static __init int x8664_sysctl_init(void)
__initcall(x8664_sysctl_init);
#endif
-/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
+/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
covers the 64bit vsyscall page now. 32bit has a real VMA now and does
not need special handling anymore. */
static struct vm_area_struct gate_vma = {
.vm_start = VSYSCALL_START,
- .vm_end = VSYSCALL_END,
- .vm_page_prot = PAGE_READONLY
+ .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
+ .vm_page_prot = PAGE_READONLY_EXEC,
+ .vm_flags = VM_READ | VM_EXEC
};
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 3e231d762aaa..ccb91dd996a9 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -61,34 +61,40 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
return base;
}
-
-static void flush_kernel_map(void *address)
+static void cache_flush_page(void *adr)
{
- if (0 && address && cpu_has_clflush) {
- /* is this worth it? */
- int i;
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (address + i));
- } else
- asm volatile("wbinvd":::"memory");
- if (address)
- __flush_tlb_one(address);
- else
- __flush_tlb_all();
+ int i;
+ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+ asm volatile("clflush (%0)" :: "r" (adr + i));
}
+static void flush_kernel_map(void *arg)
+{
+ struct list_head *l = (struct list_head *)arg;
+ struct page *pg;
+
+ /* When clflush is available always use it because it is
+ much cheaper than WBINVD */
+ if (!cpu_has_clflush)
+ asm volatile("wbinvd" ::: "memory");
+ list_for_each_entry(pg, l, lru) {
+ void *adr = page_address(pg);
+ if (cpu_has_clflush)
+ cache_flush_page(adr);
+ __flush_tlb_one(adr);
+ }
+}
-static inline void flush_map(unsigned long address)
+static inline void flush_map(struct list_head *l)
{
- on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
+ on_each_cpu(flush_kernel_map, l, 1, 1);
}
-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
+static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
static inline void save_page(struct page *fpage)
{
- fpage->lru.next = (struct list_head *)deferred_pages;
- deferred_pages = fpage;
+ list_add(&fpage->lru, &deferred_pages);
}
/*
@@ -207,18 +213,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
void global_flush_tlb(void)
{
- struct page *dpage;
+ struct page *pg, *next;
+ struct list_head l;
down_read(&init_mm.mmap_sem);
- dpage = xchg(&deferred_pages, NULL);
+ list_replace_init(&deferred_pages, &l);
up_read(&init_mm.mmap_sem);
- flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
- while (dpage) {
- struct page *tmp = dpage;
- dpage = (struct page *)dpage->lru.next;
- ClearPagePrivate(tmp);
- __free_page(tmp);
+ flush_map(&l);
+
+ list_for_each_entry_safe(pg, next, &l, lru) {
+ ClearPagePrivate(pg);
+ __free_page(pg);
}
}
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index e61093b34c26..f8b6b2800a62 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -163,37 +163,6 @@ static __init void unreachable_devices(void)
}
}
-static __init void pci_mmcfg_insert_resources(void)
-{
-#define PCI_MMCFG_RESOURCE_NAME_LEN 19
- int i;
- struct resource *res;
- char *names;
- unsigned num_buses;
-
- res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
- pci_mmcfg_config_num, GFP_KERNEL);
-
- if (!res) {
- printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
- return;
- }
-
- names = (void *)&res[pci_mmcfg_config_num];
- for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
- num_buses = pci_mmcfg_config[i].end_bus_number -
- pci_mmcfg_config[i].start_bus_number + 1;
- res->name = names;
- snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
- pci_mmcfg_config[i].pci_segment_group_number);
- res->start = pci_mmcfg_config[i].base_address;
- res->end = res->start + (num_buses << 20) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- insert_resource(&iomem_resource, res);
- names += PCI_MMCFG_RESOURCE_NAME_LEN;
- }
-}
-
void __init pci_mmcfg_init(int type)
{
int i;
@@ -237,7 +206,6 @@ void __init pci_mmcfg_init(int type)
}
unreachable_devices();
- pci_mmcfg_insert_resources();
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;