summaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig5
-rw-r--r--arch/x86_64/Makefile12
-rw-r--r--arch/x86_64/defconfig224
-rw-r--r--arch/x86_64/ia32/Makefile4
-rw-r--r--arch/x86_64/ia32/syscall32.c10
-rw-r--r--arch/x86_64/ia32/syscall32_syscall.S17
-rw-r--r--arch/x86_64/kernel/entry.S22
-rw-r--r--arch/x86_64/kernel/genapic.c33
-rw-r--r--arch/x86_64/kernel/genapic_flat.c142
-rw-r--r--arch/x86_64/kernel/head.S16
-rw-r--r--arch/x86_64/kernel/irq.c19
-rw-r--r--arch/x86_64/kernel/machine_kexec.c101
-rw-r--r--arch/x86_64/kernel/mce.c93
-rw-r--r--arch/x86_64/kernel/mpparse.c17
-rw-r--r--arch/x86_64/kernel/setup.c5
-rw-r--r--arch/x86_64/kernel/setup64.c18
-rw-r--r--arch/x86_64/kernel/smp.c66
-rw-r--r--arch/x86_64/kernel/smpboot.c84
-rw-r--r--arch/x86_64/kernel/traps.c3
-rw-r--r--arch/x86_64/lib/delay.c2
-rw-r--r--arch/x86_64/mm/fault.c1
-rw-r--r--arch/x86_64/mm/numa.c50
-rw-r--r--arch/x86_64/mm/srat.c22
-rw-r--r--arch/x86_64/pci/k8-bus.c2
24 files changed, 565 insertions, 403 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 4b8326177c52..660a03a89e66 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -329,12 +329,15 @@ config HPET_EMULATE_RTC
config GART_IOMMU
bool "IOMMU support"
+ default y
depends on PCI
help
- Support the K8 IOMMU. Needed to run systems with more than 4GB of memory
+ Support the IOMMU. Needed to run systems with more than 3GB of memory
properly with 32-bit PCI devices that do not support DAC (Double Address
Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter.
Normally the kernel will take the right choice by itself.
+ This option includes a driver for the AMD Opteron/Athlon64 IOMMU
+ and a software emulation used on some other systems.
If unsure, say Y.
# need this always enabled with GART_IOMMU for the VIA workaround
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 428915697675..4c6ed96d5f7c 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -21,18 +21,6 @@
#
# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $
-#
-# early bootup linking needs 32bit. You can either use real 32bit tools
-# here or 64bit tools in 32bit mode.
-#
-IA32_CC := $(CC) $(CPPFLAGS) -m32 -O2 -fomit-frame-pointer
-IA32_LD := $(LD) -m elf_i386
-IA32_AS := $(CC) $(AFLAGS) -m32 -Wa,--32 -traditional -c
-IA32_OBJCOPY := $(CROSS_COMPILE)objcopy
-IA32_CPP := $(CROSS_COMPILE)gcc -m32 -E
-export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP
-
-
LDFLAGS := -m elf_x86_64
OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LDFLAGS_vmlinux :=
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 569595b74c7c..776f3c866b70 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc4
-# Fri May 13 06:39:11 2005
+# Linux kernel version: 2.6.13-rc3
+# Fri Jul 22 16:47:31 2005
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -84,14 +84,27 @@ CONFIG_X86_IO_APIC=y
CONFIG_X86_LOCAL_APIC=y
CONFIG_MTRR=y
CONFIG_SMP=y
-# CONFIG_PREEMPT is not set
CONFIG_SCHED_SMT=y
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_PREEMPT_BKL=y
CONFIG_K8_NUMA=y
# CONFIG_NUMA_EMU is not set
-CONFIG_DISCONTIGMEM=y
+CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
CONFIG_NUMA=y
+CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
+CONFIG_ARCH_SPARSEMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+# CONFIG_FLATMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM_MANUAL=y
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_DISCONTIGMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_NEED_MULTIPLE_NODES=y
+CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_HAVE_DEC_LOCK=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=32
CONFIG_HPET_TIMER=y
CONFIG_X86_PM_TIMER=y
CONFIG_HPET_EMULATE_RTC=y
@@ -99,7 +112,13 @@ CONFIG_GART_IOMMU=y
CONFIG_SWIOTLB=y
CONFIG_X86_MCE=y
CONFIG_X86_MCE_INTEL=y
+CONFIG_PHYSICAL_START=0x100000
+# CONFIG_KEXEC is not set
CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_ISA_DMA_API=y
@@ -118,12 +137,11 @@ CONFIG_PM_STD_PARTITION=""
CONFIG_ACPI=y
CONFIG_ACPI_BOOT=y
CONFIG_ACPI_INTERPRETER=y
-CONFIG_ACPI_SLEEP=y
-CONFIG_ACPI_SLEEP_PROC_FS=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_BATTERY=y
CONFIG_ACPI_BUTTON=y
# CONFIG_ACPI_VIDEO is not set
+CONFIG_ACPI_HOTKEY=m
CONFIG_ACPI_FAN=y
CONFIG_ACPI_PROCESSOR=y
CONFIG_ACPI_THERMAL=y
@@ -154,6 +172,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
#
# CPUFreq processor drivers
@@ -204,6 +223,76 @@ CONFIG_SYSVIPC_COMPAT=y
CONFIG_UID16=y
#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_IP_TCPDIAG=y
+CONFIG_IP_TCPDIAG_IPV6=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETFILTER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_NET_CLS_ROUTE is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+
+#
# Device Drivers
#
@@ -308,6 +397,7 @@ CONFIG_BLK_DEV_AMD74XX=y
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_IT821X is not set
# CONFIG_BLK_DEV_NS87415 is not set
# CONFIG_BLK_DEV_PDC202XX_OLD is not set
CONFIG_BLK_DEV_PDC202XX_NEW=y
@@ -338,6 +428,7 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_OSST is not set
# CONFIG_BLK_DEV_SR is not set
# CONFIG_CHR_DEV_SG is not set
+# CONFIG_CHR_DEV_SCH is not set
#
# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
@@ -372,7 +463,6 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
CONFIG_SCSI_SATA=y
-# CONFIG_SCSI_SATA_AHCI is not set
# CONFIG_SCSI_SATA_SVW is not set
CONFIG_SCSI_ATA_PIIX=y
# CONFIG_SCSI_SATA_NV is not set
@@ -410,14 +500,21 @@ CONFIG_SCSI_QLA2XXX=y
#
# Multi-device support (RAID and LVM)
#
-# CONFIG_MD is not set
+CONFIG_MD=y
+# CONFIG_BLK_DEV_MD is not set
+CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_CRYPT is not set
+# CONFIG_DM_SNAPSHOT is not set
+# CONFIG_DM_MIRROR is not set
+# CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
#
# Fusion MPT device support
#
-CONFIG_FUSION=y
-CONFIG_FUSION_MAX_SGE=40
-# CONFIG_FUSION_CTL is not set
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
#
# IEEE 1394 (FireWire) support
@@ -430,75 +527,8 @@ CONFIG_FUSION_MAX_SGE=40
# CONFIG_I2O is not set
#
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-CONFIG_IP_TCPDIAG_IPV6=y
-CONFIG_IPV6=y
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
+# Network device support
#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-CONFIG_NETPOLL=y
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-CONFIG_NET_POLL_CONTROLLER=y
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
CONFIG_NETDEVICES=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
@@ -517,7 +547,9 @@ CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
-# CONFIG_NET_VENDOR_3COM is not set
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
#
# Tulip family network device support
@@ -532,7 +564,7 @@ CONFIG_NET_PCI=y
CONFIG_FORCEDETH=y
# CONFIG_DGRS is not set
# CONFIG_EEPRO100 is not set
-# CONFIG_E100 is not set
+CONFIG_E100=y
# CONFIG_FEALNX is not set
# CONFIG_NATSEMI is not set
# CONFIG_NE2K_PCI is not set
@@ -553,14 +585,15 @@ CONFIG_8139TOO=y
# CONFIG_ACENIC is not set
# CONFIG_DL2K is not set
CONFIG_E1000=y
-# CONFIG_E1000_NAPI is not set
# CONFIG_NS83820 is not set
# CONFIG_HAMACHI is not set
# CONFIG_YELLOWFIN is not set
# CONFIG_R8169 is not set
+# CONFIG_SKGE is not set
# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
+# CONFIG_BNX2 is not set
#
# Ethernet (10000 Mbit)
@@ -647,7 +680,6 @@ CONFIG_SERIO_I8042=y
CONFIG_SERIO_LIBPS2=y
# CONFIG_SERIO_RAW is not set
# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
#
# Character devices
@@ -716,6 +748,7 @@ CONFIG_MAX_RAW_DEVS=256
# I2C support
#
# CONFIG_I2C is not set
+# CONFIG_I2C_SENSOR is not set
#
# Dallas's 1-wire bus
@@ -723,6 +756,12 @@ CONFIG_MAX_RAW_DEVS=256
# CONFIG_W1 is not set
#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
# Misc devices
#
# CONFIG_IBM_ASM is not set
@@ -808,6 +847,7 @@ CONFIG_USB_DEVICEFS=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_SPLIT_ISO is not set
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
@@ -846,12 +886,15 @@ CONFIG_USB_HIDINPUT=y
# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
# CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
# CONFIG_USB_KBTAB is not set
# CONFIG_USB_POWERMATE is not set
# CONFIG_USB_MTOUCH is not set
+# CONFIG_USB_ITMTOUCH is not set
# CONFIG_USB_EGALAX is not set
# CONFIG_USB_XPAD is not set
# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
#
# USB Imaging devices
@@ -902,10 +945,11 @@ CONFIG_USB_MON=y
# CONFIG_USB_PHIDGETSERVO is not set
# CONFIG_USB_IDMOUSE is not set
# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
# CONFIG_USB_TEST is not set
#
-# USB ATM/DSL drivers
+# USB DSL modem support
#
#
@@ -924,6 +968,10 @@ CONFIG_USB_MON=y
# CONFIG_INFINIBAND is not set
#
+# SN Devices
+#
+
+#
# Firmware Drivers
#
# CONFIG_EDD is not set
@@ -935,6 +983,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
# CONFIG_EXT2_FS_SECURITY is not set
+# CONFIG_EXT2_FS_XIP is not set
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_XATTR=y
CONFIG_EXT3_FS_POSIX_ACL=y
@@ -957,6 +1006,7 @@ CONFIG_FS_POSIX_ACL=y
# CONFIG_XFS_FS is not set
# CONFIG_MINIX_FS is not set
# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
# CONFIG_QUOTA is not set
CONFIG_DNOTIFY=y
CONFIG_AUTOFS_FS=y
@@ -986,7 +1036,6 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
CONFIG_PROC_FS=y
CONFIG_PROC_KCORE=y
CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
# CONFIG_DEVPTS_FS_XATTR is not set
CONFIG_TMPFS=y
# CONFIG_TMPFS_XATTR is not set
@@ -1016,15 +1065,18 @@ CONFIG_RAMFS=y
#
CONFIG_NFS_FS=y
CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
# CONFIG_NFS_V4 is not set
# CONFIG_NFS_DIRECTIO is not set
CONFIG_NFSD=y
CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
# CONFIG_NFSD_V4 is not set
CONFIG_NFSD_TCP=y
CONFIG_LOCKD=y
CONFIG_LOCKD_V4=y
CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
CONFIG_SUNRPC=y
# CONFIG_RPCSEC_GSS_KRB5 is not set
# CONFIG_RPCSEC_GSS_SPKM3 is not set
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
index a12b19da4b59..f76217d8f579 100644
--- a/arch/x86_64/ia32/Makefile
+++ b/arch/x86_64/ia32/Makefile
@@ -4,14 +4,14 @@
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \
ia32_signal.o tls32.o \
- ia32_binfmt.o fpu32.o ptrace32.o syscall32.o
+ ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o
sysv-$(CONFIG_SYSVIPC) := ipc32.o
obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
-$(obj)/syscall32.o: $(src)/syscall32.c \
+$(obj)/syscall32_syscall.o: \
$(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
# Teach kbuild about targets
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 816a3b89f13d..adbc5f8089e9 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -14,16 +14,6 @@
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
-/* 32bit VDSOs mapped into user space. */
-asm(".section \".init.data\",\"aw\"\n"
- "syscall32_syscall:\n"
- ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n"
- "syscall32_syscall_end:\n"
- "syscall32_sysenter:\n"
- ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n"
- "syscall32_sysenter_end:\n"
- ".previous");
-
extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
extern int sysctl_vsyscall32;
diff --git a/arch/x86_64/ia32/syscall32_syscall.S b/arch/x86_64/ia32/syscall32_syscall.S
new file mode 100644
index 000000000000..8f8271bdf135
--- /dev/null
+++ b/arch/x86_64/ia32/syscall32_syscall.S
@@ -0,0 +1,17 @@
+/* 32bit VDSOs mapped into user space. */
+
+ .section ".init.data","aw"
+
+ .globl syscall32_syscall
+ .globl syscall32_syscall_end
+
+syscall32_syscall:
+ .incbin "arch/x86_64/ia32/vsyscall-syscall.so"
+syscall32_syscall_end:
+
+ .globl syscall32_sysenter
+ .globl syscall32_sysenter_end
+
+syscall32_sysenter:
+ .incbin "arch/x86_64/ia32/vsyscall-sysenter.so"
+syscall32_sysenter_end:
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 28817490fdc6..096d470e280f 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -76,7 +76,7 @@
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
- xorq %rax, %rax
+ xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* rsp */
@@ -423,7 +423,7 @@ ENTRY(stub_rt_sigreturn)
testl $3,CS(%rdi)
je 1f
swapgs
-1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
+1: incl %gs:pda_irqcount # RED-PEN should check preempt count
movq %gs:pda_irqstackptr,%rax
cmoveq %rax,%rsp
pushq %rdi # save old stack
@@ -436,7 +436,7 @@ ENTRY(common_interrupt)
ret_from_intr:
popq %rdi
cli
- subl $1,%gs:pda_irqcount
+ decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
movq RBP(%rdi),%rbp
#endif
@@ -494,7 +494,7 @@ retint_signal:
sti
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
- xorq %rsi,%rsi # oldset
+ xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
@@ -752,7 +752,7 @@ child_rip:
movq %rsi, %rdi
call *%rax
# exit
- xorq %rdi, %rdi
+ xorl %edi, %edi
call do_exit
/*
@@ -918,3 +918,15 @@ ENTRY(machine_check)
ENTRY(call_debug)
zeroentry do_call_debug
+ENTRY(call_softirq)
+ movq %gs:pda_irqstackptr,%rax
+ pushq %r15
+ movq %rsp,%r15
+ incl %gs:pda_irqcount
+ cmove %rax,%rsp
+ call __do_softirq
+ movq %r15,%rsp
+ decl %gs:pda_irqcount
+ popq %r15
+ ret
+
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 69b9c25a8fc4..30c843a5efdd 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -31,6 +31,7 @@ u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
extern struct genapic apic_cluster;
extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
struct genapic *genapic = &apic_flat;
@@ -44,12 +45,7 @@ void __init clustered_apic_check(void)
u8 clusters, max_cluster;
u8 id;
u8 cluster_cnt[NUM_APIC_CLUSTERS];
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- /* AMD always uses flat mode right now */
- genapic = &apic_flat;
- goto print;
- }
+ int num_cpus = 0;
#if defined(CONFIG_ACPI_BUS)
/*
@@ -64,15 +60,34 @@ void __init clustered_apic_check(void)
#endif
memset(cluster_cnt, 0, sizeof(cluster_cnt));
-
for (i = 0; i < NR_CPUS; i++) {
id = bios_cpu_apicid[i];
- if (id != BAD_APICID)
- cluster_cnt[APIC_CLUSTERID(id)]++;
+ if (id == BAD_APICID)
+ continue;
+ num_cpus++;
+ cluster_cnt[APIC_CLUSTERID(id)]++;
}
+ /* Don't use clustered mode on AMD platforms. */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ genapic = &apic_physflat;
+#ifndef CONFIG_CPU_HOTPLUG
+ /* In the CPU hotplug case we cannot use broadcast mode
+ because that opens a race when a CPU is removed.
+ Stay at physflat mode in this case.
+ It is bad to do this unconditionally though. Once
+ we have ACPI platform support for CPU hotplug
+ we should detect hotplug capablity from ACPI tables and
+ only do this when really needed. -AK */
+ if (num_cpus <= 8)
+ genapic = &apic_flat;
+#endif
+ goto print;
+ }
+
clusters = 0;
max_cluster = 0;
+
for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
if (cluster_cnt[i] > 0) {
++clusters;
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 282846965080..adc96282a9e2 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -2,13 +2,11 @@
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
*
- * Flat APIC subarch code. Maximum 8 CPUs, logical delivery.
+ * Flat APIC subarch code.
*
* Hacked for x86-64 by James Cleverdon from i386 architecture code by
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
- * Ashok Raj <ashok.raj@intel.com>
- * Removed IPI broadcast shortcut to support CPU hotplug
*/
#include <linux/config.h>
#include <linux/threads.h>
@@ -20,47 +18,6 @@
#include <asm/smp.h>
#include <asm/ipi.h>
-/*
- * The following permit choosing broadcast IPI shortcut v.s sending IPI only
- * to online cpus via the send_IPI_mask varient.
- * The mask version is my preferred option, since it eliminates a lot of
- * other extra code that would need to be written to cleanup intrs sent
- * to a CPU while offline.
- *
- * Sending broadcast introduces lots of trouble in CPU hotplug situations.
- * These IPI's are delivered to cpu's irrespective of their offline status
- * and could pickup stale intr data when these CPUS are turned online.
- *
- * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with
- * the idea of not using broadcast IPI's anymore. Hence the run time check
- * is introduced, on his request so we can choose an alternate mechanism.
- *
- * Initial wacky performance tests that collect cycle counts show
- * no increase in using mask v.s broadcast version. In fact they seem
- * identical in terms of cycle counts.
- *
- * if we need to use broadcast, we need to do the following.
- *
- * cli;
- * hold call_lock;
- * clear any pending IPI, just ack and clear all pending intr
- * set cpu_online_map;
- * release call_lock;
- * sti;
- *
- * The complicated dummy irq processing shown above is not required if
- * we didnt sent IPI's to wrong CPU's in the first place.
- *
- * - Ashok Raj <ashok.raj@intel.com>
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI (1)
-#else
-#define DEFAULT_SEND_IPI (0)
-#endif
-
-static int no_broadcast=DEFAULT_SEND_IPI;
-
static cpumask_t flat_target_cpus(void)
{
return cpu_online_map;
@@ -119,37 +76,15 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
local_irq_restore(flags);
}
-static inline void __local_flat_send_IPI_allbutself(int vector)
-{
- if (no_broadcast) {
- cpumask_t mask = cpu_online_map;
- int this_cpu = get_cpu();
-
- cpu_clear(this_cpu, mask);
- flat_send_IPI_mask(mask, vector);
- put_cpu();
- }
- else
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
-}
-
-static inline void __local_flat_send_IPI_all(int vector)
-{
- if (no_broadcast)
- flat_send_IPI_mask(cpu_online_map, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
static void flat_send_IPI_allbutself(int vector)
{
if (((num_online_cpus()) - 1) >= 1)
- __local_flat_send_IPI_allbutself(vector);
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
}
static void flat_send_IPI_all(int vector)
{
- __local_flat_send_IPI_all(vector);
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
static int flat_apic_id_registered(void)
@@ -170,16 +105,6 @@ static unsigned int phys_pkg_id(int index_msb)
return ((ebx >> 24) & 0xFF) >> index_msb;
}
-static __init int no_ipi_broadcast(char *str)
-{
- get_option(&str, &no_broadcast);
- printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
- "IPI Broadcast");
- return 1;
-}
-
-__setup("no_ipi_broadcast", no_ipi_broadcast);
-
struct genapic apic_flat = {
.name = "flat",
.int_delivery_mode = dest_LowestPrio,
@@ -195,11 +120,62 @@ struct genapic apic_flat = {
.phys_pkg_id = phys_pkg_id,
};
-static int __init print_ipi_mode(void)
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+
+static cpumask_t physflat_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+ send_IPI_mask_sequence(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+ cpumask_t allbutme = cpu_online_map;
+ int me = get_cpu();
+ cpu_clear(me, allbutme);
+ physflat_send_IPI_mask(allbutme, vector);
+ put_cpu();
+}
+
+static void physflat_send_IPI_all(int vector)
{
- printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
- "Shortcut");
- return 0;
+ physflat_send_IPI_mask(cpu_online_map, vector);
}
-late_initcall(print_ipi_mode);
+static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return x86_cpu_to_apicid[cpu];
+ else
+ return BAD_APICID;
+}
+
+struct genapic apic_physflat = {
+ .name = "physical flat",
+ .int_delivery_mode = dest_LowestPrio,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+ .target_cpus = physflat_target_cpus,
+ .apic_id_registered = flat_apic_id_registered,
+ .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
+ .send_IPI_all = physflat_send_IPI_all,
+ .send_IPI_allbutself = physflat_send_IPI_allbutself,
+ .send_IPI_mask = physflat_send_IPI_mask,
+ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+};
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 8d765aa77a26..98ff5eb32b9a 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -137,14 +137,14 @@ startup_64:
wrmsr
/* Setup cr0 */
- xorq %rax, %rax
- btsq $31, %rax /* Enable paging */
- btsq $0, %rax /* Enable protected mode */
- btsq $1, %rax /* Enable MP */
- btsq $4, %rax /* Enable ET */
- btsq $5, %rax /* Enable NE */
- btsq $16, %rax /* Enable WP */
- btsq $18, %rax /* Enable AM */
+#define CR0_PM 1 /* protected mode */
+#define CR0_MP (1<<1)
+#define CR0_ET (1<<4)
+#define CR0_NE (1<<5)
+#define CR0_WP (1<<16)
+#define CR0_AM (1<<18)
+#define CR0_PAGING (1<<31)
+ movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
/* Make changes effective */
movq %rax, %cr0
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index cc3fb85f5145..849a20aec7ca 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -135,3 +135,22 @@ void fixup_irqs(cpumask_t map)
local_irq_disable();
}
#endif
+
+extern void call_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+ __u32 pending;
+ unsigned long flags;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+ pending = local_softirq_pending();
+ /* Switch to interrupt stack */
+ if (pending)
+ call_softirq();
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 60d1eff41567..89fab51e20f4 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -8,43 +8,26 @@
#include <linux/mm.h>
#include <linux/kexec.h>
-#include <linux/delay.h>
#include <linux/string.h>
#include <linux/reboot.h>
-#include <asm/pda.h>
#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/cpufeature.h>
-#include <asm/hw_irq.h>
-
-#define LEVEL0_SIZE (1UL << 12UL)
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-#define LEVEL3_SIZE (1UL << 39UL)
-#define LEVEL4_SIZE (1UL << 48UL)
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
-#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-static void init_level2_page(u64 *level2p, unsigned long addr)
+
+static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
unsigned long end_addr;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL2_SIZE;
+ end_addr = addr + PUD_SIZE;
while (addr < end_addr) {
- *(level2p++) = addr | L1_ATTR;
- addr += LEVEL1_SIZE;
+ set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ addr += PMD_SIZE;
}
}
-static int init_level3_page(struct kimage *image, u64 *level3p,
+static int init_level3_page(struct kimage *image, pud_t *level3p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
@@ -52,32 +35,32 @@ static int init_level3_page(struct kimage *image, u64 *level3p,
result = 0;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL3_SIZE;
+ end_addr = addr + PGDIR_SIZE;
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
- u64 *level2p;
+ pmd_t *level2p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
- level2p = (u64 *)page_address(page);
+ level2p = (pmd_t *)page_address(page);
init_level2_page(level2p, addr);
- *(level3p++) = __pa(level2p) | L2_ATTR;
- addr += LEVEL2_SIZE;
+ set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
+ addr += PUD_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
- *(level3p++) = 0;
- addr += LEVEL2_SIZE;
+ pud_clear(level3p++);
+ addr += PUD_SIZE;
}
out:
return result;
}
-static int init_level4_page(struct kimage *image, u64 *level4p,
+static int init_level4_page(struct kimage *image, pgd_t *level4p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
@@ -85,28 +68,28 @@ static int init_level4_page(struct kimage *image, u64 *level4p,
result = 0;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL4_SIZE;
+ end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
- u64 *level3p;
+ pud_t *level3p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
- level3p = (u64 *)page_address(page);
+ level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
if (result) {
goto out;
}
- *(level4p++) = __pa(level3p) | L3_ATTR;
- addr += LEVEL3_SIZE;
+ set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
+ addr += PGDIR_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
- *(level4p++) = 0;
- addr += LEVEL3_SIZE;
+ pgd_clear(level4p++);
+ addr += PGDIR_SIZE;
}
out:
return result;
@@ -115,52 +98,50 @@ out:
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
- u64 *level4p;
- level4p = (u64 *)__va(start_pgtable);
+ pgd_t *level4p;
+ level4p = (pgd_t *)__va(start_pgtable);
return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
}
static void set_idt(void *newidt, u16 limit)
{
- unsigned char curidt[10];
+ struct desc_ptr curidt;
/* x86-64 supports unaliged loads & stores */
- (*(u16 *)(curidt)) = limit;
- (*(u64 *)(curidt +2)) = (unsigned long)(newidt);
+ curidt.size = limit;
+ curidt.address = (unsigned long)newidt;
__asm__ __volatile__ (
- "lidt %0\n"
- : "=m" (curidt)
+ "lidtq %0\n"
+ : : "m" (curidt)
);
};
static void set_gdt(void *newgdt, u16 limit)
{
- unsigned char curgdt[10];
+ struct desc_ptr curgdt;
/* x86-64 supports unaligned loads & stores */
- (*(u16 *)(curgdt)) = limit;
- (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt);
+ curgdt.size = limit;
+ curgdt.address = (unsigned long)newgdt;
__asm__ __volatile__ (
- "lgdt %0\n"
- : "=m" (curgdt)
+ "lgdtq %0\n"
+ : : "m" (curgdt)
);
};
static void load_segments(void)
{
__asm__ __volatile__ (
- "\tmovl $"STR(__KERNEL_DS)",%eax\n"
- "\tmovl %eax,%ds\n"
- "\tmovl %eax,%es\n"
- "\tmovl %eax,%ss\n"
- "\tmovl %eax,%fs\n"
- "\tmovl %eax,%gs\n"
+ "\tmovl %0,%%ds\n"
+ "\tmovl %0,%%es\n"
+ "\tmovl %0,%%ss\n"
+ "\tmovl %0,%%fs\n"
+ "\tmovl %0,%%gs\n"
+ : : "a" (__KERNEL_DS)
);
-#undef STR
-#undef __STR
}
typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
@@ -178,7 +159,7 @@ int machine_kexec_prepare(struct kimage *image)
/* Calculate the offsets */
start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
- control_code_buffer = start_pgtable + 4096UL;
+ control_code_buffer = start_pgtable + PAGE_SIZE;
/* Setup the identity mapped 64bit page table */
result = init_pgtable(image, start_pgtable);
@@ -214,7 +195,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
/* Calculate the offsets */
page_list = image->head;
start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
- control_code_buffer = start_pgtable + 4096UL;
+ control_code_buffer = start_pgtable + PAGE_SIZE;
/* Set the low half of the page table to my identity mapped
* page table for kexec. Leave the high half pointing at the
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 21e70625a495..3b267c91bb0c 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -15,6 +15,8 @@
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@@ -514,10 +516,7 @@ static struct sysdev_class mce_sysclass = {
set_kset_name("machinecheck"),
};
-static struct sys_device device_mce = {
- .id = 0,
- .cls = &mce_sysclass,
-};
+static DEFINE_PER_CPU(struct sys_device, device_mce);
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
@@ -542,27 +541,83 @@ ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
-static __cpuinit int mce_init_device(void)
+/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
{
int err;
+ if (!mce_available(&cpu_data[cpu]))
+ return -EIO;
+
+ per_cpu(device_mce,cpu).id = cpu;
+ per_cpu(device_mce,cpu).cls = &mce_sysclass;
+
+ err = sysdev_register(&per_cpu(device_mce,cpu));
+
+ if (!err) {
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+ }
+ return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+ sysdev_unregister(&per_cpu(device_mce,cpu));
+}
+#endif
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ mce_create_device(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ mce_remove_device(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier = {
+ .notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_device(void)
+{
+ int err;
+ int i = 0;
+
if (!mce_available(&boot_cpu_data))
return -EIO;
err = sysdev_class_register(&mce_sysclass);
- if (!err)
- err = sysdev_register(&device_mce);
- if (!err) {
- /* could create per CPU objects, but it is not worth it. */
- sysdev_create_file(&device_mce, &attr_bank0ctl);
- sysdev_create_file(&device_mce, &attr_bank1ctl);
- sysdev_create_file(&device_mce, &attr_bank2ctl);
- sysdev_create_file(&device_mce, &attr_bank3ctl);
- sysdev_create_file(&device_mce, &attr_bank4ctl);
- sysdev_create_file(&device_mce, &attr_tolerant);
- sysdev_create_file(&device_mce, &attr_check_interval);
- }
-
+
+ for_each_online_cpu(i) {
+ mce_create_device(i);
+ }
+
+ register_cpu_notifier(&mce_cpu_notifier);
misc_register(&mce_log_device);
return err;
-
}
+
device_initcall(mce_init_device);
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 9c5aa2a790c7..08abf9f5b159 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -109,7 +109,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver;
+ int ver, cpu;
static int found_bsp=0;
if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -131,7 +131,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
return;
}
- num_processors++;
+ cpu = num_processors++;
if (m->mpc_apicid > MAX_APICS) {
printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
@@ -155,13 +155,18 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
*/
+ cpu = 0;
+
bios_cpu_apicid[0] = m->mpc_apicid;
x86_cpu_to_apicid[0] = m->mpc_apicid;
found_bsp = 1;
- } else {
- bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
- x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
- }
+ } else
+ cpu = num_processors - found_bsp;
+ bios_cpu_apicid[cpu] = m->mpc_apicid;
+ x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+
+ cpu_set(cpu, cpu_possible_map);
+ cpu_set(cpu, cpu_present_map);
}
static void __init MP_bus_info (struct mpc_config_bus *m)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 5fd03225058a..0aa526298e92 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -765,8 +765,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
int node = 0;
unsigned bits;
- if (c->x86_num_cores == 1)
- return;
bits = 0;
while ((1 << bits) < c->x86_num_cores)
@@ -1081,8 +1079,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
else
mtrr_ap_init();
#ifdef CONFIG_NUMA
- if (c != &boot_cpu_data)
- numa_add_cpu(c - cpu_data);
+ numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index f2b9c6bc999a..34082c1cc41e 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/bitops.h>
+#include <linux/module.h>
#include <asm/bootsetup.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
@@ -22,10 +23,8 @@
#include <asm/smp.h>
#include <asm/i387.h>
#include <asm/percpu.h>
-#include <asm/mtrr.h>
#include <asm/proto.h>
-#include <asm/mman.h>
-#include <asm/numa.h>
+#include <asm/sections.h>
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
@@ -33,11 +32,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
-extern struct task_struct init_task;
-
-extern unsigned char __per_cpu_start[], __per_cpu_end[];
-
-extern struct desc_ptr cpu_gdt_descr[];
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
@@ -101,7 +95,7 @@ void __init setup_per_cpu_areas(void)
#endif
for (i = 0; i < NR_CPUS; i++) {
- unsigned char *ptr;
+ char *ptr;
if (!NODE_DATA(cpu_to_node(i))) {
printk("cpu with no node %d, num_online_nodes %d\n",
@@ -190,11 +184,7 @@ void __cpuinit check_efer(void)
*/
void __cpuinit cpu_init (void)
{
-#ifdef CONFIG_SMP
int cpu = stack_smp_processor_id();
-#else
- int cpu = smp_processor_id();
-#endif
struct tss_struct *t = &per_cpu(init_tss, cpu);
unsigned long v;
char *estacks = NULL;
@@ -214,7 +204,7 @@ void __cpuinit cpu_init (void)
printk("Initializing CPU#%d\n", cpu);
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index ccae392886af..e5958220d6b8 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -129,10 +129,9 @@ asmlinkage void smp_invalidate_interrupt (void)
} else
leave_mm(cpu);
}
+out:
ack_APIC_irq();
cpu_clear(cpu, flush_cpumask);
-
-out:
put_cpu_no_resched();
}
@@ -294,6 +293,69 @@ void unlock_ipi_call_lock(void)
}
/*
+ * this function sends a 'generic call function' IPI to one other CPU
+ * in the system.
+ */
+static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = 1;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (!wait)
+ return;
+
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+/*
+ * smp_call_function_single - Run a function on another CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Retrurns 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or is or has executed.
+ */
+
+int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+ if (cpu == me) {
+ WARN_ON(1);
+ put_cpu();
+ return -EBUSY;
+ }
+ spin_lock_bh(&call_lock);
+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
+ spin_unlock_bh(&call_lock);
+ put_cpu();
+ return 0;
+}
+
+/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
*/
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index e773a794ec45..6e4807d64d46 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -113,24 +113,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * If cpu-hotplug is supported, then we need to preallocate for all
- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
- * - Ashok Raj
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define fixup_cpu_possible_map(x) cpu_set((x), cpu_possible_map)
-#else
-#define fixup_cpu_possible_map(x)
-#endif
-
-/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
@@ -229,9 +211,6 @@ static __cpuinit void sync_master(void *arg)
{
unsigned long flags, i;
- if (smp_processor_id() != 0)
- return;
-
go[MASTER] = 0;
local_irq_save(flags);
@@ -280,7 +259,7 @@ get_delta(long *rt, long *master)
return tcenter - best_tm;
}
-static __cpuinit void sync_tsc(void)
+static __cpuinit void sync_tsc(unsigned int master)
{
int i, done = 0;
long delta, adj, adjust_latency = 0;
@@ -294,9 +273,17 @@ static __cpuinit void sync_tsc(void)
} t[NUM_ROUNDS] __cpuinitdata;
#endif
+ printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
+ smp_processor_id(), master);
+
go[MASTER] = 1;
- smp_call_function(sync_master, NULL, 1, 0);
+ /* It is dangerous to broadcast IPI as cpus are coming up,
+ * as they may not be ready to accept them. So since
+ * we only need to send the ipi to the boot cpu direct
+ * the message, and avoid the race.
+ */
+ smp_call_function_single(master, sync_master, NULL, 1, 0);
while (go[MASTER]) /* wait for master to be ready */
no_cpu_relax();
@@ -340,16 +327,14 @@ static __cpuinit void sync_tsc(void)
printk(KERN_INFO
"CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
"maxerr %lu cycles)\n",
- smp_processor_id(), boot_cpu_id, delta, rt);
+ smp_processor_id(), master, delta, rt);
}
static void __cpuinit tsc_sync_wait(void)
{
if (notscsync || !cpu_has_tsc)
return;
- printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
- boot_cpu_id);
- sync_tsc();
+ sync_tsc(boot_cpu_id);
}
static __init int notscsync_setup(char *s)
@@ -773,8 +758,9 @@ do_rest:
initial_code = start_secondary;
clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK);
- printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
- start_rip, init_rsp);
+ printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
+ cpus_weight(cpu_present_map),
+ apicid);
/*
* This grunge runs the startup process for
@@ -924,6 +910,27 @@ static __init void enforce_max_cpus(unsigned max_cpus)
}
}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu_possible_map should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_map on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * If cpu-hotplug is supported, then we need to preallocate for all
+ * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
+ * - Ashok Raj
+ */
+static void prefill_possible_map(void)
+{
+ int i;
+ for (i = 0; i < NR_CPUS; i++)
+ cpu_set(i, cpu_possible_map);
+}
+#endif
+
/*
* Various sanity checks.
*/
@@ -987,25 +994,15 @@ static int __init smp_sanity_check(unsigned max_cpus)
*/
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int i;
-
nmi_watchdog_default();
current_cpu_data = boot_cpu_data;
current_thread_info()->cpu = 0; /* needed? */
enforce_max_cpus(max_cpus);
- /*
- * Fill in cpu_present_mask
- */
- for (i = 0; i < NR_CPUS; i++) {
- int apicid = cpu_present_to_apicid(i);
- if (physid_isset(apicid, phys_cpu_present_map)) {
- cpu_set(i, cpu_present_map);
- cpu_set(i, cpu_possible_map);
- }
- fixup_cpu_possible_map(i);
- }
+#ifdef CONFIG_HOTPLUG_CPU
+ prefill_possible_map();
+#endif
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
@@ -1189,8 +1186,7 @@ void __cpu_die(unsigned int cpu)
printk ("CPU %d is now offline\n", cpu);
return;
}
- current->state = TASK_UNINTERRUPTIBLE;
- schedule_timeout(HZ/10);
+ msleep(100);
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 102736630002..6ead433a3885 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -594,9 +594,6 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
if (!cpu)
reason = get_nmi_reason();
- if (!cpu_online(cpu))
- return;
-
if (!(reason & 0xc0)) {
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
== NOTIFY_STOP)
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 33a873a3c223..841bd738a189 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -18,8 +18,6 @@
#include <asm/smp.h>
#endif
-int x86_udelay_tsc = 0; /* Delay via TSC */
-
int read_current_timer(unsigned long *timer_value)
{
rdtscll(*timer_value);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2f187986f940..13792721037e 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,7 +23,6 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/module.h>
-#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/uaccess.h>
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index ac61c186eb02..70cb2904a90f 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -36,34 +36,36 @@ int numa_off __initdata;
int __init compute_hash_shift(struct node *nodes, int numnodes)
{
int i;
- int shift = 24;
- u64 addr;
+ int shift = 20;
+ unsigned long addr,maxend=0;
- /* When in doubt use brute force. */
- while (shift < 48) {
- memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
- for (i = 0; i < numnodes; i++) {
- if (nodes[i].start == nodes[i].end)
- continue;
- for (addr = nodes[i].start;
- addr < nodes[i].end;
- addr += (1UL << shift)) {
- if (memnodemap[addr >> shift] != 0xff &&
- memnodemap[addr >> shift] != i) {
- printk(KERN_INFO
- "node %d shift %d addr %Lx conflict %d\n",
- i, shift, addr, memnodemap[addr>>shift]);
- goto next;
- }
- memnodemap[addr >> shift] = i;
+ for (i = 0; i < numnodes; i++)
+ if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
+ maxend = nodes[i].end;
+
+ while ((1UL << shift) < (maxend / NODEMAPSIZE))
+ shift++;
+
+ printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
+ shift,maxend);
+ memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
+ for (i = 0; i < numnodes; i++) {
+ if (nodes[i].start == nodes[i].end)
+ continue;
+ for (addr = nodes[i].start;
+ addr < nodes[i].end;
+ addr += (1UL << shift)) {
+ if (memnodemap[addr >> shift] != 0xff) {
+ printk(KERN_INFO
+ "Your memory is not aligned you need to rebuild your kernel "
+ "with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
+ shift,addr);
+ return -1;
}
+ memnodemap[addr >> shift] = i;
}
- return shift;
- next:
- shift++;
}
- memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE);
- return -1;
+ return shift;
}
#ifdef CONFIG_SPARSEMEM
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 5d01b31472e1..8e3d097a9ddd 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -20,6 +20,9 @@
static struct acpi_table_slit *acpi_slit;
+/* Internal processor count */
+static unsigned int __initdata num_processors = 0;
+
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct node nodes[MAX_NUMNODES] __initdata;
@@ -101,16 +104,18 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
bad_srat();
return;
}
- if (pa->apic_id >= NR_CPUS) {
- printk(KERN_ERR "SRAT: lapic %u too large.\n",
- pa->apic_id);
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n",
+ num_processors, pa->apic_id, NR_CPUS);
bad_srat();
return;
}
- cpu_to_node[pa->apic_id] = node;
+ cpu_to_node[num_processors] = node;
acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
- pxm, pa->apic_id, node);
+ printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n",
+ pxm, pa->apic_id, num_processors, node);
+
+ num_processors++;
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -124,7 +129,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
if (srat_disabled() || ma->flags.enabled == 0)
return;
- /* hotplug bit is ignored for now */
pxm = ma->proximity_domain;
node = setup_node(pxm);
if (node < 0) {
@@ -134,6 +138,10 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
}
start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
+ /* It is fine to add this area to the nodes data it will be used later*/
+ if (ma->flags.hot_pluggable == 1)
+ printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
+ start, end);
i = conflicting_nodes(start, end);
if (i >= 0) {
printk(KERN_ERR
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 7e7d0c2a0025..c2c38b579939 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -29,7 +29,7 @@ __init static int
fill_mp_bus_to_cpumask(void)
{
struct pci_dev *nb_dev = NULL;
- int i, j, printed;
+ int i, j;
u32 ldtbus, nid;
static int lbnr[3] = {
LDT_BUS_NUMBER_REGISTER_0,