diff options
Diffstat (limited to 'arch/i386')
90 files changed, 4232 insertions, 2980 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 6189b0c28d6f..8ff1c6fb5aa1 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -166,7 +166,6 @@ config X86_VISWS config X86_GENERICARCH bool "Generic architecture (Summit, bigsmp, ES7000, default)" - depends on SMP help This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. @@ -263,7 +262,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on !SMP && !(X86_VISWS || X86_VOYAGER) + depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -288,12 +287,12 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC bool - depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) + depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH default y config X86_IO_APIC bool - depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) + depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH default y config X86_VISWS_APIC @@ -402,6 +401,7 @@ config X86_REBOOTFIXUPS config MICROCODE tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" + select FW_LOADER ---help--- If you say Y here and also to "/dev file system support" in the 'File systems' section, you will be able to update the microcode on @@ -417,6 +417,11 @@ config MICROCODE To compile this driver as a module, choose M here: the module will be called microcode. +config MICROCODE_OLD_INTERFACE + bool + depends on MICROCODE + default y + config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" help @@ -599,12 +604,10 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on ARCH_SPARSEMEM_ENABLE -source "mm/Kconfig" +config ARCH_POPULATES_NODE_MAP + def_bool y -config HAVE_ARCH_EARLY_PFN_TO_NID - bool - default y - depends on NUMA +source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" @@ -679,7 +682,7 @@ config EFI depends on ACPI default n ---help--- - This enables the the kernel to boot on EFI platforms using + This enables the kernel to boot on EFI platforms using system configuration information passed to it from the firmware. This also enables the kernel to use any EFI runtime services that are available (such as the EFI variable services). @@ -741,8 +744,7 @@ config SECCOMP source kernel/Kconfig.hz config KEXEC - bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "kexec system call" help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -763,6 +765,13 @@ config CRASH_DUMP depends on HIGHMEM help Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or BIOS using + PHYSICAL_START. + For more details see Documentation/kdump/kdump.txt config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) @@ -1133,7 +1142,7 @@ source "arch/i386/oprofile/Kconfig" config KPROBES bool "Kprobes (EXPERIMENTAL)" - depends on EXPERIMENTAL && MODULES + depends on KALLSYMS && EXPERIMENTAL && MODULES help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 3e4adb1e2244..7cc0b189b82b 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -46,6 +46,14 @@ cflags-y += -ffreestanding # a lot more stack due to the lack of sharing of stacklots: CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + CFLAGS += $(cflags-y) # Default subarch .c files diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index 4b84ea216f2b..34321368011a 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -15,42 +15,95 @@ #include <asm/setup.h> #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +# It is assumed that %ds == INITSEG here + movb $0, (EDD_MBR_SIG_NR_BUF) movb $0, (EDDNR) -# Check the command line for two options: +# Check the command line for options: # edd=of disables EDD completely (edd=off) # edd=sk skips the MBR test (edd=skipmbr) +# edd=on re-enables EDD (edd=on) + pushl %esi - cmpl $0, %cs:cmd_line_ptr - jz done_cl + movw $edd_mbr_sig_start, %di # Default to edd=on + movl %cs:(cmd_line_ptr), %esi -# ds:esi has the pointer to the command line now - movl $(COMMAND_LINE_SIZE-7), %ecx -# loop through kernel command line one byte at a time -cl_loop: - cmpl $EDD_CL_EQUALS, (%si) + andl %esi, %esi + jz old_cl # Old boot protocol? + +# Convert to a real-mode pointer in fs:si + movl %esi, %eax + shrl $4, %eax + movw %ax, %fs + andw $0xf, %si + jmp have_cl_pointer + +# Old-style boot protocol? +old_cl: + push %ds # aka INITSEG + pop %fs + + cmpw $0xa33f, (0x20) + jne done_cl # No command line at all? + movw (0x22), %si # Pointer relative to INITSEG + +# fs:si has the pointer to the command line now +have_cl_pointer: + +# Loop through kernel command line one byte at a time. Just in +# case the loader is buggy and failed to null-terminate the command line +# terminate if we get close enough to the end of the segment that we +# cannot fit "edd=XX"... +cl_atspace: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movl %fs:(%si), %eax + andb %al, %al # End of line? + jz done_cl + cmpl $EDD_CL_EQUALS, %eax jz found_edd_equals - incl %esi - loop cl_loop - jmp done_cl + cmpb $0x20, %al # <= space consider whitespace + ja cl_skipword + incw %si + jmp cl_atspace + +cl_skipword: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movb %fs:(%si), %al # End of string? + andb %al, %al + jz done_cl + cmpb $0x20, %al + jbe cl_atspace + incw %si + jmp cl_skipword + found_edd_equals: # only looking at first two characters after equals - addl $4, %esi - cmpw $EDD_CL_OFF, (%si) # edd=of - jz do_edd_off - cmpw $EDD_CL_SKIP, (%si) # edd=sk - jz do_edd_skipmbr - jmp done_cl +# late overrides early on the command line, so keep going after finding something + movw %fs:4(%si), %ax + cmpw $EDD_CL_OFF, %ax # edd=of + je do_edd_off + cmpw $EDD_CL_SKIP, %ax # edd=sk + je do_edd_skipmbr + cmpw $EDD_CL_ON, %ax # edd=on + je do_edd_on + jmp cl_skipword do_edd_skipmbr: - popl %esi - jmp edd_start + movw $edd_start, %di + jmp cl_skipword do_edd_off: - popl %esi - jmp edd_done + movw $edd_done, %di + jmp cl_skipword +do_edd_on: + movw $edd_mbr_sig_start, %di + jmp cl_skipword + done_cl: popl %esi - + jmpw *%di # Read the first sector of each BIOS disk device and store the 4-byte signature edd_mbr_sig_start: diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index d2b684cd620a..3aec4538a113 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -494,12 +494,12 @@ no_voyager: movw %cs, %ax # aka SETUPSEG subw $DELTA_INITSEG, %ax # aka INITSEG movw %ax, %ds - movw $0, (0x1ff) # default is no pointing device + movb $0, (0x1ff) # default is no pointing device int $0x11 # int 0x11: equipment list testb $0x04, %al # check if mouse installed jz no_psmouse - movw $0xAA, (0x1ff) # device present + movb $0xAA, (0x1ff) # device present no_psmouse: #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 8c2a6faeeae5..2c5b5cc55f79 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -11,8 +11,6 @@ * */ -#include <linux/config.h> /* for CONFIG_VIDEO_* */ - /* Enable autodetection of SVGA adapters and modes. */ #undef CONFIG_VIDEO_SVGA diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 89ebb7a316ab..ee2d79bd8af7 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,41 +1,51 @@ # # Automatically generated make config: don't edit +# Linux kernel version: 2.6.18-git7 +# Wed Sep 27 21:53:10 2006 # CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_X86=y CONFIG_MMU=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # Code maturity level options # CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup # CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set +CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" -CONFIG_UID16=y -CONFIG_VM86=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL=y CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y @@ -45,11 +55,9 @@ CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 CONFIG_SLAB=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -60,41 +68,45 @@ CONFIG_BASE_SMALL=0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set # CONFIG_KMOD is not set +CONFIG_STOP_MACHINE=y # # Block layer # -# CONFIG_LBD is not set +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set -# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Processor type and features # -CONFIG_X86_PC=y +CONFIG_SMP=y +# CONFIG_X86_PC is not set # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_NUMAQ is not set # CONFIG_X86_SUMMIT is not set # CONFIG_X86_BIGSMP is not set # CONFIG_X86_VISWS is not set -# CONFIG_X86_GENERICARCH is not set +CONFIG_X86_GENERICARCH=y # CONFIG_X86_ES7000 is not set +CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -102,11 +114,11 @@ CONFIG_X86_PC=y # CONFIG_M586MMX is not set # CONFIG_M686 is not set # CONFIG_MPENTIUMII is not set -# CONFIG_MPENTIUMIII is not set +CONFIG_MPENTIUMIII=y # CONFIG_MPENTIUMM is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set -CONFIG_MK7=y +# CONFIG_MK7 is not set # CONFIG_MK8 is not set # CONFIG_MCRUSOE is not set # CONFIG_MEFFICEON is not set @@ -117,10 +129,10 @@ CONFIG_MK7=y # CONFIG_MGEODE_LX is not set # CONFIG_MCYRIXIII is not set # CONFIG_MVIAC3_2 is not set -# CONFIG_X86_GENERIC is not set +CONFIG_X86_GENERIC=y CONFIG_X86_CMPXCHG=y CONFIG_X86_XADD=y -CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y @@ -131,26 +143,28 @@ CONFIG_X86_CMPXCHG64=y CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_USE_3DNOW=y CONFIG_X86_TSC=y -# CONFIG_HPET_TIMER is not set -# CONFIG_SMP is not set -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_NR_CPUS=32 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_X86_UP_APIC=y -CONFIG_X86_UP_IOAPIC=y +CONFIG_PREEMPT_BKL=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_MCE=y CONFIG_X86_MCE_NONFATAL=y -# CONFIG_X86_MCE_P4THERMAL is not set +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set # CONFIG_X86_REBOOTFIXUPS is not set -# CONFIG_MICROCODE is not set -# CONFIG_X86_MSR is not set -# CONFIG_X86_CPUID is not set +CONFIG_MICROCODE=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y # # Firmware Drivers @@ -158,68 +172,68 @@ CONFIG_X86_MCE_NONFATAL=y # CONFIG_EDD is not set # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set -CONFIG_VMSPLIT_3G=y -# CONFIG_VMSPLIT_3G_OPT is not set -# CONFIG_VMSPLIT_2G is not set -# CONFIG_VMSPLIT_1G is not set CONFIG_PAGE_OFFSET=0xC0000000 -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_HIGHMEM=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_RESOURCES_64BIT=y +# CONFIG_HIGHPTE is not set # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y # CONFIG_EFI is not set +# CONFIG_IRQBALANCE is not set CONFIG_REGPARM=y -# CONFIG_SECCOMP is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 # CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 -CONFIG_DOUBLEFAULT=y +# CONFIG_HOTPLUG_CPU is not set +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # # Power management options (ACPI, APM) # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set +CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set -CONFIG_SOFTWARE_SUSPEND=y -CONFIG_PM_STD_PARTITION="" +CONFIG_PM_SYSFS_DEPRECATED=y # # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -# CONFIG_ACPI_SLEEP is not set -# CONFIG_ACPI_AC is not set -# CONFIG_ACPI_BATTERY is not set -# CONFIG_ACPI_BUTTON is not set +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y # CONFIG_ACPI_VIDEO is not set # CONFIG_ACPI_HOTKEY is not set -# CONFIG_ACPI_FAN is not set -# CONFIG_ACPI_PROCESSOR is not set +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_IBM is not set # CONFIG_ACPI_TOSHIBA is not set -CONFIG_ACPI_BLACKLIST_YEAR=0 -# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_DEBUG=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y -# CONFIG_X86_PM_TIMER is not set +CONFIG_X86_PM_TIMER=y # CONFIG_ACPI_CONTAINER is not set # @@ -230,7 +244,41 @@ CONFIG_ACPI_SYSTEM=y # # CPU Frequency scaling # -# CONFIG_CPU_FREQ is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K6 is not set +# CONFIG_X86_POWERNOW_K7 is not set +CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_GX_SUSPMOD is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_SPEEDSTEP_ICH is not set +# CONFIG_X86_SPEEDSTEP_SMI is not set +# CONFIG_X86_P4_CLOCKMOD is not set +# CONFIG_X86_CPUFREQ_NFORCE2 is not set +# CONFIG_X86_LONGRUN is not set +# CONFIG_X86_LONGHAUL is not set + +# +# shared options +# +CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set # # Bus options (PCI, PCMCIA, EISA, MCA, ISA) @@ -244,12 +292,14 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y # CONFIG_PCIEPORTBUS is not set -# CONFIG_PCI_MSI is not set -# CONFIG_PCI_LEGACY_PROC is not set +CONFIG_PCI_MSI=y +# CONFIG_PCI_MULTITHREAD_PROBE is not set +# CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set # CONFIG_SCx200 is not set +CONFIG_K8_NB=y # # PCCARD (PCMCIA/CardBus) support @@ -278,93 +328,54 @@ CONFIG_NET=y # # CONFIG_NETDEBUG is not set CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y +# CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set # CONFIG_NET_KEY is not set CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set +CONFIG_IP_MULTICAST=y # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set # CONFIG_INET_TUNNEL is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set - -# -# Core Netfilter Configuration -# -# CONFIG_NETFILTER_NETLINK is not set -CONFIG_NETFILTER_XTABLES=y -# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set -# CONFIG_NETFILTER_XT_TARGET_MARK is not set -# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set -# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set -# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set -# CONFIG_NETFILTER_XT_MATCH_DCCP is not set -# CONFIG_NETFILTER_XT_MATCH_HELPER is not set -# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set -CONFIG_NETFILTER_XT_MATCH_LIMIT=y -CONFIG_NETFILTER_XT_MATCH_MAC=y -# CONFIG_NETFILTER_XT_MATCH_MARK is not set -# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set -# CONFIG_NETFILTER_XT_MATCH_REALM is not set -# CONFIG_NETFILTER_XT_MATCH_SCTP is not set -CONFIG_NETFILTER_XT_MATCH_STATE=y -# CONFIG_NETFILTER_XT_MATCH_STRING is not set -# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -# CONFIG_IP_NF_CT_ACCT is not set -# CONFIG_IP_NF_CONNTRACK_MARK is not set -# CONFIG_IP_NF_CONNTRACK_EVENTS is not set -# CONFIG_IP_NF_CT_PROTO_SCTP is not set -CONFIG_IP_NF_FTP=y -# CONFIG_IP_NF_IRC is not set -# CONFIG_IP_NF_NETBIOS_NS is not set -# CONFIG_IP_NF_TFTP is not set -# CONFIG_IP_NF_AMANDA is not set -# CONFIG_IP_NF_PPTP is not set -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_IPRANGE is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_OWNER is not set -# CONFIG_IP_NF_MATCH_ADDRTYPE is not set -# CONFIG_IP_NF_MATCH_HASHLIMIT is not set -CONFIG_IP_NF_FILTER=y -# CONFIG_IP_NF_TARGET_REJECT is not set -CONFIG_IP_NF_TARGET_LOG=y -# CONFIG_IP_NF_TARGET_ULOG is not set -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_NAT is not set -# CONFIG_IP_NF_MANGLE is not set -# CONFIG_IP_NF_RAW is not set -# CONFIG_IP_NF_ARPTABLES is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set # # DCCP Configuration (EXPERIMENTAL) @@ -389,7 +400,6 @@ CONFIG_IP_NF_TARGET_LOG=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -402,6 +412,7 @@ CONFIG_IP_NF_TARGET_LOG=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set @@ -416,7 +427,9 @@ CONFIG_IP_NF_TARGET_LOG=y # CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -431,13 +444,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # # Parallel port support # -CONFIG_PARPORT=y -CONFIG_PARPORT_PC=y -# CONFIG_PARPORT_SERIAL is not set -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_GSC is not set -CONFIG_PARPORT_1284=y +# CONFIG_PARPORT is not set # # Plug and Play support @@ -447,8 +454,7 @@ CONFIG_PARPORT_1284=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set -# CONFIG_PARIDE is not set +CONFIG_BLK_DEV_FD=y # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -459,8 +465,11 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set -# CONFIG_BLK_DEV_RAM is not set +CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set @@ -476,7 +485,7 @@ CONFIG_BLK_DEV_IDE=y # CONFIG_BLK_DEV_IDE_SATA is not set # CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_IDEDISK_MULTI_MODE=y CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set @@ -486,10 +495,10 @@ CONFIG_BLK_DEV_IDECD=y # # IDE chipset support/bugfixes # -# CONFIG_IDE_GENERIC is not set +CONFIG_IDE_GENERIC=y # CONFIG_BLK_DEV_CMD640 is not set CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_IDEPCI_SHARE_IRQ is not set # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_GENERIC is not set # CONFIG_BLK_DEV_OPTI621 is not set @@ -500,7 +509,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_AMD74XX=y # CONFIG_BLK_DEV_ATIIXP is not set # CONFIG_BLK_DEV_CMD64X is not set # CONFIG_BLK_DEV_TRIFLEX is not set @@ -511,7 +520,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set # CONFIG_BLK_DEV_SC1200 is not set -# CONFIG_BLK_DEV_PIIX is not set +CONFIG_BLK_DEV_PIIX=y # CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set # CONFIG_BLK_DEV_PDC202XX_OLD is not set @@ -521,7 +530,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_BLK_DEV_SIS5513 is not set # CONFIG_BLK_DEV_SLC90E66 is not set # CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_IVB is not set @@ -533,6 +542,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_NETLINK=y # CONFIG_SCSI_PROC_FS is not set # @@ -541,8 +551,9 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -# CONFIG_CHR_DEV_SG is not set +CONFIG_BLK_DEV_SR=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set # @@ -553,29 +564,44 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOGGING is not set # -# SCSI Transport Attributes +# SCSI Transports # -# CONFIG_SCSI_SPI_ATTRS is not set -# CONFIG_SCSI_FC_ATTRS is not set +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set # # SCSI low-level drivers # # CONFIG_ISCSI_TCP is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +CONFIG_BLK_DEV_3W_XXXX_RAID=y # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_AIC7XXX_OLD is not set -# CONFIG_SCSI_AIC79XX is not set +CONFIG_SCSI_AIC79XX=y +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=4000 +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +# CONFIG_SCSI_AIC94XX is not set # CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -584,11 +610,9 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set +# CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLA_FC is not set # CONFIG_SCSI_LPFC is not set @@ -598,22 +622,114 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_DEBUG is not set # +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SVW=y +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +CONFIG_SATA_NV=y +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +CONFIG_SATA_SIL=y +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +CONFIG_SATA_VIA=y +# CONFIG_SATA_VITESSE is not set +CONFIG_SATA_INTEL_COMBINED=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_LEGACY is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_QDI is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# # Multi-device support (RAID and LVM) # -# CONFIG_MD is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support # -# CONFIG_FUSION is not set -# CONFIG_FUSION_SPI is not set +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y # CONFIG_FUSION_FC is not set # CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set # # IEEE 1394 (FireWire) support # -# CONFIG_IEEE1394 is not set +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set +# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set +# CONFIG_IEEE1394_EXPORT_FULL_API is not set + +# +# Device Drivers +# + +# +# Texas Instruments PCILynx requires I2C +# +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocol Drivers +# +# CONFIG_IEEE1394_VIDEO1394 is not set +# CONFIG_IEEE1394_SBP2 is not set +# CONFIG_IEEE1394_ETH1394 is not set +# CONFIG_IEEE1394_DV1394 is not set +CONFIG_IEEE1394_RAWIO=y # # I2O device support @@ -652,46 +768,63 @@ CONFIG_MII=y # # Tulip family network device support # -# CONFIG_NET_TULIP is not set +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set # CONFIG_HP100 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_B44 is not set -# CONFIG_FORCEDETH is not set +CONFIG_B44=y +CONFIG_FORCEDETH=y +# CONFIG_FORCEDETH_NAPI is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set -# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) # # CONFIG_ACENIC is not set # CONFIG_DL2K is not set -# CONFIG_E1000 is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set +CONFIG_R8169=y +# CONFIG_R8169_NAPI is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set -# CONFIG_SKY2 is not set +CONFIG_SKY2=y # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set -# CONFIG_TIGON3 is not set -# CONFIG_BNX2 is not set +CONFIG_TIGON3=y +CONFIG_BNX2=y +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -699,6 +832,7 @@ CONFIG_E100=y # CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set # # Token Ring devices @@ -716,14 +850,15 @@ CONFIG_E100=y # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set -# CONFIG_PLIP is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set # CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y # # ISDN subsystem @@ -745,8 +880,8 @@ CONFIG_INPUT=y # CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set # CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y @@ -776,7 +911,6 @@ CONFIG_SERIO=y CONFIG_SERIO_I8042=y # CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set @@ -788,14 +922,15 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_CONSOLE is not set -# CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set @@ -804,14 +939,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # Non-8250 serial port support # CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 -CONFIG_PRINTER=y -# CONFIG_LP_CONSOLE is not set -# CONFIG_PPDEV is not set -# CONFIG_TIPAR is not set # # IPMI @@ -822,8 +954,12 @@ CONFIG_PRINTER=y # Watchdog Cards # # CONFIG_WATCHDOG is not set -# CONFIG_HW_RANDOM is not set -CONFIG_NVRAM=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y +CONFIG_HW_RANDOM_GEODE=y +CONFIG_HW_RANDOM_VIA=y +# CONFIG_NVRAM is not set CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set @@ -833,31 +969,28 @@ CONFIG_RTC=y # # Ftape, the floppy tape device driver # -# CONFIG_FTAPE is not set CONFIG_AGP=y # CONFIG_AGP_ALI is not set # CONFIG_AGP_ATI is not set # CONFIG_AGP_AMD is not set -# CONFIG_AGP_AMD64 is not set -# CONFIG_AGP_INTEL is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y # CONFIG_AGP_NVIDIA is not set # CONFIG_AGP_SIS is not set # CONFIG_AGP_SWORKS is not set -CONFIG_AGP_VIA=y +# CONFIG_AGP_VIA is not set # CONFIG_AGP_EFFICEON is not set -CONFIG_DRM=y -# CONFIG_DRM_TDFX is not set -# CONFIG_DRM_R128 is not set -CONFIG_DRM_RADEON=y -# CONFIG_DRM_MGA is not set -# CONFIG_DRM_SIS is not set -# CONFIG_DRM_VIA is not set -# CONFIG_DRM_SAVAGE is not set +# CONFIG_DRM is not set # CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set # CONFIG_CS5535_GPIO is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_HPET is not set -# CONFIG_HANGCHECK_TIMER is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +CONFIG_HANGCHECK_TIMER=y # # TPM devices @@ -868,59 +1001,7 @@ CONFIG_DRM_RADEON=y # # I2C support # -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y - -# -# I2C Algorithms -# -CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_ALGOPCF is not set -# CONFIG_I2C_ALGOPCA is not set - -# -# I2C Hardware Bus support -# -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI1563 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -# CONFIG_I2C_I801 is not set -# CONFIG_I2C_I810 is not set -# CONFIG_I2C_PIIX4 is not set -CONFIG_I2C_ISA=y -# CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_PARPORT is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -# CONFIG_SCx200_ACB is not set -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_SIS630 is not set -# CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_VIA is not set -CONFIG_I2C_VIAPRO=y -# CONFIG_I2C_VOODOO3 is not set -# CONFIG_I2C_PCA_ISA is not set - -# -# Miscellaneous I2C Chip support -# -# CONFIG_SENSORS_DS1337 is not set -# CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set -# CONFIG_SENSORS_PCF8574 is not set -# CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set -# CONFIG_SENSORS_RTC8564 is not set -# CONFIG_SENSORS_MAX6875 is not set -# CONFIG_RTC_X1205_I2C is not set -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_I2C is not set # # SPI support @@ -931,51 +1012,12 @@ CONFIG_I2C_VIAPRO=y # # Dallas's 1-wire bus # -# CONFIG_W1 is not set # # Hardware Monitoring support # -CONFIG_HWMON=y -CONFIG_HWMON_VID=y -# CONFIG_SENSORS_ADM1021 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM1026 is not set -# CONFIG_SENSORS_ADM1031 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_ASB100 is not set -# CONFIG_SENSORS_ATXP1 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_FSCHER is not set -# CONFIG_SENSORS_FSCPOS is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -CONFIG_SENSORS_IT87=y -# CONFIG_SENSORS_LM63 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM77 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -# CONFIG_SENSORS_LM83 is not set -# CONFIG_SENSORS_LM85 is not set -# CONFIG_SENSORS_LM87 is not set -# CONFIG_SENSORS_LM90 is not set -# CONFIG_SENSORS_LM92 is not set -# CONFIG_SENSORS_MAX1619 is not set -# CONFIG_SENSORS_PC87360 is not set -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_SMSC47M1 is not set -# CONFIG_SENSORS_SMSC47B397 is not set -# CONFIG_SENSORS_VIA686A is not set -# CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83781D is not set -# CONFIG_SENSORS_W83792D is not set -# CONFIG_SENSORS_W83L785TS is not set -# CONFIG_SENSORS_W83627HF is not set -# CONFIG_SENSORS_W83627EHF is not set -# CONFIG_SENSORS_HDAPS is not set -# CONFIG_HWMON_DEBUG_CHIP is not set +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices @@ -983,117 +1025,31 @@ CONFIG_SENSORS_IT87=y # CONFIG_IBM_ASM is not set # -# Multimedia Capabilities Port drivers -# - -# # Multimedia devices # -CONFIG_VIDEO_DEV=y - -# -# Video For Linux -# - -# -# Video Adapters -# -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_BT848 is not set -# CONFIG_VIDEO_BWQCAM is not set -# CONFIG_VIDEO_CQCAM is not set -# CONFIG_VIDEO_W9966 is not set -# CONFIG_VIDEO_CPIA is not set -# CONFIG_VIDEO_SAA5246A is not set -# CONFIG_VIDEO_SAA5249 is not set -# CONFIG_TUNER_3036 is not set -# CONFIG_VIDEO_STRADIS is not set -# CONFIG_VIDEO_ZORAN is not set -CONFIG_VIDEO_SAA7134=y -# CONFIG_VIDEO_SAA7134_ALSA is not set -# CONFIG_VIDEO_MXB is not set -# CONFIG_VIDEO_DPC is not set -# CONFIG_VIDEO_HEXIUM_ORION is not set -# CONFIG_VIDEO_HEXIUM_GEMINI is not set -# CONFIG_VIDEO_CX88 is not set -# CONFIG_VIDEO_EM28XX is not set -# CONFIG_VIDEO_OVCAMCHIP is not set -# CONFIG_VIDEO_AUDIO_DECODER is not set -# CONFIG_VIDEO_DECODER is not set - -# -# Radio Adapters -# -# CONFIG_RADIO_GEMTEK_PCI is not set -# CONFIG_RADIO_MAXIRADIO is not set -# CONFIG_RADIO_MAESTRO is not set +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices # # CONFIG_DVB is not set -CONFIG_VIDEO_TUNER=y -CONFIG_VIDEO_BUF=y -CONFIG_VIDEO_IR=y +# CONFIG_USB_DABUSB is not set # # Graphics support # -CONFIG_FB=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_MACMODES is not set -CONFIG_FB_MODE_HELPERS=y -# CONFIG_FB_TILEBLITTING is not set -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_VESA is not set -CONFIG_VIDEO_SELECT=y -# CONFIG_FB_HGA is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I810 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON_OLD is not set -CONFIG_FB_RADEON=y -CONFIG_FB_RADEON_I2C=y -# CONFIG_FB_RADEON_DEBUG is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_CYBLA is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_GEODE is not set -# CONFIG_FB_VIRTUAL is not set +CONFIG_FIRMWARE_EDID=y +# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Logo configuration -# -# CONFIG_LOGO is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # @@ -1104,97 +1060,30 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -CONFIG_SND=y -CONFIG_SND_TIMER=y -CONFIG_SND_PCM=y -CONFIG_SND_RAWMIDI=y -CONFIG_SND_SEQUENCER=y -# CONFIG_SND_SEQ_DUMMY is not set -# CONFIG_SND_MIXER_OSS is not set -# CONFIG_SND_PCM_OSS is not set -# CONFIG_SND_SEQUENCER_OSS is not set -CONFIG_SND_RTCTIMER=y -CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y -# CONFIG_SND_DYNAMIC_MINORS is not set -# CONFIG_SND_SUPPORT_OLD_API is not set -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set - -# -# Generic devices -# -CONFIG_SND_MPU401_UART=y -CONFIG_SND_AC97_CODEC=y -CONFIG_SND_AC97_BUS=y -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_SERIAL_U16550 is not set -# CONFIG_SND_MPU401 is not set - -# -# PCI devices -# -# CONFIG_SND_AD1889 is not set -# CONFIG_SND_ALS4000 is not set -# CONFIG_SND_ALI5451 is not set -# CONFIG_SND_ATIIXP is not set -# CONFIG_SND_ATIIXP_MODEM is not set -# CONFIG_SND_AU8810 is not set -# CONFIG_SND_AU8820 is not set -# CONFIG_SND_AU8830 is not set -# CONFIG_SND_AZT3328 is not set -# CONFIG_SND_BT87X is not set -# CONFIG_SND_CA0106 is not set -# CONFIG_SND_CMIPCI is not set -# CONFIG_SND_CS4281 is not set -# CONFIG_SND_CS46XX is not set -# CONFIG_SND_CS5535AUDIO is not set -# CONFIG_SND_EMU10K1 is not set -# CONFIG_SND_EMU10K1X is not set -# CONFIG_SND_ENS1370 is not set -# CONFIG_SND_ENS1371 is not set -# CONFIG_SND_ES1938 is not set -# CONFIG_SND_ES1968 is not set -# CONFIG_SND_FM801 is not set -# CONFIG_SND_HDA_INTEL is not set -# CONFIG_SND_HDSP is not set -# CONFIG_SND_HDSPM is not set -# CONFIG_SND_ICE1712 is not set -# CONFIG_SND_ICE1724 is not set -# CONFIG_SND_INTEL8X0 is not set -# CONFIG_SND_INTEL8X0M is not set -# CONFIG_SND_KORG1212 is not set -# CONFIG_SND_MAESTRO3 is not set -# CONFIG_SND_MIXART is not set -# CONFIG_SND_NM256 is not set -# CONFIG_SND_PCXHR is not set -# CONFIG_SND_RME32 is not set -# CONFIG_SND_RME96 is not set -# CONFIG_SND_RME9652 is not set -# CONFIG_SND_SONICVIBES is not set -# CONFIG_SND_TRIDENT is not set -CONFIG_SND_VIA82XX=y -# CONFIG_SND_VIA82XX_MODEM is not set -# CONFIG_SND_VX222 is not set -# CONFIG_SND_YMFPCI is not set - -# -# USB devices -# -# CONFIG_SND_USB_AUDIO is not set -# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND is not set # # Open Sound System # -# CONFIG_SOUND_PRIME is not set +CONFIG_SOUND_PRIME=y +CONFIG_OSS_OBSOLETE_DRIVER=y +# CONFIG_SOUND_BT878 is not set +# CONFIG_SOUND_EMU10K1 is not set +# CONFIG_SOUND_FUSION is not set +# CONFIG_SOUND_ES1371 is not set +CONFIG_SOUND_ICH=y +# CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +# CONFIG_SOUND_VIA82CXXX is not set +# CONFIG_SOUND_OSS is not set # # USB support # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -1213,17 +1102,19 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set -# CONFIG_USB_OHCI_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_ACM is not set -# CONFIG_USB_PRINTER is not set +CONFIG_USB_PRINTER=y # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -1248,21 +1139,17 @@ CONFIG_USB_STORAGE=y # # USB Input Devices # -# CONFIG_USB_HID is not set - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set # CONFIG_USB_AIPTEK is not set # CONFIG_USB_WACOM is not set # CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_ITMTOUCH is not set -# CONFIG_USB_EGALAX is not set +# CONFIG_USB_TOUCHSCREEN is not set # CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set @@ -1277,21 +1164,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_MICROTEK is not set # -# USB Multimedia devices -# -# CONFIG_USB_DABUSB is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_DSBR is not set -# CONFIG_USB_ET61X251 is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_PWC is not set - -# # USB Network Adapters # # CONFIG_USB_CATC is not set @@ -1299,12 +1171,11 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set -# CONFIG_USB_MON is not set +CONFIG_USB_MON=y # # USB port drivers # -# CONFIG_USB_USS720 is not set # # USB Serial Converter support @@ -1321,10 +1192,12 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set # CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_APPLEDISPLAY is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set # CONFIG_USB_TEST is not set @@ -1344,56 +1217,96 @@ CONFIG_USB_STORAGE=y # CONFIG_MMC is not set # +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + +# # InfiniBand support # # CONFIG_INFINIBAND is not set # -# SN Devices +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# +# CONFIG_EDAC is not set + +# +# Real Time Clock # +# CONFIG_RTC_CLASS is not set # -# EDAC - error detection and reporting (RAS) +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices # -# CONFIG_EDAC is not set # # File systems # CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set # CONFIG_EXT2_FS_XIP is not set -# CONFIG_EXT3_FS is not set -# CONFIG_REISERFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +# CONFIG_REISERFS_FS_SECURITY is not set # CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set +CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set -# CONFIG_INOTIFY is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set +CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_ZISOFS_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set # CONFIG_UDF_FS is not set # # DOS/FAT/NT Filesystems # CONFIG_FAT_FS=y -# CONFIG_MSDOS_FS is not set +CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y -CONFIG_FAT_DEFAULT_CODEPAGE=850 +CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_NTFS_FS is not set @@ -1404,10 +1317,9 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y CONFIG_TMPFS=y -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set # CONFIG_CONFIGFS_FS is not set # @@ -1430,13 +1342,26 @@ CONFIG_RAMFS=y # # Network File Systems # -# CONFIG_NFS_FS is not set -# CONFIG_NFSD is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set -CONFIG_CIFS=y -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_XATTR is not set -# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -1445,33 +1370,18 @@ CONFIG_CIFS=y # # Partition Types # -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set +# CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -# CONFIG_EFI_PARTITION is not set # # Native Language Support # CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-15" -# CONFIG_NLS_CODEPAGE_437 is not set +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=y +# CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set @@ -1491,7 +1401,7 @@ CONFIG_NLS_CODEPAGE_850=y # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set -# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set @@ -1510,20 +1420,51 @@ CONFIG_NLS_UTF8=y # # Instrumentation Support # -# CONFIG_PROFILING is not set -# CONFIG_KPROBES is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_FRAME_POINTER is not set +CONFIG_UNWIND_INFO=y +CONFIG_STACK_UNWIND=y +# CONFIG_FORCED_INLINING is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y # # Security options @@ -1537,10 +1478,6 @@ CONFIG_X86_MPPARSE=y # CONFIG_CRYPTO is not set # -# Hardware crypto devices -# - -# # Library routines # # CONFIG_CRC_CCITT is not set @@ -1548,7 +1485,12 @@ CONFIG_X86_MPPARSE=y CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y CONFIG_KTIME_SCALAR=y diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 5427a842e841..1a884b6e6e5c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -4,7 +4,7 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o @@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ $(call if_changed,syscall) k8-y += ../../x86_64/kernel/k8.o +stacktrace-y += ../../x86_64/kernel/stacktrace.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 7e9ac99354f4..7f7be01f44e6 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_ACPI) += boot.o +ifneq ($(CONFIG_PCI),) obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o +endif obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index ee003bc0e8b1..92f79cdd9a48 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -26,9 +26,12 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/efi.h> +#include <linux/cpumask.h> #include <linux/module.h> #include <linux/dmi.h> #include <linux/irq.h> +#include <linux/bootmem.h> +#include <linux/ioport.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -36,11 +39,17 @@ #include <asm/io.h> #include <asm/mpspec.h> -#ifdef CONFIG_X86_64 +static int __initdata acpi_force = 0; + +#ifdef CONFIG_ACPI +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); -extern void __init clustered_apic_check(void); +#ifdef CONFIG_X86_64 -extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } @@ -53,8 +62,6 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return #include <mach_mpparse.h> #endif /* CONFIG_X86_LOCAL_APIC */ -static inline int gsi_irq_sharing(int gsi) { return gsi; } - #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -459,12 +466,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { -#ifdef CONFIG_X86_IO_APIC - if (use_pci_vector() && !platform_legacy_irq(gsi)) - *irq = IO_APIC_VECTOR(gsi); - else -#endif - *irq = gsi_irq_sharing(gsi); + *irq = gsi; return 0; } @@ -506,16 +508,76 @@ EXPORT_SYMBOL(acpi_register_gsi); #ifdef CONFIG_ACPI_HOTPLUG_CPU int acpi_map_lsapic(acpi_handle handle, int *pcpu) { - /* TBD */ - return -EINVAL; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_table_lapic *lapic; + cpumask_t tmp_map, new_map; + u8 physid; + int cpu; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return -EINVAL; + + if (!buffer.length || !buffer.pointer) + return -EINVAL; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*lapic)) { + kfree(buffer.pointer); + return -EINVAL; + } + + lapic = (struct acpi_table_lapic *)obj->buffer.pointer; + + if ((lapic->header.type != ACPI_MADT_LAPIC) || + (!lapic->flags.enabled)) { + kfree(buffer.pointer); + return -EINVAL; + } + + physid = lapic->id; + + kfree(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + tmp_map = cpu_present_map; + mp_register_lapic(physid, lapic->flags.enabled); + + /* + * If mp_register_lapic successfully generates a new logical cpu + * number, then the following will get us exactly what was mapped + */ + cpus_andnot(new_map, cpu_present_map, tmp_map); + if (cpus_empty(new_map)) { + printk ("Unable to map lapic to logical cpu number\n"); + return -EINVAL; + } + + cpu = first_cpu(new_map); + + *pcpu = cpu; + return 0; } EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { - /* TBD */ - return -EINVAL; + int i; + + for_each_possible_cpu(i) { + if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) { + x86_acpiid_to_apicid[i] = -1; + break; + } + } + x86_cpu_to_apicid[cpu] = -1; + cpu_clear(cpu, cpu_present_map); + num_processors--; + + return (0); } EXPORT_SYMBOL(acpi_unmap_lsapic); @@ -579,6 +641,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; + struct resource *hpet_res; + resource_size_t res_start; if (!phys || !size) return -EINVAL; @@ -594,12 +658,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) "memory.\n"); return -1; } + +#define HPET_RESOURCE_NAME_SIZE 9 + hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + if (hpet_res) { + memset(hpet_res, 0, sizeof(*hpet_res)); + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, + "HPET %u", hpet_tbl->number); + hpet_res->end = (1 * 1024) - 1; + } + #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); + + res_start = vxtime.hpet_address; #else /* X86 */ { extern unsigned long hpet_address; @@ -607,9 +685,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) hpet_address = hpet_tbl->addr.addrl; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); + + res_start = hpet_address; } #endif /* X86 */ + if (hpet_res) { + hpet_res->start = res_start; + hpet_res->end += res_start; + insert_resource(&iomem_resource, hpet_res); + } + return 0; } #else @@ -860,8 +946,6 @@ static void __init acpi_process_madt(void) return; } -extern int acpi_force; - #ifdef __i386__ static int __init disable_acpi_irq(struct dmi_system_id *d) @@ -1163,3 +1247,75 @@ int __init acpi_boot_init(void) return 0; } + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags.trigger = 1; + else if (!strcmp(s, "level")) + acpi_sci_flags.trigger = 3; + else if (!strcmp(s, "high")) + acpi_sci_flags.polarity = 1; + else if (!strcmp(s, "low")) + acpi_sci_flags.polarity = 3; + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 1649a175a206..fe799b11ac0a 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -48,7 +48,11 @@ void __init check_acpi_pci(void) int num, slot, func; /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. */ + always read ffffffff and should not have any side effect. + Actually a few buggy systems can machine check. Allow the user + to disable it by command line option at least -AK */ + if (!early_pci_allowed()) + return; /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8c844d07862f..90faae5c5d30 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi; /* * Knob to control our willingness to enable the local APIC. */ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ +static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + +static inline void lapic_disable(void) +{ + enable_local_apic = -1; + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); +} + +static inline void lapic_enable(void) +{ + enable_local_apic = 1; +} /* * Debug level @@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void) printk("No ESR for 82489DX.\n"); } - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void) return 0; } + +static int __init parse_lapic(char *arg) +{ + lapic_enable(); + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + lapic_disable(); + return 0; +} +early_param("nolapic", parse_nolapic); + diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index ff9ce4b5eaa8..b42f2d914af3 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -225,6 +225,7 @@ #include <linux/smp_lock.h> #include <linux/dmi.h> #include <linux/suspend.h> +#include <linux/kthread.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -402,8 +403,6 @@ static int realmode_power_off = 1; #else static int realmode_power_off; #endif -static int exit_kapmd __read_mostly; -static int kapmd_running __read_mostly; #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else @@ -419,6 +418,8 @@ static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; static const char driver_version[] = "1.16ac"; /* no spaces */ +static struct task_struct *kapmd_task; + /* * APM event names taken from the APM 1.2 specification. These are * the message codes that the BIOS uses to tell us about events @@ -1423,7 +1424,7 @@ static void apm_mainloop(void) set_current_state(TASK_INTERRUPTIBLE); for (;;) { schedule_timeout(APM_CHECK_TIMEOUT); - if (exit_kapmd) + if (kthread_should_stop()) break; /* * Ok, check all events, check for idle (and mark us sleeping @@ -1706,12 +1707,6 @@ static int apm(void *unused) char * power_stat; char * bat_stat; - kapmd_running = 1; - - daemonize("kapmd"); - - current->flags |= PF_NOFREEZE; - #ifdef CONFIG_SMP /* 2002/08/01 - WT * This is to avoid random crashes at boot time during initialization @@ -1821,7 +1816,6 @@ static int apm(void *unused) console_blank_hook = NULL; #endif } - kapmd_running = 0; return 0; } @@ -2220,7 +2214,7 @@ static int __init apm_init(void) { struct proc_dir_entry *apm_proc; struct desc_struct *gdt; - int ret; + int err; dmi_check_system(apm_dmi_table); @@ -2329,12 +2323,17 @@ static int __init apm_init(void) if (apm_proc) apm_proc->owner = THIS_MODULE; - ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); - if (ret < 0) { - printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); + kapmd_task = kthread_create(apm, NULL, "kapmd"); + if (IS_ERR(kapmd_task)) { + printk(KERN_ERR "apm: disabled - Unable to start kernel " + "thread.\n"); + err = PTR_ERR(kapmd_task); + kapmd_task = NULL; remove_proc_entry("apm", NULL); - return -ENOMEM; + return err; } + kapmd_task->flags |= PF_NOFREEZE; + wake_up_process(kapmd_task); if (num_online_cpus() > 1 && !smp ) { printk(KERN_NOTICE @@ -2384,9 +2383,10 @@ static void __exit apm_exit(void) remove_proc_entry("apm", NULL); if (power_off) pm_power_off = NULL; - exit_kapmd = 1; - while (kapmd_running) - schedule(); + if (kapmd_task) { + kthread_stop(kapmd_task); + kapmd_task = NULL; + } #ifdef CONFIG_PM_LEGACY pm_active = 0; #endif diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e6a2d6b80cda..e4758095d87a 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -22,7 +22,7 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -static void __init init_amd(struct cpuinfo_x86 *c) +static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); @@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { @@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev amd_cpu_dev __initdata = { +static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, .c_models = { @@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = { }, }, .c_init = init_amd, - .c_identify = generic_identify, .c_size_cache = amd_size_cache, }; diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index bd75629dd262..8c25047975c0 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_OOSTORE -static u32 __init power2(u32 x) +static u32 __cpuinit power2(u32 x) { u32 s=1; while(s<=x) @@ -22,7 +22,7 @@ static u32 __init power2(u32 x) * Set up an actual MCR */ -static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) +static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) { u32 lo, hi; @@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) * Shortcut: We know you can't put 4Gig of RAM on a winchip */ -static u32 __init ramtop(void) /* 16388 */ +static u32 __cpuinit ramtop(void) /* 16388 */ { int i; u32 top = 0; @@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */ * Compute a set of MCR's to give maximum coverage */ -static int __init centaur_mcr_compute(int nr, int key) +static int __cpuinit centaur_mcr_compute(int nr, int key) { u32 mem = ramtop(); u32 root = power2(mem); @@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key) return ct; } -static void __init centaur_create_optimal_mcr(void) +static void __cpuinit centaur_create_optimal_mcr(void) { int i; /* @@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void) wrmsr(MSR_IDT_MCR0+i, 0, 0); } -static void __init winchip2_create_optimal_mcr(void) +static void __cpuinit winchip2_create_optimal_mcr(void) { u32 lo, hi; int i; @@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void) * Handle the MCR key on the Winchip 2. */ -static void __init winchip2_unprotect_mcr(void) +static void __cpuinit winchip2_unprotect_mcr(void) { u32 lo, hi; u32 key; @@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void) wrmsr(MSR_IDT_MCR_CTRL, lo, hi); } -static void __init winchip2_protect_mcr(void) +static void __cpuinit winchip2_protect_mcr(void) { u32 lo, hi; @@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void) #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ -static void __init init_c3(struct cpuinfo_x86 *c) +static void __cpuinit init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c) display_cacheinfo(c); } -static void __init init_centaur(struct cpuinfo_x86 *c) +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { enum { ECX8=1<<1, @@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c) } } -static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) @@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size return size; } -static struct cpu_dev centaur_cpu_dev __initdata = { +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_init = init_centaur, diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 70c87de582c7..d9f3e3c31f05 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; extern int disable_pse; -static void default_init(struct cpuinfo_x86 * c) +static void __cpuinit default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c) } } -static struct cpu_dev default_cpu = { +static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; @@ -184,7 +184,16 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) static int __init x86_fxsr_setup(char * s) { + /* Tell all the other CPU's to not use it... */ disable_x86_fxsr = 1; + + /* + * ... and clear the bits early in the boot_cpu_data + * so that the bootup process doesn't try to do this + * either. + */ + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); return 1; } __setup("nofxsr", x86_fxsr_setup); @@ -265,7 +274,7 @@ static void __init early_cpu_detect(void) } } -void __cpuinit generic_identify(struct cpuinfo_x86 * c) +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int ebx; @@ -660,8 +669,7 @@ old_gdt: */ atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - if (current->mm) - BUG(); + BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); load_esp0(t, thread); @@ -675,7 +683,7 @@ old_gdt: #endif /* Clear %fs and %gs. */ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h index 5a1d4f163e84..2f6432cef6ff 100644 --- a/arch/i386/kernel/cpu/cpu.h +++ b/arch/i386/kernel/cpu/cpu.h @@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); -extern void generic_identify(struct cpuinfo_x86 * c); - extern void early_intel_workaround(struct cpuinfo_x86 *c); diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index ea19d091fd41..57c880bf0bd6 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -396,13 +396,13 @@ static int acpi_cpufreq_early_init_acpi(void) */ static int bios_with_sw_any_bug; -static int __init sw_any_bug_found(struct dmi_system_id *d) +static int sw_any_bug_found(struct dmi_system_id *d) { bios_with_sw_any_bug = 1; return 0; } -static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = { +static struct dmi_system_id sw_any_bug_dmi_table[] = { { .callback = sw_any_bug_found, .ident = "Supermicro Server X6DLP", @@ -597,7 +597,6 @@ static struct cpufreq_driver acpi_cpufreq_driver = { .name = "acpi-cpufreq", .owner = THIS_MODULE, .attr = acpi_cpufreq_attr, - .flags = CPUFREQ_STICKY, }; @@ -608,7 +607,7 @@ acpi_cpufreq_init (void) acpi_cpufreq_early_init_acpi(); - return cpufreq_register_driver(&acpi_cpufreq_driver); + return cpufreq_register_driver(&acpi_cpufreq_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index f5cc9f5c9bab..7233abe5d695 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -178,11 +178,17 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) safe_halt(); /* Change frequency on next halt or sleep */ wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_fadt.xpm_tmr_blk.address); + if (port22_en) { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C1 */ + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_fadt.xpm_tmr_blk.address); + } /* Disable bus ratio bit */ local_irq_disable(); @@ -567,16 +573,23 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle, static int enable_arbiter_disable(void) { struct pci_dev *dev; + int reg; u8 pci_cmd; /* Find PLE133 host bridge */ + reg = 0x78; dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL); + /* Find CLE266 host bridge */ + if (dev == NULL) { + reg = 0x76; + dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_862X_0, NULL); + } if (dev != NULL) { /* Enable access to port 0x22 */ - pci_read_config_byte(dev, 0x78, &pci_cmd); + pci_read_config_byte(dev, reg, &pci_cmd); if ( !(pci_cmd & 1<<7) ) { pci_cmd |= 1<<7; - pci_write_config_byte(dev, 0x78, pci_cmd); + pci_write_config_byte(dev, reg, pci_cmd); } return 1; } @@ -680,20 +693,25 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) if (longhaul_version == TYPE_POWERSAVER) { /* Check ACPI support for C3 state */ cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address == 0 || - (cx->latency > 1000 && ignore_latency == 0) ) + if (cx->address > 0 && + (cx->latency <= 1000 || ignore_latency != 0) ) { + goto print_support_type; + } + } + /* Check ACPI support for bus master arbiter disable */ + if (!pr->flags.bm_control) { + if (enable_arbiter_disable()) { + port22_en = 1; + } else { goto err_acpi; - - } else { - /* Check ACPI support for bus master arbiter disable */ - if (!pr->flags.bm_control) { - if (!enable_arbiter_disable()) { - printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n"); - return -ENODEV; - } else - port22_en = 1; } } +print_support_type: + if (!port22_en) { + printk (KERN_INFO PFX "Using ACPI support.\n"); + } else { + printk (KERN_INFO PFX "Using northbridge support.\n"); + } ret = longhaul_get_ranges(); if (ret != 0) @@ -716,7 +734,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) return 0; err_acpi: - printk(KERN_ERR PFX "No ACPI support for CPU frequency changes.\n"); + printk(KERN_ERR PFX "No ACPI support. No VT8601 or VT8623 northbridge. Aborting.\n"); return -ENODEV; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 7a9325349e94..e8993baf3d14 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -386,7 +386,7 @@ static int centrino_cpu_early_init_acpi(void) * than OS intended it to run at. Detect it and handle it cleanly. */ static int bios_with_sw_any_bug; -static int __init sw_any_bug_found(struct dmi_system_id *d) +static int sw_any_bug_found(struct dmi_system_id *d) { bios_with_sw_any_bug = 1; return 0; diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index f03b7f94c304..c0c3b59de32c 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -12,7 +12,7 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; unsigned long flags; @@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) * Actually since bugs.h doesn't even reference this perhaps someone should * fix the documentation ??? */ -static unsigned char Cx86_dir0_msb __initdata = 0; +static unsigned char Cx86_dir0_msb __cpuinitdata = 0; -static char Cx86_model[][9] __initdata = { +static char Cx86_model[][9] __cpuinitdata = { "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", "M II ", "Unknown" }; -static char Cx486_name[][5] __initdata = { +static char Cx486_name[][5] __cpuinitdata = { "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", "SRx2", "DRx2" }; -static char Cx486S_name[][4] __initdata = { +static char Cx486S_name[][4] __cpuinitdata = { "S", "S2", "Se", "S2e" }; -static char Cx486D_name[][4] __initdata = { +static char Cx486D_name[][4] __cpuinitdata = { "DX", "DX2", "?", "?", "?", "DX4" }; -static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __initdata = "12??43"; -static char cyrix_model_mult2[] __initdata = "12233445"; +static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +static char cyrix_model_mult2[] __cpuinitdata = "12233445"; /* * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old @@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445"; extern void calibrate_delay(void) __init; -static void __init check_cx686_slop(struct cpuinfo_x86 *c) +static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) { unsigned long flags; @@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c) } -static void __init set_cx86_reorder(void) +static void __cpuinit set_cx86_reorder(void) { u8 ccr3; @@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void) setCx86(CX86_CCR3, ccr3); } -static void __init set_cx86_memwb(void) +static void __cpuinit set_cx86_memwb(void) { u32 cr0; @@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } -static void __init set_cx86_inc(void) +static void __cpuinit set_cx86_inc(void) { unsigned char ccr3; @@ -158,7 +158,7 @@ static void __init set_cx86_inc(void) * Configure later MediaGX and/or Geode processor. */ -static void __init geode_configure(void) +static void __cpuinit geode_configure(void) { unsigned long flags; u8 ccr3, ccr4; @@ -184,14 +184,14 @@ static void __init geode_configure(void) #ifdef CONFIG_PCI -static struct pci_device_id __initdata cyrix_55x0[] = { +static struct pci_device_id __cpuinitdata cyrix_55x0[] = { { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, { }, }; #endif -static void __init init_cyrix(struct cpuinfo_x86 *c) +static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; char *buf = c->x86_model_id; @@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) /* * Handle National Semiconductor branded processors */ -static void __init init_nsc(struct cpuinfo_x86 *c) +static void __cpuinit init_nsc(struct cpuinfo_x86 *c) { /* There may be GX1 processors in the wild that are branded * NSC and not Cyrix. @@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void) return (unsigned char) (test >> 8) == 0x02; } -static void cyrix_identify(struct cpuinfo_x86 * c) +static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) { /* Detect Cyrix with disabled CPUID */ if ( c->x86 == 4 && test_cyrix_52div() ) { @@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c) local_irq_restore(flags); } } - generic_identify(c); } -static struct cpu_dev cyrix_cpu_dev __initdata = { +static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, .c_init = init_cyrix, @@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void) late_initcall(cyrix_exit_cpu); -static struct cpu_dev nsc_cpu_dev __initdata = { +static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, - .c_identify = generic_identify, }; int __init nsc_init_cpu(void) diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5a2e270924b1..94a95aa5227e 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } -static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* Intel PIII Tualatin. This comes in two flavours. * One has 256kb of cache, the other 512. We have no way @@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { }, }, .c_init = init_intel, - .c_identify = generic_identify, .c_size_cache = intel_size_cache, }; diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile index 30808f3d6715..f1ebe1c1c17a 100644 --- a/arch/i386/kernel/cpu/mcheck/Makefile +++ b/arch/i386/kernel/cpu/mcheck/Makefile @@ -1,2 +1,2 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o +obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index b95f1b3d53aa..504434a46011 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -13,6 +13,8 @@ #include <asm/msr.h> #include <asm/apic.h> +#include <asm/therm_throt.h> + #include "mce.h" /* as supported by the P4/Xeon family */ @@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) /* P4/Xeon Thermal transition interrupt handler */ static void intel_thermal_interrupt(struct pt_regs *regs) { - u32 l, h; - unsigned int cpu = smp_processor_id(); - static unsigned long next[NR_CPUS]; + __u64 msr_val; ack_APIC_irq(); - if (time_after(next[cpu], jiffies)) - return; - - next[cpu] = jiffies + HZ*5; - rdmsr(MSR_IA32_THERM_STATUS, l, h); - if (l & 0x1) { - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", - cpu); - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); - } + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + therm_throt_process(msr_val & 0x1); } /* Thermal interrupt handler for this CPU setup */ @@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) rdmsr (MSR_IA32_MISC_ENABLE, l, h); wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - + l = apic_read (APIC_LVTTHMR); apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); return; } #endif /* CONFIG_X86_MCE_P4THERMAL */ diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c new file mode 100644 index 000000000000..4f43047de406 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -0,0 +1,180 @@ +/* + * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c + * + * Thermal throttle event support code (such as syslog messaging and rate + * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). + * This allows consistent reporting of CPU thermal throttle events. + * + * Maintains a counter in /sys that keeps track of the number of thermal + * events, such that the user knows how bad the thermal problem might be + * (since the logging to syslog and mcelog is rate limited). + * + * Author: Dmitriy Zavin (dmitriyz@google.com) + * + * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. + * Inspired by Ross Biro's and Al Borchers' counter code. + */ + +#include <linux/percpu.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <asm/cpu.h> +#include <linux/notifier.h> +#include <asm/therm_throt.h> + +/* How long to wait between reporting thermal events */ +#define CHECK_INTERVAL (300 * HZ) + +static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +atomic_t therm_throt_en = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ +} + +define_therm_throt_sysdev_show_func(count); +define_therm_throt_sysdev_one_ro(count); + +static struct attribute *thermal_throttle_attrs[] = { + &attr_count.attr, + NULL +}; + +static struct attribute_group thermal_throttle_attr_group = { + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" +}; +#endif /* CONFIG_SYSFS */ + +/*** + * therm_throt_process - Process thermal throttling event from interrupt + * @curr: Whether the condition is current or not (boolean), since the + * thermal interrupt normally gets called both when the thermal + * event begins and once the event has ended. + * + * This function is called by the thermal interrupt after the + * IRQ has been acknowledged. + * + * It will take care of rate limiting and printing messages to the syslog. + * + * Returns: 0 : Event should NOT be further logged, i.e. still in + * "timeout" from previous log message. + * 1 : Event should be logged further, and a message has been + * printed to the syslog. + */ +int therm_throt_process(int curr) +{ + unsigned int cpu = smp_processor_id(); + __u64 tmp_jiffs = get_jiffies_64(); + + if (curr) + __get_cpu_var(thermal_throttle_count)++; + + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + return 0; + + __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + + /* if we just entered the thermal event */ + if (curr) { + printk(KERN_CRIT "CPU%d: Temperature above threshold, " + "cpu clock throttled (total events = %lu)\n", cpu, + __get_cpu_var(thermal_throttle_count)); + + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } + + return 1; +} + +#ifdef CONFIG_SYSFS +/* Add/Remove thermal_throttle interface for CPU device */ +static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev) +{ + sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev) +{ + sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; +} + +/* Mutex protecting device creation against CPU hotplug */ +static DEFINE_MUTEX(therm_cpu_lock); + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + mutex_lock(&therm_cpu_lock); + switch (action) { + case CPU_ONLINE: + thermal_throttle_add_dev(sys_dev); + break; + case CPU_DEAD: + thermal_throttle_remove_dev(sys_dev); + break; + } + mutex_unlock(&therm_cpu_lock); + return NOTIFY_OK; +} + +static struct notifier_block thermal_throttle_cpu_notifier = +{ + .notifier_call = thermal_throttle_cpu_callback, +}; +#endif /* CONFIG_HOTPLUG_CPU */ + +static __init int thermal_throttle_init_device(void) +{ + unsigned int cpu = 0; + + if (!atomic_read(&therm_throt_en)) + return 0; + + register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&therm_cpu_lock); +#endif + /* connect live CPUs to sysfs */ + for_each_online_cpu(cpu) + thermal_throttle_add_dev(get_cpu_sysdev(cpu)); +#ifdef CONFIG_HOTPLUG_CPU + mutex_unlock(&therm_cpu_lock); +#endif + + return 0; +} + +device_initcall(thermal_throttle_init_device); +#endif /* CONFIG_SYSFS */ diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index ad87fa58058d..8bf23cc80c63 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -10,7 +10,7 @@ * to have CPUID. (Thanks to Herbert Oppmann) */ -static int __init deep_magic_nexgen_probe(void) +static int __cpuinit deep_magic_nexgen_probe(void) { int ret; @@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void) return ret; } -static void __init init_nexgen(struct cpuinfo_x86 * c) +static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) { c->x86_cache_size = 256; /* A few had 1 MB... */ } -static void __init nexgen_identify(struct cpuinfo_x86 * c) +static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) { /* Detect NexGen with old hypercode */ if ( deep_magic_nexgen_probe() ) { strcpy(c->x86_vendor_id, "NexGenDriven"); } - generic_identify(c); } -static struct cpu_dev nexgen_cpu_dev __initdata = { +static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { .c_vendor = "Nexgen", .c_ident = { "NexGenDriven" }, .c_models = { diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index f54a15268ed7..76aac088a323 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index d08d5a2811c8..9317f7414989 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c @@ -5,7 +5,7 @@ #include "cpu.h" -static void __init init_rise(struct cpuinfo_x86 *c) +static void __cpuinit init_rise(struct cpuinfo_x86 *c) { printk("CPU: Rise iDragon"); if (c->x86_model > 2) @@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CX8, c->x86_capability); } -static struct cpu_dev rise_cpu_dev __initdata = { +static struct cpu_dev rise_cpu_dev __cpuinitdata = { .c_vendor = "Rise", .c_ident = { "RiseRiseRise" }, .c_models = { diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 7214c9b577ab..4056fb7d2cdf 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -5,7 +5,7 @@ #include <asm/msr.h> #include "cpu.h" -static void __init init_transmeta(struct cpuinfo_x86 *c) +static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; unsigned int cms_rev1, cms_rev2; @@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __init transmeta_identify(struct cpuinfo_x86 * c) +static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) { u32 xlvl; - generic_identify(c); /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); @@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c) } } -static struct cpu_dev transmeta_cpu_dev __initdata = { +static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 2cd988f6dc55..1bf3f87e9c5b 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c @@ -5,12 +5,8 @@ /* UMC chips appear to be only either 386 or 486, so no special init takes place. */ -static void __init init_umc(struct cpuinfo_x86 * c) -{ - -} -static struct cpu_dev umc_cpu_dev __initdata = { +static struct cpu_dev umc_cpu_dev __cpuinitdata = { .c_vendor = "UMC", .c_ident = { "UMC UMC UMC" }, .c_models = { @@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = { } }, }, - .c_init = init_umc, }; int __init umc_init_cpu(void) diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 5b96f038367f..144b43288965 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -22,6 +22,9 @@ #include <asm/nmi.h> #include <asm/hw_irq.h> #include <asm/apic.h> +#include <asm/kdebug.h> +#include <asm/smp.h> + #include <mach_ipi.h> @@ -86,23 +89,32 @@ static void crash_save_self(struct pt_regs *regs) { int cpu; - cpu = smp_processor_id(); + cpu = safe_smp_processor_id(); crash_save_this_cpu(regs, cpu); } #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct pt_regs *regs, int cpu) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { + struct pt_regs *regs; struct pt_regs fixed_regs; + int cpu; + + if (val != DIE_NMI_IPI) + return NOTIFY_OK; + + regs = ((struct die_args *)data)->regs; + cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. * Otherwise, system will completely hang. Crashing cpu can get * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return 1; + return NOTIFY_STOP; local_irq_disable(); if (!user_mode_vm(regs)) { @@ -122,16 +134,24 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + cpumask_t mask = cpu_online_map; + cpu_clear(safe_smp_processor_id(), mask); + if (!cpus_empty(mask)) + send_IPI_mask(mask, NMI_VECTOR); } +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, +}; + static void nmi_shootdown_cpus(void) { unsigned long msecs; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - set_nmi_callback(crash_nmi_callback); + if (register_die_notifier(&crash_nmi_nb)) + return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI */ @@ -169,7 +189,7 @@ void machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = smp_processor_id(); + crashing_cpu = safe_smp_processor_id(); nmi_shootdown_cpus(); lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index fe158042110b..8b40648d0ef0 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -65,7 +65,7 @@ static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); static pgd_t efi_bak_pg_dir_pointer[2]; -static void efi_call_phys_prelog(void) +static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) { unsigned long cr4; unsigned long temp; @@ -109,7 +109,7 @@ static void efi_call_phys_prelog(void) load_gdt(cpu_gdt_descr); } -static void efi_call_phys_epilog(void) +static void efi_call_phys_epilog(void) __releases(efi_rt_lock) { unsigned long cr4; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); @@ -498,8 +498,7 @@ void __init efi_enter_virtual_mode(void) check_range_for_systab(md); } - if (!efi.systab) - BUG(); + BUG_ON(!efi.systab); status = phys_efi_set_virtual_address_map( memmap.desc_size * memmap.nr_map, diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 87f9f60b803b..5a63d6fdb70e 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -76,8 +76,15 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 +/* These are replaces for paravirtualization */ +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax + #ifdef CONFIG_PREEMPT -#define preempt_stop cli; TRACE_IRQS_OFF +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF #else #define preempt_stop #define resume_kernel restore_nocheck @@ -176,18 +183,21 @@ VM_MASK = 0x00020000 #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 3*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_EC_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 4*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, OLDESP-EBX;\ /*CFI_OFFSET cs, CS-OLDESP;*/\ CFI_OFFSET eip, EIP-OLDESP;\ @@ -233,10 +243,11 @@ ret_from_intr: check_userspace: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al - testl $(VM_MASK | 3), %eax - jz resume_kernel + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -247,7 +258,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cli + DISABLE_INTERRUPTS cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: @@ -267,6 +278,7 @@ need_resched: # sysenter call handler stub ENTRY(sysenter_entry) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp @@ -275,7 +287,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -320,7 +332,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx @@ -330,8 +342,7 @@ sysenter_past_esp: movl OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON - sti - sysexit + ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC @@ -356,7 +367,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -371,8 +382,8 @@ restore_all: # See comments in process.c:copy_thread() for details. movb OLDSS(%esp), %ah movb CS(%esp), %al - andl $(VM_MASK | (4 << 8) | 3), %eax - cmpl $((4 << 8) | 3), %eax + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: @@ -381,11 +392,11 @@ restore_nocheck_notrace: RESTORE_REGS addl $4, %esp CFI_ADJUST_CFA_OFFSET -4 -1: iret +1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -409,7 +420,7 @@ ldt_ss: * dosemu and wine happy. */ subl $8, %esp # reserve space for switch16 pointer CFI_ADJUST_CFA_OFFSET 8 - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, @@ -419,7 +430,7 @@ ldt_ss: TRACE_IRQS_IRET RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack -1: iret +1: INTERRUPT_RETURN .section __ex_table,"a" .align 4 .long 1b,iret_exc @@ -434,7 +445,7 @@ work_pending: jz work_notifysig work_resched: call schedule - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -490,7 +501,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - sti # could let do_syscall_trace() call + ENABLE_INTERRUPTS # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -591,11 +602,9 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: @@ -645,6 +654,7 @@ error_code: call *%edi jmp ret_from_exception CFI_ENDPROC +KPROBE_END(page_fault) ENTRY(coprocessor_error) RING0_INT_FRAME @@ -669,7 +679,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - movl %cr0, %eax + GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate preempt_stop @@ -702,9 +712,15 @@ device_not_available_emulate: jne ok; \ label: \ movl TSS_sysenter_esp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl $__KERNEL_CS; \ - pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 KPROBE_ENTRY(debug) RING0_INT_FRAME @@ -720,7 +736,8 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(debug) + /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -729,7 +746,7 @@ debug_stack_correct: * check whether we got an NMI on the debug path where the debug * fault happened on the sysenter path. */ -ENTRY(nmi) +KPROBE_ENTRY(nmi) RING0_INT_FRAME pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -754,6 +771,7 @@ ENTRY(nmi) cmpl $sysenter_entry,12(%esp) je nmi_debug_stack_check nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL @@ -764,9 +782,12 @@ nmi_stack_correct: CFI_ENDPROC nmi_stack_fixup: + RING0_INT_FRAME FIX_STACK(12,nmi_stack_correct, 1) jmp nmi_stack_correct + nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -777,8 +798,10 @@ nmi_debug_stack_check: jmp nmi_stack_correct nmi_16bit_stack: - RING0_INT_FRAME - /* create the pointer to lss back */ + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ pushl %ss CFI_ADJUST_CFA_OFFSET 4 pushl %esp @@ -799,12 +822,13 @@ nmi_16bit_stack: call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack -1: iret +1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" .align 4 .long 1b,iret_exc .previous +KPROBE_END(nmi) KPROBE_ENTRY(int3) RING0_INT_FRAME @@ -816,7 +840,7 @@ KPROBE_ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(int3) ENTRY(overflow) RING0_INT_FRAME @@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) RING0_EC_FRAME @@ -890,13 +914,14 @@ ENTRY(alignment_check) jmp error_code CFI_ENDPROC -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) @@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running) ENDPROC(arch_unwind_init_running) #endif +ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder + CFI_STARTPROC + movl %edx,%eax + push %edx + CFI_ADJUST_CFA_OFFSET 4 + call *%ebx + push %eax + CFI_ADJUST_CFA_OFFSET 4 + call do_exit + CFI_ENDPROC +ENDPROC(kernel_thread_helper) + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index a6b8bd89aa27..be9d883c62ce 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -371,8 +371,65 @@ rp_sidt: addl $8,%edi dec %ecx jne rp_sidt + +.macro set_early_handler handler,trapno + lea \handler,%edx + movl $(__KERNEL_CS << 16),%eax + movw %dx,%ax + movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ + lea idt_table,%edi + movl %eax,8*\trapno(%edi) + movl %edx,8*\trapno+4(%edi) +.endm + + set_early_handler handler=early_divide_err,trapno=0 + set_early_handler handler=early_illegal_opcode,trapno=6 + set_early_handler handler=early_protection_fault,trapno=13 + set_early_handler handler=early_page_fault,trapno=14 + ret +early_divide_err: + xor %edx,%edx + pushl $0 /* fake errcode */ + jmp early_fault + +early_illegal_opcode: + movl $6,%edx + pushl $0 /* fake errcode */ + jmp early_fault + +early_protection_fault: + movl $13,%edx + jmp early_fault + +early_page_fault: + movl $14,%edx + jmp early_fault + +early_fault: + cld +#ifdef CONFIG_PRINTK + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es + cmpl $2,early_recursion_flag + je hlt_loop + incl early_recursion_flag + movl %cr2,%eax + pushl %eax + pushl %edx /* trapno */ + pushl $fault_msg +#ifdef CONFIG_EARLY_PRINTK + call early_printk +#else + call printk +#endif +#endif +hlt_loop: + hlt + jmp hlt_loop + /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: @@ -386,6 +443,9 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + cmpl $2,early_recursion_flag + je hlt_loop + incl early_recursion_flag pushl 16(%esp) pushl 24(%esp) pushl 32(%esp) @@ -431,9 +491,16 @@ ENTRY(stack_start) ready: .byte 0 +early_recursion_flag: + .long 0 + int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" +fault_msg: + .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" + .asciz "Stack: %p %p %p %p %p %p %p %p\n" + /* * The IDT and GDT 'descriptors' are a strange 48-bit object * only used by the lidt and lgdt instructions. They are not diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c index c36d1c006c2f..6f508e8d7c57 100644 --- a/arch/i386/kernel/i8237.c +++ b/arch/i386/kernel/i8237.c @@ -2,6 +2,11 @@ * i8237.c: 8237A DMA controller suspend functions. * * Written by Pierre Ossman, 2005. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. */ #include <linux/init.h> diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d4756d154f47..d07ed31f11e3 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -34,33 +34,15 @@ * moves to arch independent land */ +static int i8259A_auto_eoi; DEFINE_SPINLOCK(i8259A_lock); - -static void end_8259A_irq (unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) && - irq_desc[irq].action) - enable_8259A_irq(irq); -} - -#define shutdown_8259A_irq disable_8259A_irq - static void mask_and_ack_8259A(unsigned int); -unsigned int startup_8259A_irq(unsigned int irq) -{ - enable_8259A_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type i8259A_irq_type = { - .typename = "XT-PIC", - .startup = startup_8259A_irq, - .shutdown = shutdown_8259A_irq, - .enable = enable_8259A_irq, - .disable = disable_8259A_irq, - .ack = mask_and_ack_8259A, - .end = end_8259A_irq, +static struct irq_chip i8259A_chip = { + .name = "XT-PIC", + .mask = disable_8259A_irq, + .unmask = enable_8259A_irq, + .mask_ack = mask_and_ack_8259A, }; /* @@ -131,7 +113,7 @@ void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); - irq_desc[irq].chip = &i8259A_irq_type; + set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); } @@ -253,7 +235,7 @@ static void save_ELCR(char *trigger) static int i8259A_resume(struct sys_device *dev) { - init_8259A(0); + init_8259A(i8259A_auto_eoi); restore_ELCR(irq_trigger); return 0; } @@ -301,6 +283,8 @@ void init_8259A(int auto_eoi) { unsigned long flags; + i8259A_auto_eoi = auto_eoi; + spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ @@ -323,12 +307,12 @@ void init_8259A(int auto_eoi) outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ if (auto_eoi) /* - * in AEOI mode we just have to mask the interrupt + * In AEOI mode we just have to mask the interrupt * when acking. */ - i8259A_irq_type.ack = disable_8259A_irq; + i8259A_chip.mask_ack = disable_8259A_irq; else - i8259A_irq_type.ack = mask_and_ack_8259A; + i8259A_chip.mask_ack = mask_and_ack_8259A; udelay(100); /* wait for 8259A to initialize */ @@ -385,12 +369,13 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - irq_desc[i].chip = &i8259A_irq_type; + set_irq_chip_and_handler(i, &i8259A_chip, + handle_level_irq); } else { /* * 'high' PCI IRQs filled in on demand */ - irq_desc[i].chip = &no_irq_type; + irq_desc[i].chip = &no_irq_chip; } } } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 4fb32c551fe0..b7287fb499f3 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -31,6 +31,9 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/sysdev.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/htirq.h> #include <asm/io.h> #include <asm/smp.h> @@ -38,8 +41,11 @@ #include <asm/timer.h> #include <asm/i8259.h> #include <asm/nmi.h> +#include <asm/msidef.h> +#include <asm/hypertransport.h> #include <mach_apic.h> +#include <mach_apicdef.h> #include "io_ports.h" @@ -65,7 +71,7 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; -int disable_timer_pin_1 __initdata; +static int disable_timer_pin_1 __initdata; /* * Rough estimation of how many shared IRQs there are, can @@ -85,13 +91,32 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; -#ifdef CONFIG_PCI_MSI -#define vector_to_irq(vector) \ - (platform_legacy_irq(vector) ? vector : vector_irq[vector]) -#else -#define vector_to_irq(vector) (vector) -#endif +union entry_union { + struct { u32 w1, w2; }; + struct IO_APIC_route_entry entry; +}; + +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + union entry_union eu; + eu.entry = e; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + spin_unlock_irqrestore(&ioapic_lock, flags); +} /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are @@ -200,13 +225,9 @@ static void unmask_IO_APIC_irq (unsigned int irq) static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; - unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ - spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; @@ -215,10 +236,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) */ memset(&entry, 0, sizeof(entry)); entry.mask = 1; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); } static void clear_IO_APIC (void) @@ -258,7 +276,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } - set_irq_info(irq, cpumask); + set_native_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1159,46 +1177,45 @@ static inline int IO_APIC_irq_trigger(int irq) /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; -int assign_irq_vector(int irq) +static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - unsigned long flags; int vector; - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - spin_lock_irqsave(&vector_lock, flags); - - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { - spin_unlock_irqrestore(&vector_lock, flags); + if (IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); - } -next: + current_vector += 8; if (current_vector == SYSCALL_VECTOR) - goto next; + current_vector += 8; if (current_vector >= FIRST_SYSTEM_VECTOR) { offset++; - if (!(offset%8)) { - spin_unlock_irqrestore(&vector_lock, flags); + if (!(offset % 8)) return -ENOSPC; - } current_vector = FIRST_DEVICE_VECTOR + offset; } vector = current_vector; - vector_irq[vector] = irq; - if (irq != AUTO_ASSIGN) - IO_APIC_VECTOR(irq) = vector; + IO_APIC_VECTOR(irq) = vector; + + return vector; +} + +static int assign_irq_vector(int irq) +{ + unsigned long flags; + int vector; + spin_lock_irqsave(&vector_lock, flags); + vector = __assign_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); return vector; } - -static struct hw_interrupt_type ioapic_level_type; -static struct hw_interrupt_type ioapic_edge_type; +static struct irq_chip ioapic_chip; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 @@ -1206,16 +1223,14 @@ static struct hw_interrupt_type ioapic_edge_type; static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { - unsigned idx; - - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) - irq_desc[idx].chip = &ioapic_level_type; + set_irq_chip_and_handler(irq, &ioapic_chip, + handle_fasteoi_irq); else - irq_desc[idx].chip = &ioapic_edge_type; - set_intr_gate(vector, interrupt[idx]); + set_irq_chip_and_handler(irq, &ioapic_chip, + handle_edge_irq); + set_intr_gate(vector, interrupt[irq]); } static void __init setup_IO_APIC_irqs(void) @@ -1283,9 +1298,8 @@ static void __init setup_IO_APIC_irqs(void) if (!apic && (irq < 16)) disable_8259A_irq(irq); } + ioapic_write_entry(apic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1301,7 +1315,6 @@ static void __init setup_IO_APIC_irqs(void) static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - unsigned long flags; memset(&entry,0,sizeof(entry)); @@ -1326,15 +1339,13 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... */ - irq_desc[0].chip = &ioapic_edge_type; + irq_desc[0].chip = &ioapic_chip; + set_irq_handler(0, handle_edge_irq); /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); enable_8259A_irq(0); } @@ -1444,10 +1455,7 @@ void __init print_IO_APIC(void) for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, i); printk(KERN_DEBUG " %02x %03X %02X ", i, @@ -1467,17 +1475,12 @@ void __init print_IO_APIC(void) ); } } - if (use_pci_vector()) - printk(KERN_INFO "Using vector-based indexing\n"); printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < NR_IRQS; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; - if (use_pci_vector() && !platform_legacy_irq(i)) - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); - else - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -1666,10 +1669,7 @@ static void __init enable_IO_APIC(void) /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, pin); /* If the interrupt line is enabled and in ExtInt mode @@ -1726,7 +1726,6 @@ void disable_IO_APIC(void) */ if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; - unsigned long flags; memset(&entry, 0, sizeof(entry)); entry.mask = 0; /* Enabled */ @@ -1743,12 +1742,7 @@ void disable_IO_APIC(void) /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, - *(((int *)&entry)+1)); - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, - *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } disconnect_bsp_APIC(ioapic_i8259.pin != -1); } @@ -1913,6 +1907,8 @@ static int __init timer_irq_works(void) */ /* + * Startup quirk: + * * Starting up a edge-triggered IO-APIC interrupt is * nasty - we need to make sure that we get the edge. * If it is already asserted for some reason, we need @@ -1920,8 +1916,10 @@ static int __init timer_irq_works(void) * * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... + * + * (We do this for level-triggered IRQs too - it cannot hurt.) */ -static unsigned int startup_edge_ioapic_irq(unsigned int irq) +static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; @@ -1938,47 +1936,18 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) return was_pending; } -/* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ -static void ack_edge_ioapic_irq(unsigned int irq) +static void ack_ioapic_irq(unsigned int irq) { - move_irq(irq); - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) - == (IRQ_PENDING | IRQ_DISABLED)) - mask_IO_APIC_irq(irq); + move_native_irq(irq); ack_APIC_irq(); } -/* - * Level triggered interrupts can just be masked, - * and shutting down and starting up the interrupt - * is the same as enabling and disabling them -- except - * with a startup need to return a "was pending" value. - * - * Level triggered interrupts are special because we - * do not touch any IO-APIC register while handling - * them. We ack the APIC in the end-IRQ handler, not - * in the start-IRQ-handler. Protection against reentrance - * from the same interrupt is still provided, both by the - * generic IRQ layer and by the fact that an unacked local - * APIC does not accept IRQs. - */ -static unsigned int startup_level_ioapic_irq (unsigned int irq) -{ - unmask_IO_APIC_irq(irq); - - return 0; /* don't check for pending */ -} - -static void end_level_ioapic_irq (unsigned int irq) +static void ack_ioapic_quirk_irq(unsigned int irq) { unsigned long v; int i; - move_irq(irq); + move_native_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -2013,105 +1982,26 @@ static void end_level_ioapic_irq (unsigned int irq) } } -#ifdef CONFIG_PCI_MSI -static unsigned int startup_edge_ioapic_vector(unsigned int vector) -{ - int irq = vector_to_irq(vector); - - return startup_edge_ioapic_irq(irq); -} - -static void ack_edge_ioapic_vector(unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - ack_edge_ioapic_irq(irq); -} - -static unsigned int startup_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - return startup_level_ioapic_irq (irq); -} - -static void end_level_ioapic_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - move_native_irq(vector); - end_level_ioapic_irq(irq); -} - -static void mask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - mask_IO_APIC_irq(irq); -} - -static void unmask_IO_APIC_vector (unsigned int vector) -{ - int irq = vector_to_irq(vector); - - unmask_IO_APIC_irq(irq); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_vector (unsigned int vector, - cpumask_t cpu_mask) -{ - int irq = vector_to_irq(vector); - - set_native_irq_info(vector, cpu_mask); - set_ioapic_affinity_irq(irq, cpu_mask); -} -#endif -#endif - -static int ioapic_retrigger(unsigned int irq) +static int ioapic_retrigger_irq(unsigned int irq) { send_IPI_self(IO_APIC_VECTOR(irq)); return 1; } -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ -static struct hw_interrupt_type ioapic_edge_type __read_mostly = { - .typename = "IO-APIC-edge", - .startup = startup_edge_ioapic, - .shutdown = shutdown_edge_ioapic, - .enable = enable_edge_ioapic, - .disable = disable_edge_ioapic, - .ack = ack_edge_ioapic, - .end = end_edge_ioapic, +static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_ioapic_irq, + .eoi = ack_ioapic_quirk_irq, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, + .set_affinity = set_ioapic_affinity_irq, #endif - .retrigger = ioapic_retrigger, + .retrigger = ioapic_retrigger_irq, }; -static struct hw_interrupt_type ioapic_level_type __read_mostly = { - .typename = "IO-APIC-level", - .startup = startup_level_ioapic, - .shutdown = shutdown_level_ioapic, - .enable = enable_level_ioapic, - .disable = disable_level_ioapic, - .ack = mask_and_ack_level_ioapic, - .end = end_level_ioapic, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity, -#endif - .retrigger = ioapic_retrigger, -}; static inline void init_IO_APIC_traps(void) { @@ -2130,11 +2020,6 @@ static inline void init_IO_APIC_traps(void) */ for (irq = 0; irq < NR_IRQS ; irq++) { int tmp = irq; - if (use_pci_vector()) { - if (!platform_legacy_irq(tmp)) - if ((tmp = vector_to_irq(tmp)) == -1) - continue; - } if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { /* * Hmm.. We don't have an entry for this, @@ -2145,20 +2030,21 @@ static inline void init_IO_APIC_traps(void) make_8259A_irq(irq); else /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_type; + irq_desc[irq].chip = &no_irq_chip; } } } -static void enable_lapic_irq (unsigned int irq) -{ - unsigned long v; +/* + * The local APIC irq-chip implementation: + */ - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +static void ack_apic(unsigned int irq) +{ + ack_APIC_irq(); } -static void disable_lapic_irq (unsigned int irq) +static void mask_lapic_irq (unsigned int irq) { unsigned long v; @@ -2166,21 +2052,19 @@ static void disable_lapic_irq (unsigned int irq) apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); } -static void ack_lapic_irq (unsigned int irq) +static void unmask_lapic_irq (unsigned int irq) { - ack_APIC_irq(); -} + unsigned long v; -static void end_lapic_irq (unsigned int i) { /* nothing */ } + v = apic_read(APIC_LVT0); + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); +} -static struct hw_interrupt_type lapic_irq_type __read_mostly = { - .typename = "local-APIC-edge", - .startup = NULL, /* startup_irq() not used for IRQ0 */ - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ - .enable = enable_lapic_irq, - .disable = disable_lapic_irq, - .ack = ack_lapic_irq, - .end = end_lapic_irq +static struct irq_chip lapic_chip __read_mostly = { + .name = "local-APIC-edge", + .mask = mask_lapic_irq, + .unmask = unmask_lapic_irq, + .eoi = ack_apic, }; static void setup_nmi (void) @@ -2213,17 +2097,13 @@ static inline void unlock_ExtINT_logic(void) int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; - unsigned long flags; pin = find_isa_irq_pin(8, mp_INT); apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry0 = ioapic_read_entry(apic, pin); clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2236,10 +2116,7 @@ static inline void unlock_ExtINT_logic(void) entry1.trigger = 0; entry1.vector = 0; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry1); save_control = CMOS_READ(RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); @@ -2258,10 +2135,7 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); clear_IO_APIC_pin(apic, pin); - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry0); } int timer_uses_ioapic_pin_0; @@ -2361,7 +2235,7 @@ static inline void check_timer(void) printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); disable_8259A_irq(0); - irq_desc[0].chip = &lapic_irq_type; + set_irq_chip_and_handler(0, &lapic_chip, handle_fasteoi_irq); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2461,17 +2335,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; struct sysfs_ioapic_data *data; - unsigned long flags; int i; data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); - } - spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + entry[i] = ioapic_read_entry(dev->id, i); return 0; } @@ -2493,11 +2362,9 @@ static int ioapic_resume(struct sys_device *dev) reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; io_apic_write(dev->id, 0, reg_00.raw); } - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); - } spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + ioapic_write_entry(dev->id, i, entry[i]); return 0; } @@ -2543,6 +2410,238 @@ static int __init ioapic_init_sysfs(void) device_initcall(ioapic_init_sysfs); +/* + * Dynamic irq allocate and deallocation + */ +int create_irq(void) +{ + /* Allocate an unused irq */ + int irq, new, vector; + unsigned long flags; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for (new = (NR_IRQS - 1); new >= 0; new--) { + if (platform_legacy_irq(new)) + continue; + if (irq_vector[new] != 0) + continue; + vector = __assign_irq_vector(new); + if (likely(vector > 0)) + irq = new; + break; + } + spin_unlock_irqrestore(&vector_lock, flags); + + if (irq >= 0) { + set_intr_gate(vector, interrupt[irq]); + dynamic_irq_init(irq); + } + return irq; +} + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + dynamic_irq_cleanup(irq); + + spin_lock_irqsave(&vector_lock, flags); + irq_vector[irq] = 0; + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * MSI mesage composition + */ +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + int vector; + unsigned dest; + + vector = assign_irq_vector(irq); + if (vector >= 0) { + dest = cpu_mask_to_apicid(TARGET_CPUS); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(vector); + } + return vector; +} + +#ifdef CONFIG_SMP +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + vector = assign_irq_vector(irq); + if (vector < 0) + return; + + dest = cpu_mask_to_apicid(mask); + + read_msi_msg(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); + set_native_irq_info(irq, mask); +} +#endif /* CONFIG_SMP */ + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_ioapic_irq, +#ifdef CONFIG_SMP + .set_affinity = set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) +{ + struct msi_msg msg; + int ret; + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler(irq, &msi_chip, handle_edge_irq); + + return 0; +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + return; +} + +#endif /* CONFIG_PCI_MSI */ + +/* + * Hypertransport interrupt support + */ +#ifdef CONFIG_HT_IRQ + +#ifdef CONFIG_SMP + +static void target_ht_irq(unsigned int irq, unsigned int dest) +{ + u32 low, high; + low = read_ht_irq_low(irq); + high = read_ht_irq_high(irq); + + low &= ~(HT_IRQ_LOW_DEST_ID_MASK); + high &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + + low |= HT_IRQ_LOW_DEST_ID(dest); + high |= HT_IRQ_HIGH_DEST_ID(dest); + + write_ht_irq_low(irq, low); + write_ht_irq_high(irq, high); +} + +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +{ + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + target_ht_irq(irq, dest); + set_native_irq_info(irq, mask); +} +#endif + +static struct hw_interrupt_type ht_irq_chip = { + .name = "PCI-HT", + .mask = mask_ht_irq, + .unmask = unmask_ht_irq, + .ack = ack_ioapic_irq, +#ifdef CONFIG_SMP + .set_affinity = set_ht_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +{ + int vector; + + vector = assign_irq_vector(irq); + if (vector >= 0) { + u32 low, high; + unsigned dest; + cpumask_t tmp; + + cpus_clear(tmp); + cpu_set(vector >> 8, tmp); + dest = cpu_mask_to_apicid(tmp); + + high = HT_IRQ_HIGH_DEST_ID(dest); + + low = HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(vector) | + ((INT_DEST_MODE == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_low(irq, low); + write_ht_irq_high(irq, high); + + set_irq_chip_and_handler(irq, &ht_irq_chip, handle_edge_irq); + } + return vector; +} +#endif /* CONFIG_HT_IRQ */ + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -2694,13 +2793,34 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a if (!ioapic && (irq < 16)) disable_8259A_irq(irq); + ioapic_write_entry(ioapic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } #endif /* CONFIG_ACPI */ + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +static int __init parse_enable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = -1; + return 0; +} +early_param("enable_timer_pin_1", parse_enable_timer_pin_1); + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 5fe547cd8f9f..3dd2e180151b 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -55,6 +55,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) { /* high bit used in ret_from_ code */ int irq = ~regs->orig_eax; + struct irq_desc *desc = irq_desc + irq; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; u32 *isp; @@ -94,7 +95,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) * current stack (which is the irq stack already after all) */ if (curctx != irqctx) { - int arg1, arg2, ebx; + int arg1, arg2, arg3, ebx; /* build the stack frame on the IRQ stack */ isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); @@ -110,16 +111,17 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) (curctx->tinfo.preempt_count & SOFTIRQ_MASK); asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_IRQ \n" + " xchgl %%ebx,%%esp \n" + " call *%%edi \n" " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) - : "0" (irq), "1" (regs), "2" (isp) - : "memory", "cc", "ecx" + : "=a" (arg1), "=d" (arg2), "=c" (arg3), "=b" (ebx) + : "0" (irq), "1" (desc), "2" (regs), "3" (isp), + "D" (desc->handle_irq) + : "memory", "cc" ); } else #endif - __do_IRQ(irq, regs); + desc->handle_irq(irq, desc, regs); irq_exit(); @@ -253,7 +255,8 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %8s", irq_desc[i].chip->name); + seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index afe6505ca0b3..d98e44b16fe2 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -230,20 +230,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { unsigned long *sara = (unsigned long *)®s->esp; - struct kretprobe_instance *ri; - if ((ri = get_free_rp_inst(rp)) != NULL) { - ri->rp = rp; - ri->task = current; + struct kretprobe_instance *ri; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->task = current; ri->ret_addr = (kprobe_opcode_t *) *sara; /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; - - add_rp_inst(ri); - } else { - rp->nmissed++; - } + add_rp_inst(ri); + } else { + rp->nmissed++; + } } /* @@ -359,7 +359,7 @@ no_kprobe: void __kprobes kretprobe_trampoline_holder(void) { asm volatile ( ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" + "kretprobe_trampoline: \n" " pushf\n" /* skip cs, eip, orig_eax, es, ds */ " subl $20, %esp\n" @@ -395,14 +395,15 @@ no_kprobe: */ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; - struct hlist_head *head; - struct hlist_node *node, *tmp; + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + INIT_HLIST_HEAD(&empty_rp); spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); + head = kretprobe_inst_table_head(current); /* * It is possible to have multiple instances associated with a given @@ -413,14 +414,14 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) * We can handle this because: * - instances are always inserted at the head of the list * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the + * function, the first instance's ret_addr will point to the * real return address, and all the rest will point to * kretprobe_trampoline */ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) + if (ri->task != current) /* another task is sharing our hash bucket */ - continue; + continue; if (ri->rp && ri->rp->handler){ __get_cpu_var(current_kprobe) = &ri->rp->kp; @@ -429,7 +430,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) } orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri); + recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) /* @@ -444,6 +445,10 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) spin_unlock_irqrestore(&kretprobe_lock, flags); + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } return (void*)orig_ret_address; } diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index 983f95707e11..445211eb2d57 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -1,5 +1,5 @@ /* - * linux/kernel/ldt.c + * linux/arch/i386/kernel/ldt.c * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 6b1ae6ba76f0..91966bafb3dc 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/kexec.h> #include <linux/delay.h> +#include <linux/init.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -20,70 +21,13 @@ #include <asm/system.h> #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) - -#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L2_ATTR (_PAGE_PRESENT) - -#define LEVEL0_SIZE (1UL << 12UL) - -#ifndef CONFIG_X86_PAE -#define LEVEL1_SIZE (1UL << 22UL) -static u32 pgtable_level1[1024] PAGE_ALIGNED; - -static void identity_map_page(unsigned long address) -{ - unsigned long level1_index, level2_index; - u32 *pgtable_level2; - - /* Find the current page table */ - pgtable_level2 = __va(read_cr3()); - - /* Find the indexes of the physical address to identity map */ - level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; - level2_index = address / LEVEL1_SIZE; - - /* Identity map the page table entry */ - pgtable_level1[level1_index] = address | L0_ATTR; - pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; - - /* Flush the tlb so the new mapping takes effect. - * Global tlb entries are not flushed but that is not an issue. - */ - load_cr3(pgtable_level2); -} - -#else -#define LEVEL1_SIZE (1UL << 21UL) -#define LEVEL2_SIZE (1UL << 30UL) -static u64 pgtable_level1[512] PAGE_ALIGNED; -static u64 pgtable_level2[512] PAGE_ALIGNED; - -static void identity_map_page(unsigned long address) -{ - unsigned long level1_index, level2_index, level3_index; - u64 *pgtable_level3; - - /* Find the current page table */ - pgtable_level3 = __va(read_cr3()); - - /* Find the indexes of the physical address to identity map */ - level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; - level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; - level3_index = address / LEVEL2_SIZE; - - /* Identity map the page table entry */ - pgtable_level1[level1_index] = address | L0_ATTR; - pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; - set_64bit(&pgtable_level3[level3_index], - __pa(pgtable_level2) | L2_ATTR); - - /* Flush the tlb so the new mapping takes effect. - * Global tlb entries are not flushed but that is not an issue. - */ - load_cr3(pgtable_level3); -} +static u32 kexec_pgd[1024] PAGE_ALIGNED; +#ifdef CONFIG_X86_PAE +static u32 kexec_pmd0[1024] PAGE_ALIGNED; +static u32 kexec_pmd1[1024] PAGE_ALIGNED; #endif +static u32 kexec_pte0[1024] PAGE_ALIGNED; +static u32 kexec_pte1[1024] PAGE_ALIGNED; static void set_idt(void *newidt, __u16 limit) { @@ -127,16 +71,6 @@ static void load_segments(void) #undef __STR } -typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( - unsigned long indirection_page, - unsigned long reboot_code_buffer, - unsigned long start_address, - unsigned int has_pae) ATTRIB_NORET; - -extern const unsigned char relocate_new_kernel[]; -extern void relocate_new_kernel_end(void); -extern const unsigned int relocate_new_kernel_size; - /* * A architecture hook called to validate the * proposed image and prepare the control pages @@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image) */ NORET_TYPE void machine_kexec(struct kimage *image) { - unsigned long page_list; - unsigned long reboot_code_buffer; - - relocate_new_kernel_t rnk; + unsigned long page_list[PAGES_NR]; + void *control_page; /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - /* Compute some offsets */ - reboot_code_buffer = page_to_pfn(image->control_code_page) - << PAGE_SHIFT; - page_list = image->head; - - /* Set up an identity mapping for the reboot_code_buffer */ - identity_map_page(reboot_code_buffer); - - /* copy it out */ - memcpy((void *)reboot_code_buffer, relocate_new_kernel, - relocate_new_kernel_size); + control_page = page_address(image->control_code_page); + memcpy(control_page, relocate_kernel, PAGE_SIZE); + + page_list[PA_CONTROL_PAGE] = __pa(control_page); + page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; + page_list[PA_PGD] = __pa(kexec_pgd); + page_list[VA_PGD] = (unsigned long)kexec_pgd; +#ifdef CONFIG_X86_PAE + page_list[PA_PMD_0] = __pa(kexec_pmd0); + page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; + page_list[PA_PMD_1] = __pa(kexec_pmd1); + page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; +#endif + page_list[PA_PTE_0] = __pa(kexec_pte0); + page_list[VA_PTE_0] = (unsigned long)kexec_pte0; + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image) set_idt(phys_to_virt(0),0); /* now call it */ - rnk = (relocate_new_kernel_t) reboot_code_buffer; - (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); +} + +/* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never sets it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ +static int __init parse_crashkernel(char *arg) +{ + unsigned long size, base; + size = memparse(arg, &arg); + if (*arg == '@') { + base = memparse(arg+1, &arg); + /* FIXME: Do I want a sanity check + * to validate the memory range? + */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + return 0; } +early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index cd5456f14af4..eb57a851789d 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -42,6 +42,7 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/mca.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/io.h> #include <linux/proc_fs.h> @@ -414,7 +415,8 @@ subsys_initcall(mca_init); /*--------------------------------------------------------------------*/ -static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) +static __kprobes void +mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) { int slot = mca_dev->slot; @@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) /*--------------------------------------------------------------------*/ -static int mca_handle_nmi_callback(struct device *dev, void *data) +static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) { struct mca_device *mca_dev = to_mca_device(dev); unsigned char pos5; @@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data) return 0; } -void mca_handle_nmi(void) +void __kprobes mca_handle_nmi(void) { /* First try - scan the various adapters and see if a specific * adapter was responsible for the error. diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 40b44cc0d14b..9b9479768d5e 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -2,6 +2,7 @@ * Intel CPU Microcode Update Driver for Linux * * Copyright (C) 2000-2004 Tigran Aivazian + * 2006 Shaohua Li <shaohua.li@intel.com> * * This driver allows to upgrade microcode on Intel processors * belonging to IA-32 family - PentiumPro, Pentium II, @@ -82,6 +83,9 @@ #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/cpu.h> +#include <linux/firmware.h> +#include <linux/platform_device.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -91,9 +95,6 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); MODULE_LICENSE("GPL"); -static int verbose; -module_param(verbose, int, 0644); - #define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ @@ -120,55 +121,40 @@ static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DEFINE_MUTEX(microcode_mutex); -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -typedef enum mc_error_code { - MC_SUCCESS = 0, - MC_IGNORED = 1, - MC_NOTFOUND = 2, - MC_MARKED = 3, - MC_ALLOCATED = 4, -} mc_error_code_t; - static struct ucode_cpu_info { + int valid; unsigned int sig; - unsigned int pf, orig_pf; + unsigned int pf; unsigned int rev; - unsigned int cksum; - mc_error_code_t err; microcode_t *mc; } ucode_cpu_info[NR_CPUS]; - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} -static void collect_cpu_info (void *unused) +static void collect_cpu_info(int cpu_num) { - int cpu_num = smp_processor_id(); struct cpuinfo_x86 *c = cpu_data + cpu_num; struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; - uci->sig = uci->pf = uci->rev = uci->cksum = 0; - uci->err = MC_NOTFOUND; + /* We should bind the task to the CPU */ + BUG_ON(raw_smp_processor_id() != cpu_num); + uci->pf = uci->rev = 0; uci->mc = NULL; + uci->valid = 1; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num); + printk(KERN_ERR "microcode: CPU%d not a capable Intel " + "processor\n", cpu_num); + uci->valid = 0; return; - } else { - uci->sig = cpuid_eax(0x00000001); + } - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - uci->orig_pf = uci->pf; + uci->sig = cpuid_eax(0x00000001); + + if ((c->x86_model >= 5) || (c->x86 > 6)) { + /* get processor flags from MSR 0x17 */ + rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + uci->pf = 1 << ((val[1] >> 18) & 7); } wrmsr(MSR_IA32_UCODE_REV, 0, 0); @@ -180,218 +166,159 @@ static void collect_cpu_info (void *unused) uci->sig, uci->pf, uci->rev); } -static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum) +static inline int microcode_update_match(int cpu_num, + microcode_header_t *mc_header, int sig, int pf) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - pr_debug("Microcode Found.\n"); - pr_debug(" Header Revision 0x%x\n", mc_header->hdrver); - pr_debug(" Loader Revision 0x%x\n", mc_header->ldrver); - pr_debug(" Revision 0x%x \n", mc_header->rev); - pr_debug(" Date %x/%x/%x\n", - ((mc_header->date >> 24 ) & 0xff), - ((mc_header->date >> 16 ) & 0xff), - (mc_header->date & 0xFFFF)); - pr_debug(" Signature 0x%x\n", sig); - pr_debug(" Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n", - ((sig >> 12) & 0x3), - ((sig >> 8) & 0xf), - ((sig >> 4) & 0xf), - ((sig & 0xf))); - pr_debug(" Processor Flags 0x%x\n", pf); - pr_debug(" Checksum 0x%x\n", cksum); - - if (mc_header->rev < uci->rev) { - if (uci->err == MC_NOTFOUND) { - uci->err = MC_IGNORED; - uci->cksum = mc_header->rev; - } else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev) - uci->cksum = mc_header->rev; - } else if (mc_header->rev == uci->rev) { - if (uci->err < MC_MARKED) { - /* notify the caller of success on this cpu */ - uci->err = MC_SUCCESS; - } - } else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) { - pr_debug("microcode: CPU%d found a matching microcode update with " - " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); - uci->cksum = cksum; - uci->pf = pf; /* keep the original mc pf for cksum calculation */ - uci->err = MC_MARKED; /* found the match */ - for_each_online_cpu(cpu_num) { - if (ucode_cpu_info + cpu_num != uci - && ucode_cpu_info[cpu_num].mc == uci->mc) { - uci->mc = NULL; - break; - } - } - if (uci->mc != NULL) { - vfree(uci->mc); - uci->mc = NULL; - } - } - return; + if (!sigmatch(sig, uci->sig, pf, uci->pf) + || mc_header->rev <= uci->rev) + return 0; + return 1; } -static int find_matching_ucodes (void) +static int microcode_sanity_check(void *mc) { - int cursor = 0; - int error = 0; - - while (cursor + MC_HEADER_SIZE < user_buffer_size) { - microcode_header_t mc_header; - void *newmc = NULL; - int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size; + microcode_header_t *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + struct extended_signature *ext_sig; + unsigned long total_size, data_size, ext_table_size; + int sum, orig_sum, ext_sigcount = 0, i; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + if (data_size + MC_HEADER_SIZE > total_size) { + printk(KERN_ERR "microcode: error! " + "Bad data size in microcode data file\n"); + return -EINVAL; + } - if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - error = -EFAULT; - goto out; + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + printk(KERN_ERR "microcode: error! " + "Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + printk(KERN_ERR "microcode: error! " + "Small exttable size in microcode data file\n"); + return -EINVAL; } - - total_size = get_totalsize(&mc_header); - if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - error = -EINVAL; - goto out; + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + printk(KERN_ERR "microcode: error! " + "Bad exttable size in microcode data file\n"); + return -EFAULT; } + ext_sigcount = ext_header->count; + } - data_size = get_datasize(&mc_header); - if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - error = -EINVAL; - goto out; + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + printk(KERN_WARNING "microcode: aborting, " + "bad extended signature table checksum\n"); + return -EINVAL; } + } - if (mc_header.ldrver != 1 || mc_header.hdrver != 1) { - printk(KERN_ERR "microcode: error! Unknown microcode update format\n"); - error = -EINVAL; - goto out; + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (struct extended_signature *)((void *)ext_header + + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i); + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + printk(KERN_ERR "microcode: aborting, bad checksum\n"); + return -EINVAL; } + } + return 0; +} - for_each_online_cpu(cpu_num) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf)) - mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum); - } +/* + * return 0 - no update found + * return 1 - found update + * return < 0 - error + */ +static int get_maching_microcode(void *mc, int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + microcode_header_t *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + void *new_mc; + + if (microcode_update_match(cpu, mc_header, + mc_header->sig, mc_header->pf)) + goto find; + + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = (struct extended_sigtable *)(mc + + get_datasize(mc_header) + MC_HEADER_SIZE); + ext_sigcount = ext_header->count; + ext_sig = (struct extended_signature *)((void *)ext_header + + EXT_HEADER_SIZE); + for (i = 0; i < ext_sigcount; i++) { + if (microcode_update_match(cpu, mc_header, + ext_sig->sig, ext_sig->pf)) + goto find; + ext_sig++; + } + return 0; +find: + pr_debug("microcode: CPU %d found a matching microcode update with" + " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); + new_mc = vmalloc(total_size); + if (!new_mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - struct extended_sigtable ext_header; - struct extended_signature ext_sig; - int ext_sigcount; + /* free previous update file */ + vfree(uci->mc); - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - error = -EINVAL; - goto out; - } - if (copy_from_user(&ext_header, user_buffer + cursor - + MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - error = -EFAULT; - goto out; - } - if (ext_table_size != exttable_size(&ext_header)) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - error = -EFAULT; - goto out; - } - - ext_sigcount = ext_header.count; - - for (i = 0; i < ext_sigcount; i++) { - if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE - + EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - error = -EFAULT; - goto out; - } - for_each_online_cpu(cpu_num) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) { - mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum); - } - } - } - } - /* now check if any cpu has matched */ - allocated_flag = 0; - sum = 0; - for_each_online_cpu(cpu_num) { - if (ucode_cpu_info[cpu_num].err == MC_MARKED) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (!allocated_flag) { - allocated_flag = 1; - newmc = vmalloc(total_size); - if (!newmc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - error = -ENOMEM; - goto out; - } - if (copy_from_user(newmc + MC_HEADER_SIZE, - user_buffer + cursor + MC_HEADER_SIZE, - total_size - MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(newmc); - error = -EFAULT; - goto out; - } - memcpy(newmc, &mc_header, MC_HEADER_SIZE); - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size); - i = ext_table_size / DWSIZE; - while (i--) ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n"); - vfree(newmc); - error = -EINVAL; - goto out; - } - } - - /* calculate the checksum */ - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) sum += ((int *)newmc)[i]; - sum -= (mc_header.sig + mc_header.pf + mc_header.cksum); - } - ucode_cpu_info[cpu_num].mc = newmc; - ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */ - if (sum + uci->sig + uci->pf + uci->cksum != 0) { - printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num); - error = -EINVAL; - goto out; - } - } - } - cursor += total_size; /* goto the next update patch */ - } /* end of while */ -out: - return error; + memcpy(new_mc, mc, total_size); + uci->mc = new_mc; + return 1; } -static void do_update_one (void * unused) +static void apply_microcode(int cpu) { unsigned long flags; unsigned int val[2]; - int cpu_num = smp_processor_id(); + int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (uci->mc == NULL) { - if (verbose) { - if (uci->err == MC_SUCCESS) - printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n", - cpu_num, uci->rev); - else - printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); - } + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); + + if (uci->mc == NULL) return; - } /* serialize access to the physical write to MSR 0x79 */ spin_lock_irqsave(µcode_update_lock, flags); @@ -408,68 +335,107 @@ static void do_update_one (void * unused) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - /* notify the caller of success on this cpu */ - uci->err = MC_SUCCESS; spin_unlock_irqrestore(µcode_update_lock, flags); - printk(KERN_INFO "microcode: CPU%d updated from revision " + if (val[1] != uci->mc->hdr.rev) { + printk(KERN_ERR "microcode: CPU%d updated from revision " + "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); + return; + } + pr_debug("microcode: CPU%d updated from revision " "0x%x to 0x%x, date = %08x \n", cpu_num, uci->rev, val[1], uci->mc->hdr.date); - return; + uci->rev = val[1]; } -static int do_microcode_update (void) -{ - int i, error; +#ifdef CONFIG_MICROCODE_OLD_INTERFACE +static void __user *user_buffer; /* user area microcode data buffer */ +static unsigned int user_buffer_size; /* it's size */ - if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) { - printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); - error = -EIO; - goto out; +static long get_next_ucode(void **mc, long offset) +{ + microcode_header_t mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= user_buffer_size) + return 0; + if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + return -EFAULT; } - - if ((error = find_matching_ucodes())) { - printk(KERN_ERR "microcode: Error in the microcode data\n"); - goto out_free; + total_size = get_totalsize(&mc_header); + if (offset + total_size > user_buffer_size) { + printk(KERN_ERR "microcode: error! Bad total size in microcode " + "data file\n"); + return -EINVAL; } - - if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) { - printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); - error = -EIO; + *mc = vmalloc(total_size); + if (!*mc) + return -ENOMEM; + if (copy_from_user(*mc, user_buffer + offset, total_size)) { + printk(KERN_ERR "microcode: error! Can not read user data\n"); + vfree(*mc); + return -EFAULT; } + return offset + total_size; +} + +static int do_microcode_update (void) +{ + long cursor = 0; + int error = 0; + void *new_mc; + int cpu; + cpumask_t old; + + old = current->cpus_allowed; -out_free: - for_each_online_cpu(i) { - if (ucode_cpu_info[i].mc) { - int j; - void *tmp = ucode_cpu_info[i].mc; - vfree(tmp); - for_each_online_cpu(j) { - if (ucode_cpu_info[j].mc == tmp) - ucode_cpu_info[j].mc = NULL; - } + while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { + error = microcode_sanity_check(new_mc); + if (error) + goto out; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + error = get_maching_microcode(new_mc, cpu); + if (error < 0) + goto out; + if (error == 1) + apply_microcode(cpu); } - if (ucode_cpu_info[i].err == MC_IGNORED && verbose) - printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision" - " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev); + vfree(new_mc); } out: + if (cursor > 0) + vfree(new_mc); + if (cursor < 0) + error = cursor; + set_cpus_allowed(current, old); return error; } +static int microcode_open (struct inode *unused1, struct file *unused2) +{ + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) { ssize_t ret; - if (len < DEFAULT_UCODE_TOTALSIZE) { - printk(KERN_ERR "microcode: not enough data\n"); - return -EINVAL; - } - if ((len >> PAGE_SHIFT) > num_physpages) { printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); return -EINVAL; } + lock_cpu_hotplug(); mutex_lock(µcode_mutex); user_buffer = (void __user *) buf; @@ -480,6 +446,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ ret = (ssize_t)len; mutex_unlock(µcode_mutex); + unlock_cpu_hotplug(); return ret; } @@ -496,7 +463,7 @@ static struct miscdevice microcode_dev = { .fops = µcode_fops, }; -static int __init microcode_init (void) +static int __init microcode_dev_init (void) { int error; @@ -508,6 +475,280 @@ static int __init microcode_init (void) return error; } + return 0; +} + +static void __exit microcode_dev_exit (void) +{ + misc_deregister(µcode_dev); +} + +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +#else +#define microcode_dev_init() 0 +#define microcode_dev_exit() do { } while(0) +#endif + +static long get_next_ucode_from_buffer(void **mc, void *buf, + unsigned long size, long offset) +{ + microcode_header_t *mc_header; + unsigned long total_size; + + /* No more data */ + if (offset >= size) + return 0; + mc_header = (microcode_header_t *)(buf + offset); + total_size = get_totalsize(mc_header); + + if (offset + total_size > size) { + printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); + return -EINVAL; + } + + *mc = vmalloc(total_size); + if (!*mc) { + printk(KERN_ERR "microcode: error! Can not allocate memory\n"); + return -ENOMEM; + } + memcpy(*mc, buf + offset, total_size); + return offset + total_size; +} + +/* fake device for request_firmware */ +static struct platform_device *microcode_pdev; + +static int cpu_request_microcode(int cpu) +{ + char name[30]; + struct cpuinfo_x86 *c = cpu_data + cpu; + const struct firmware *firmware; + void *buf; + unsigned long size; + long offset = 0; + int error; + void *mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + sprintf(name,"intel-ucode/%02x-%02x-%02x", + c->x86, c->x86_model, c->x86_mask); + error = request_firmware(&firmware, name, µcode_pdev->dev); + if (error) { + pr_debug("ucode data file %s load failed\n", name); + return error; + } + buf = (void *)firmware->data; + size = firmware->size; + while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) + > 0) { + error = microcode_sanity_check(mc); + if (error) + break; + error = get_maching_microcode(mc, cpu); + if (error < 0) + break; + /* + * It's possible the data file has multiple matching ucode, + * lets keep searching till the latest version + */ + if (error == 1) { + apply_microcode(cpu); + error = 0; + } + vfree(mc); + } + if (offset > 0) + vfree(mc); + if (offset < 0) + error = offset; + release_firmware(firmware); + + return error; +} + +static void microcode_init_cpu(int cpu) +{ + cpumask_t old; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + old = current->cpus_allowed; + + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + mutex_lock(µcode_mutex); + collect_cpu_info(cpu); + if (uci->valid) + cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + set_cpus_allowed(current, old); +} + +static void microcode_fini_cpu(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + mutex_lock(µcode_mutex); + uci->valid = 0; + vfree(uci->mc); + uci->mc = NULL; + mutex_unlock(µcode_mutex); +} + +static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; + int cpu = dev->id; + + if (end == buf) + return -EINVAL; + if (val == 1) { + cpumask_t old; + + old = current->cpus_allowed; + + lock_cpu_hotplug(); + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + mutex_lock(µcode_mutex); + if (uci->valid) + err = cpu_request_microcode(cpu); + mutex_unlock(µcode_mutex); + unlock_cpu_hotplug(); + set_cpus_allowed(current, old); + } + if (err) + return err; + return sz; +} + +static ssize_t version_show(struct sys_device *dev, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->rev); +} + +static ssize_t pf_show(struct sys_device *dev, char *buf) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + + return sprintf(buf, "0x%x\n", uci->pf); +} + +static SYSDEV_ATTR(reload, 0200, NULL, reload_store); +static SYSDEV_ATTR(version, 0400, version_show, NULL); +static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); + +static struct attribute *mc_default_attrs[] = { + &attr_reload.attr, + &attr_version.attr, + &attr_processor_flags.attr, + NULL +}; + +static struct attribute_group mc_attr_group = { + .attrs = mc_default_attrs, + .name = "microcode", +}; + +static int mc_sysdev_add(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!cpu_online(cpu)) + return 0; + pr_debug("Microcode:CPU %d added\n", cpu); + memset(uci, 0, sizeof(*uci)); + sysfs_create_group(&sys_dev->kobj, &mc_attr_group); + + microcode_init_cpu(cpu); + return 0; +} + +static int mc_sysdev_remove(struct sys_device *sys_dev) +{ + int cpu = sys_dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("Microcode:CPU %d removed\n", cpu); + microcode_fini_cpu(cpu); + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return 0; +} + +static int mc_sysdev_resume(struct sys_device *dev) +{ + int cpu = dev->id; + + if (!cpu_online(cpu)) + return 0; + pr_debug("Microcode:CPU %d resumed\n", cpu); + /* only CPU 0 will apply ucode here */ + apply_microcode(0); + return 0; +} + +static struct sysdev_driver mc_sysdev_driver = { + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, +}; + +#ifdef CONFIG_HOTPLUG_CPU +static __cpuinit int +mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + mc_sysdev_add(sys_dev); + break; + case CPU_DOWN_PREPARE: + mc_sysdev_remove(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block mc_cpu_notifier = { + .notifier_call = mc_cpu_callback, +}; +#endif + +static int __init microcode_init (void) +{ + int error; + + error = microcode_dev_init(); + if (error) + return error; + microcode_pdev = platform_device_register_simple("microcode", -1, + NULL, 0); + if (IS_ERR(microcode_pdev)) { + microcode_dev_exit(); + return PTR_ERR(microcode_pdev); + } + + lock_cpu_hotplug(); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + unlock_cpu_hotplug(); + if (error) { + microcode_dev_exit(); + platform_device_unregister(microcode_pdev); + return error; + } + + register_hotcpu_notifier(&mc_cpu_notifier); + printk(KERN_INFO "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); return 0; @@ -515,9 +756,16 @@ static int __init microcode_init (void) static void __exit microcode_exit (void) { - misc_deregister(µcode_dev); + microcode_dev_exit(); + + unregister_hotcpu_notifier(&mc_cpu_notifier); + + lock_cpu_hotplug(); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + unlock_cpu_hotplug(); + + platform_device_unregister(microcode_pdev); } module_init(microcode_init) module_exit(microcode_exit) -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index a70b5fa0ef06..442aaf8c77eb 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -30,6 +30,7 @@ #include <asm/io_apic.h> #include <mach_apic.h> +#include <mach_apicdef.h> #include <mach_mpparse.h> #include <bios_ebda.h> @@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ -static unsigned int __devinitdata num_processors; +unsigned int __cpuinitdata num_processors; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mpc_oem_bus_info(m, str, translation_table[mpc_record]); +#if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { printk(KERN_WARNING "MP table busid value (%d) for bustype %s " " is too large, max. supported is %d\n", m->mpc_busid, str, MAX_MP_BUSSES - 1); return; } +#endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; @@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); - /* - * Well it seems all SMP boards in existence - * use ExtINT/LVT1 == LINT0 and - * NMI/LVT2 == LINT1 - the following check - * will show us if this assumptions is false. - * Until then we do not have to add baggage. - */ - if ((m->mpc_irqtype == mp_ExtINT) && - (m->mpc_destapiclint != 0)) - BUG(); - if ((m->mpc_irqtype == mp_NMI) && - (m->mpc_destapiclint != 1)) - BUG(); } #ifdef CONFIG_X86_NUMAQ @@ -822,8 +812,7 @@ int es7000_plat; #ifdef CONFIG_ACPI -void __init mp_register_lapic_address ( - u64 address) +void __init mp_register_lapic_address(u64 address) { mp_lapic_addr = (unsigned long) address; @@ -835,13 +824,10 @@ void __init mp_register_lapic_address ( Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } - -void __devinit mp_register_lapic ( - u8 id, - u8 enabled) +void __devinit mp_register_lapic (u8 id, u8 enabled) { struct mpc_config_processor processor; - int boot_cpu = 0; + int boot_cpu = 0; if (MAX_APICS - id <= 0) { printk(KERN_WARNING "Processor #%d invalid (max %d)\n", @@ -878,11 +864,9 @@ static struct mp_ioapic_routing { u32 pin_programmed[4]; } mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic ( - int gsi) +static int mp_find_ioapic (int gsi) { - int i = 0; + int i = 0; /* Find the IOAPIC that manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { @@ -895,15 +879,11 @@ static int mp_find_ioapic ( return -1; } - -void __init mp_register_ioapic ( - u8 id, - u32 address, - u32 gsi_base) +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { - int idx = 0; - int tmpid; + int idx = 0; + int tmpid; if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " @@ -949,16 +929,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - return; } - -void __init mp_override_legacy_irq ( - u8 bus_irq, - u8 polarity, - u8 trigger, - u32 gsi) +void __init +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { struct mpc_config_intsrc intsrc; int ioapic = -1; @@ -996,15 +970,13 @@ void __init mp_override_legacy_irq ( mp_irqs[mp_irq_entries] = intsrc; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!\n"); - - return; } void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; + int i = 0; + int ioapic = -1; /* * Fabricate the legacy ISA bus (bus #31). @@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void) #define MAX_GSI_NUM 4096 -int mp_register_gsi (u32 gsi, int triggering, int polarity) +int mp_register_gsi(u32 gsi, int triggering, int polarity) { - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; - static int pci_irq = 16; + int ioapic = -1; + int ioapic_pin = 0; + int idx, bit = 0; + static int pci_irq = 16; /* * Mapping between Global System Interrups, which * represent all possible interrupts, and IRQs diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acb351478e42..3e8e3adb0489 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -13,7 +13,6 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include <linux/config.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/module.h> @@ -21,83 +20,177 @@ #include <linux/sysdev.h> #include <linux/sysctl.h> #include <linux/percpu.h> +#include <linux/dmi.h> +#include <linux/kprobes.h> #include <asm/smp.h> #include <asm/nmi.h> +#include <asm/kdebug.h> #include <asm/intel_arch_perfmon.h> #include "mach_traps.h" -unsigned int nmi_watchdog = NMI_NONE; -extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; -static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -static unsigned int nmi_p4_cccr_val; -extern void show_registers(struct pt_regs *regs); +int unknown_nmi_panic; +int nmi_watchdog_enabled; -/* - * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: - * - it may be reserved by some other driver, or not - * - when not reserved by some other driver, it may be used for - * the NMI watchdog, or not - * - * This is maintained separately from nmi_active because the NMI - * watchdog may also be driven from the I/O APIC timer. +/* perfctr_nmi_owner tracks the ownership of the perfctr registers: + * evtsel_nmi_owner tracks the ownership of the event selection + * - different performance counters/ event selection may be reserved for + * different subsystems this reservation system just tries to coordinate + * things a little */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); -static unsigned int lapic_nmi_owner; -#define LAPIC_NMI_WATCHDOG (1<<0) -#define LAPIC_NMI_RESERVED (1<<1) +static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); +static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); + +/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) + */ +#define NMI_MAX_COUNTER_BITS 66 /* nmi_active: - * +1: the lapic NMI watchdog is active, but can be disabled - * 0: the lapic NMI watchdog has not been set up, and cannot + * >0: the lapic NMI watchdog is active, but can be disabled + * <0: the lapic NMI watchdog has not been set up, and cannot * be enabled - * -1: the lapic NMI watchdog is disabled, but can be enabled + * 0: the lapic NMI watchdog is disabled, but can be enabled */ -int nmi_active; +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING +unsigned int nmi_watchdog = NMI_DEFAULT; +static unsigned int nmi_hz = HZ; -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED +struct nmi_watchdog_ctlblk { + int enabled; + u64 check_bit; + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); -#define MSR_P4_MISC_ENABLE 0x1A0 -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) -#define MSR_P4_PERFCTR0 0x300 -#define MSR_P4_CCCR0 0x360 -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. [IA32-Vol3, Section 14.9.9] */ -#define MSR_P4_IQ_COUNTER0 0x30C -#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) -#define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ - P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +/* local prototypes */ +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK +extern void show_registers(struct pt_regs *regs); +extern int unknown_nmi_panic; + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; +} + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; +} + +/* checks for a bit availability (hack for oprofile) */ +int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) +{ + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); +} + +/* checks the an msr for availability */ +int avail_to_resrv_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); +} + +int reserve_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) + return 1; + return 0; +} + +void release_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); +} + +int reserve_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0])) + return 1; + return 0; +} + +void release_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); +} + +static __cpuinit inline int nmi_known_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return 1; + else + return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); + } + return 0; +} #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when @@ -125,7 +218,18 @@ static int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; - if (nmi_watchdog == NMI_NONE) + /* Enable NMI watchdog for newer systems. + Actually it should be safe for most systems before 2004 too except + for some IBM systems that corrupt registers when NMI happens + during SMM. Unfortunately we don't have more exact information + on these and use this coarse check. */ + if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) + nmi_watchdog = NMI_LOCAL_APIC; + + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) + return 0; + + if (!atomic_read(&nmi_active)) return 0; prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); @@ -149,25 +253,45 @@ static int __init check_nmi_watchdog(void) if (!cpu_isset(cpu, cpu_callin_map)) continue; #endif + if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) + continue; if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - endflag = 1; printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], nmi_count(cpu)); - nmi_active = 0; - lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; - kfree(prev_nmi_count); - return -1; + per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; + atomic_dec(&nmi_active); } } + if (!atomic_read(&nmi_active)) { + kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); + return -1; + } endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) + if (nmi_watchdog == NMI_LOCAL_APIC) { + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + nmi_hz = 1; + /* + * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && + ((u64)cpu_khz * 1000) > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + nmi_hz = count + 1; + } + } kfree(prev_nmi_count); return 0; @@ -181,124 +305,70 @@ static int __init setup_nmi_watchdog(char *str) get_option(&str, &nmi); - if (nmi >= NMI_INVALID) + if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) return 0; - if (nmi == NMI_NONE) - nmi_watchdog = nmi; /* * If any other x86 CPU has a local APIC, then * please test the NMI stuff there and send me the * missing bits. Right now Intel P6/P4 and AMD K7 only. */ - if ((nmi == NMI_LOCAL_APIC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) - nmi_watchdog = nmi; - if ((nmi == NMI_LOCAL_APIC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) - nmi_watchdog = nmi; - /* - * We can enable the IO-APIC watchdog - * unconditionally. - */ - if (nmi == NMI_IO_APIC) { - nmi_active = 1; - nmi_watchdog = nmi; - } + if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) + return 0; /* no lapic support */ + nmi_watchdog = nmi; return 1; } __setup("nmi_watchdog=", setup_nmi_watchdog); -static void disable_intel_arch_watchdog(void); - static void disable_lapic_nmi_watchdog(void) { - if (nmi_active <= 0) + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) return; - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - wrmsr(MSR_K7_EVNTSEL0, 0, 0); - break; - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - disable_intel_arch_watchdog(); - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) - break; - wrmsr(MSR_P6_EVNTSEL0, 0, 0); - break; - case 15: - if (boot_cpu_data.x86_model > 0x4) - break; + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wrmsr(MSR_P4_IQ_CCCR0, 0, 0); - wrmsr(MSR_P4_CRU_ESCR0, 0, 0); - break; - } - break; - } - nmi_active = -1; - /* tell do_nmi() and others that we're not active any more */ - nmi_watchdog = 0; + BUG_ON(atomic_read(&nmi_active) != 0); } static void enable_lapic_nmi_watchdog(void) { - if (nmi_active < 0) { - nmi_watchdog = NMI_LOCAL_APIC; - setup_apic_nmi_watchdog(); - } -} + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); -int reserve_lapic_nmi(void) -{ - unsigned int old_owner; - - spin_lock(&lapic_nmi_owner_lock); - old_owner = lapic_nmi_owner; - lapic_nmi_owner |= LAPIC_NMI_RESERVED; - spin_unlock(&lapic_nmi_owner_lock); - if (old_owner & LAPIC_NMI_RESERVED) - return -EBUSY; - if (old_owner & LAPIC_NMI_WATCHDOG) - disable_lapic_nmi_watchdog(); - return 0; -} + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; -void release_lapic_nmi(void) -{ - unsigned int new_owner; + /* are we lapic aware */ + if (nmi_known_cpu() <= 0) + return; - spin_lock(&lapic_nmi_owner_lock); - new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; - lapic_nmi_owner = new_owner; - spin_unlock(&lapic_nmi_owner_lock); - if (new_owner & LAPIC_NMI_WATCHDOG) - enable_lapic_nmi_watchdog(); + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + touch_nmi_watchdog(); } void disable_timer_nmi_watchdog(void) { - if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) + BUG_ON(nmi_watchdog != NMI_IO_APIC); + + if (atomic_read(&nmi_active) <= 0) return; - unset_nmi_callback(); - nmi_active = -1; - nmi_watchdog = NMI_NONE; + disable_irq(0); + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + + BUG_ON(atomic_read(&nmi_active) != 0); } void enable_timer_nmi_watchdog(void) { - if (nmi_active < 0) { - nmi_watchdog = NMI_IO_APIC; + BUG_ON(nmi_watchdog != NMI_IO_APIC); + + if (atomic_read(&nmi_active) == 0) { touch_nmi_watchdog(); - nmi_active = 1; + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + enable_irq(0); } } @@ -308,15 +378,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { - nmi_pm_active = nmi_active; - disable_lapic_nmi_watchdog(); + /* only CPU0 goes here, other CPUs should be offline */ + nmi_pm_active = atomic_read(&nmi_active); + stop_apic_nmi_watchdog(NULL); + BUG_ON(atomic_read(&nmi_active) != 0); return 0; } static int lapic_nmi_resume(struct sys_device *dev) { - if (nmi_pm_active > 0) - enable_lapic_nmi_watchdog(); + /* only CPU0 goes here, other CPUs should be offline */ + if (nmi_pm_active > 0) { + setup_apic_nmi_watchdog(NULL); + touch_nmi_watchdog(); + } return 0; } @@ -336,7 +411,13 @@ static int __init init_lapic_nmi_sysfs(void) { int error; - if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) + /* should really be a BUG_ON but b/c this is an + * init call, it just doesn't work. -dcz + */ + if (nmi_watchdog != NMI_LOCAL_APIC) + return 0; + + if ( atomic_read(&nmi_active) < 0 ) return 0; error = sysdev_class_register(&nmi_sysclass); @@ -354,138 +435,269 @@ late_initcall(init_lapic_nmi_sysfs); * Original code written by Keith Owens. */ -static void clear_msr_range(unsigned int base, unsigned int n) -{ - unsigned int i; - - for(i = 0; i < n; ++i) - wrmsr(base+i, 0, 0); -} - -static void write_watchdog_counter(const char *descr) +static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) { u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); if(descr) Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(nmi_perfctr_msr, 0 - count); + wrmsrl(perfctr_msr, 0 - count); } -static void setup_k7_watchdog(void) +/* Note that these events don't tick when the CPU idles. This means + the frequency varies with CPU load. */ + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr; unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = MSR_K7_PERFCTR0; + evntsel_msr = MSR_K7_EVNTSEL0; + if (!reserve_perfctr_nmi(perfctr_msr)) + goto fail; - nmi_perfctr_msr = MSR_K7_PERFCTR0; + if (!reserve_evntsel_nmi(evntsel_msr)) + goto fail1; - clear_msr_range(MSR_K7_EVNTSEL0, 4); - clear_msr_range(MSR_K7_PERFCTR0, 4); + wrmsrl(perfctr_msr, 0UL); evntsel = K7_EVNTSEL_INT | K7_EVNTSEL_OS | K7_EVNTSEL_USR | K7_NMI_EVENT; - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - write_watchdog_counter("K7_PERFCTR0"); + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL<<63; + return 1; +fail1: + release_perfctr_nmi(perfctr_msr); +fail: + return 0; +} + +static void stop_k7_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); + + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); } -static void setup_p6_watchdog(void) +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr; unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = MSR_P6_PERFCTR0; + evntsel_msr = MSR_P6_EVNTSEL0; + if (!reserve_perfctr_nmi(perfctr_msr)) + goto fail; - nmi_perfctr_msr = MSR_P6_PERFCTR0; + if (!reserve_evntsel_nmi(evntsel_msr)) + goto fail1; - clear_msr_range(MSR_P6_EVNTSEL0, 2); - clear_msr_range(MSR_P6_PERFCTR0, 2); + wrmsrl(perfctr_msr, 0UL); evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS | P6_EVNTSEL_USR | P6_NMI_EVENT; - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - write_watchdog_counter("P6_PERFCTR0"); + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL<<39; + return 1; +fail1: + release_perfctr_nmi(perfctr_msr); +fail: + return 0; +} + +static void stop_p6_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); + + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); } +/* Note that these events don't tick when the CPU idles. This means + the frequency varies with CPU load. */ + +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +#define P4_CCCR_OVF (1<<31) +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. [IA32-Vol3, Section 14.9.9] */ + static int setup_p4_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) return 0; - nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; - nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; #ifdef CONFIG_SMP - if (smp_num_siblings == 2) - nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else #endif + ht_num = 0; - if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) - clear_msr_range(0x3F1, 2); - /* MSR 0x3F0 seems to have a default value of 0xFC00, but current - docs doesn't fully define it, so leave it alone for now. */ - if (boot_cpu_data.x86_model >= 0x3) { - /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ - clear_msr_range(0x3A0, 26); - clear_msr_range(0x3BC, 3); + /* performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); } else { - clear_msr_range(0x3A0, 31); + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); } - clear_msr_range(0x3C0, 6); - clear_msr_range(0x3C8, 6); - clear_msr_range(0x3E0, 2); - clear_msr_range(MSR_P4_CCCR0, 18); - clear_msr_range(MSR_P4_PERFCTR0, 18); - - wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - write_watchdog_counter("P4_IQ_COUNTER0"); + + if (!reserve_perfctr_nmi(perfctr_msr)) + goto fail; + + if (!reserve_evntsel_nmi(evntsel_msr)) + goto fail1; + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + wd->check_bit = 1ULL<<39; return 1; +fail1: + release_perfctr_nmi(perfctr_msr); +fail: + return 0; } -static void disable_intel_arch_watchdog(void) +static void stop_p4_watchdog(void) { - unsigned ebx; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebp indicates event present. - */ - ebx = cpuid_ebx(10); - if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); + + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); } +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + static int setup_intel_arch_watchdog(void) { + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; unsigned int evntsel; - unsigned ebx; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); /* * Check whether the Architectural PerfMon supports * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebp indicates event present. + * NOTE: Corresponding bit = 0 in ebx indicates event present. */ - ebx = cpuid_ebx(10); - if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + goto fail; + + perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; - nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + if (!reserve_perfctr_nmi(perfctr_msr)) + goto fail; - clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); - clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); + if (!reserve_evntsel_nmi(evntsel_msr)) + goto fail1; + + wrmsrl(perfctr_msr, 0UL); evntsel = ARCH_PERFMON_EVENTSEL_INT | ARCH_PERFMON_EVENTSEL_OS @@ -493,51 +705,145 @@ static int setup_intel_arch_watchdog(void) | ARCH_PERFMON_NMI_EVENT_SEL | ARCH_PERFMON_NMI_EVENT_UMASK; - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); - write_watchdog_counter("INTEL_ARCH_PERFCTR0"); + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL << (eax.split.bit_width - 1); return 1; +fail1: + release_perfctr_nmi(perfctr_msr); +fail: + return 0; } -void setup_apic_nmi_watchdog (void) +static void stop_intel_arch_watchdog(void) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) - return; - setup_k7_watchdog(); - break; - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - if (!setup_intel_arch_watchdog()) + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return; + + wrmsr(wd->evntsel_msr, 0, 0); + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); +} + +void setup_apic_nmi_watchdog (void *unused) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* only support LOCAL and IO APICs for now */ + if ((nmi_watchdog != NMI_LOCAL_APIC) && + (nmi_watchdog != NMI_IO_APIC)) + return; + + if (wd->enabled == 1) + return; + + /* cheap hack to support suspend/resume */ + /* if cpu0 is not active neither should the other cpus */ + if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) + return; + + if (nmi_watchdog == NMI_LOCAL_APIC) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) return; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) + if (!setup_k7_watchdog()) return; - - setup_p6_watchdog(); break; - case 15: - if (boot_cpu_data.x86_model > 0x4) - return; + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + if (!setup_intel_arch_watchdog()) + return; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + return; + + if (!setup_p6_watchdog()) + return; + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + return; - if (!setup_p4_watchdog()) + if (!setup_p4_watchdog()) + return; + break; + default: return; + } break; default: return; } - break; - default: + } + wd->enabled = 1; + atomic_inc(&nmi_active); +} + +void stop_apic_nmi_watchdog(void *unused) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* only support LOCAL and IO APICs for now */ + if ((nmi_watchdog != NMI_LOCAL_APIC) && + (nmi_watchdog != NMI_IO_APIC)) + return; + + if (wd->enabled == 0) return; + + if (nmi_watchdog == NMI_LOCAL_APIC) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + stop_k7_watchdog(); + break; + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + stop_intel_arch_watchdog(); + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + break; + stop_p6_watchdog(); + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + break; + stop_p4_watchdog(); + break; + } + break; + default: + return; + } } - lapic_nmi_owner = LAPIC_NMI_WATCHDOG; - nmi_active = 1; + wd->enabled = 0; + atomic_dec(&nmi_active); } /* @@ -579,7 +885,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { /* @@ -588,11 +894,23 @@ void nmi_watchdog_tick (struct pt_regs * regs) * smp_processor_id(). */ unsigned int sum; + int touched = 0; int cpu = smp_processor_id(); + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 dummy; + int rc=0; + + /* check for other users first */ + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) { + rc = 1; + touched = 1; + } sum = per_cpu(irq_stat, cpu).apic_timer_irqs; - if (last_irq_sums[cpu] == sum) { + /* if the apic timer isn't firing, this cpu isn't doing much */ + if (!touched && last_irq_sums[cpu] == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... @@ -607,27 +925,59 @@ void nmi_watchdog_tick (struct pt_regs * regs) last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } - if (nmi_perfctr_msr) { - if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. + /* see if the nmi watchdog went off */ + if (wd->enabled) { + if (nmi_watchdog == NMI_LOCAL_APIC) { + rdmsrl(wd->perfctr_msr, dummy); + if (dummy & wd->check_bit){ + /* this wasn't a watchdog timer interrupt */ + goto done; + } + + /* only Intel P4 uses the cccr msr */ + if (wd->cccr_msr != 0) { + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + } + else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { + /* P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + } + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL); + rc = 1; + } else if (nmi_watchdog == NMI_IO_APIC) { + /* don't know how to accurately check for this. + * just assume it was a watchdog timer interrupt + * This matches the old behaviour. */ - wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); - apic_write(APIC_LVTPC, APIC_DM_NMI); + rc = 1; } - else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || - nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { - /* Only P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - } - write_watchdog_counter(NULL); } +done: + return rc; +} + +int do_nmi_callback(struct pt_regs * regs, int cpu) +{ +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; } #ifdef CONFIG_SYSCTL @@ -637,36 +987,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) unsigned char reason = get_nmi_reason(); char buf[64]; - if (!(reason & 0xc0)) { - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - } + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(regs, buf); return 0; } /* - * proc handler for /proc/sys/kernel/unknown_nmi_panic + * proc handler for /proc/sys/kernel/nmi */ -int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { int old_state; - old_state = unknown_nmi_panic; + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!unknown_nmi_panic) + if (!!old_state == !!nmi_watchdog_enabled) return 0; - if (unknown_nmi_panic) { - if (reserve_lapic_nmi() < 0) { - unknown_nmi_panic = 0; - return -EBUSY; - } else { - set_nmi_callback(unknown_nmi_panic_callback); - } + if (atomic_read(&nmi_active) < 0) { + printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EIO; + } + + if (nmi_watchdog == NMI_DEFAULT) { + if (nmi_known_cpu() > 0) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); } else { - release_lapic_nmi(); - unset_nmi_callback(); + printk( KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; } return 0; } @@ -675,7 +1035,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); -EXPORT_SYMBOL(reserve_lapic_nmi); -EXPORT_SYMBOL(release_lapic_nmi); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); +EXPORT_SYMBOL(reserve_perfctr_nmi); +EXPORT_SYMBOL(release_perfctr_nmi); +EXPORT_SYMBOL(reserve_evntsel_nmi); +EXPORT_SYMBOL(release_evntsel_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 8657c739656a..dad02a960e03 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> +#include <linux/personality.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -296,9 +297,9 @@ void show_regs(struct pt_regs * regs) if (user_mode_vm(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), system_utsname.release, - (int)strcspn(system_utsname.version, " "), - system_utsname.version); + regs->eflags, print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", @@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs) * the "args". */ extern void kernel_thread_helper(void); -__asm__(".section .text\n" - ".align 4\n" - "kernel_thread_helper:\n\t" - "movl %edx,%eax\n\t" - "pushl %edx\n\t" - "call *%ebx\n\t" - "pushl %eax\n\t" - "call do_exit\n" - ".previous"); /* * Create a kernel thread @@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xes = __USER_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ @@ -433,13 +425,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, + IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; return -ENOMEM; } - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES); set_tsk_thread_flag(p, TIF_IO_BITMAP); } @@ -905,7 +896,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) unsigned long arch_align_stack(unsigned long sp) { - if (randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index d3db03f4085d..775f50e9395b 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ return addr; } -static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) +static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) { int i, copied; - unsigned char opcode[16]; + unsigned char opcode[15]; unsigned long addr = convert_eip_to_linear(child, regs); copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); for (i = 0; i < copied; i++) { switch (opcode[i]) { - /* popf */ - case 0x9d: + /* popf and iret */ + case 0x9d: case 0xcf: return 1; /* opcode and address size prefixes */ case 0x66: case 0x67: @@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child) * don't mark it as being "us" that set it, so that we * won't clear it by hand later. */ - if (is_at_popf(child, regs)) + if (is_setting_trap_flag(child, regs)) return; child->ptrace |= PT_DTRACE; diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S index d312616effa1..f151d6fae462 100644 --- a/arch/i386/kernel/relocate_kernel.S +++ b/arch/i386/kernel/relocate_kernel.S @@ -7,16 +7,138 @@ */ #include <linux/linkage.h> +#include <asm/page.h> +#include <asm/kexec.h> + +/* + * Must be relocatable PIC code callable as a C function + */ + +#define PTR(x) (x << 2) +#define PAGE_ALIGNED (1 << PAGE_SHIFT) +#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ +#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ + + .text + .align PAGE_ALIGNED + .globl relocate_kernel +relocate_kernel: + movl 8(%esp), %ebp /* list of pages */ + +#ifdef CONFIG_X86_PAE + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_0)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_1)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#else + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#endif - /* - * Must be relocatable PIC code callable as a C function, that once - * it starts can not use the previous processes stack. - */ - .globl relocate_new_kernel relocate_new_kernel: /* read the arguments and say goodbye to the stack */ movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* reboot_code_buffer */ + movl 8(%esp), %ebp /* list of pages */ movl 12(%esp), %edx /* start address */ movl 16(%esp), %ecx /* cpu_has_pae */ @@ -24,11 +146,26 @@ relocate_new_kernel: pushl $0 popfl - /* set a new stack at the bottom of our page... */ - lea 4096(%ebp), %esp + /* get physical address of control page now */ + /* this is impossible after page table switch */ + movl PTR(PA_CONTROL_PAGE)(%ebp), %edi - /* store the parameters back on the stack */ - pushl %edx /* store the start address */ + /* switch to new set of page tables */ + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + + /* jump to identity mapped page */ + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax + ret + +identity_mapped: + /* store the start address on the stack */ + pushl %edx /* Set cr0 to a known state: * 31 0 == Paging disabled @@ -113,8 +250,3 @@ relocate_new_kernel: xorl %edi, %edi xorl %ebp, %ebp ret -relocate_new_kernel_end: - - .globl relocate_new_kernel_size -relocate_new_kernel_size: - .long relocate_new_kernel_end - relocate_new_kernel diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c deleted file mode 100644 index 98352c374c76..000000000000 --- a/arch/i386/kernel/semaphore.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * i386 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> - */ -#include <asm/semaphore.h> - -/* - * The semaphore operations have a special calling sequence that - * allow us to do a simpler in-line version of them. These routines - * need to convert that sequence back into the C sequence when - * there is contention on the semaphore. - * - * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax whish is either a return - * value or just clobbered.. - */ -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down_interruptible\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down_trylock\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __up\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" - "ret" -); - -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK_PREFIX "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK_PREFIX "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 16d99444cf66..000cf03751fe 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI - int acpi_disabled = 0; -#else - int acpi_disabled = 1; -#endif -EXPORT_SYMBOL(acpi_disabled); - -#ifdef CONFIG_ACPI -int __initdata acpi_force = 0; -extern acpi_interrupt_flags acpi_sci_flags; -#endif - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA @@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void generic_apic_probe(char *); extern int root_mountflags; unsigned long saved_videomode; @@ -222,9 +209,6 @@ static struct resource adapter_rom_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -286,9 +270,6 @@ static struct resource standard_io_resources[] = { { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) static int __init romchecksum(unsigned char *rom, unsigned long length) @@ -345,7 +326,7 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -701,238 +682,132 @@ static inline void copy_edd(void) } #endif -static void __init parse_cmdline_early (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - int userdef = 0; +static int __initdata user_defined_memmap = 0; - /* Save unparsed command line copy for /proc/cmdline */ - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to <mem>, overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * <start> to <start>+<mem>, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; - for (;;) { - if (c != ' ') - goto next_char; - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to <mem>, overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * <start> to <start>+<mem>, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] + if (strcmp(arg, "nopentium") == 0) { + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. */ - if (!memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; - - mem_size = memparse(from+4, &from); - limit_regions(mem_size); - userdef=1; - } - } - - else if (!memcmp(from, "memmap=", 7)) { - if (to != command_line) - to--; - if (!memcmp(from+7, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - from += 8+7; - e820.nr_map = 0; - userdef = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; + unsigned long long mem_size; - mem_size = memparse(from+7, &from); - if (*from == '@') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*from == '#') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*from == '$') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - userdef=1; - } - } - } - - else if (!memcmp(from, "noexec=", 7)) - noexec_setup(from + 7); + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; -#ifdef CONFIG_X86_SMP - /* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. */ - else if (!memcmp(from, "maxcpus=", 8)) { - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(from + 8, NULL, 0); - } + find_max_pfn(); + saved_max_pfn = max_pfn; #endif - -#ifdef CONFIG_ACPI - /* "acpi=off" disables both ACPI table parsing and interpreter */ - else if (!memcmp(from, "acpi=off", 8)) { - disable_acpi(); - } - - /* acpi=force to over-ride black-list */ - else if (!memcmp(from, "acpi=force", 10)) { - acpi_force = 1; - acpi_ht = 1; - acpi_disabled = 0; - } - - /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { - acpi_strict = 1; - } - - /* Limit ACPI just to boot-time to enable HT */ - else if (!memcmp(from, "acpi=ht", 7)) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ - else if (!memcmp(from, "pci=noacpi", 10)) { - acpi_disable_pci(); - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (!memcmp(from, "acpi=noirq", 10)) { - acpi_noirq_set(); + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; } + } + return 0; +} +early_param("memmap", parse_memmap); - else if (!memcmp(from, "acpi_sci=edge", 13)) - acpi_sci_flags.trigger = 1; - - else if (!memcmp(from, "acpi_sci=level", 14)) - acpi_sci_flags.trigger = 3; - - else if (!memcmp(from, "acpi_sci=high", 13)) - acpi_sci_flags.polarity = 1; - - else if (!memcmp(from, "acpi_sci=low", 12)) - acpi_sci_flags.polarity = 3; - -#ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) - acpi_skip_timer_override = 1; - - if (!memcmp(from, "disable_timer_pin_1", 19)) - disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) - disable_timer_pin_1 = -1; +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable IO-APIC */ - else if (!memcmp(from, "noapic", 6)) - disable_ioapic_setup(); -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ -#ifdef CONFIG_X86_LOCAL_APIC - /* enable local APIC */ - else if (!memcmp(from, "lapic", 5)) - lapic_enable(); +/* + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. + */ +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable local APIC */ - else if (!memcmp(from, "nolapic", 6)) - lapic_disable(); -#endif /* CONFIG_X86_LOCAL_APIC */ + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); -#ifdef CONFIG_KEXEC - /* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never set's it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ - else if (!memcmp(from, "crashkernel=", 12)) { - unsigned long size, base; - size = memparse(from+12, &from); - if (*from == '@') { - base = memparse(from+1, &from); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - } -#endif -#ifdef CONFIG_PROC_VMCORE - /* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ - else if (!memcmp(from, "elfcorehdr=", 11)) - elfcorehdr_addr = memparse(from+11, &from); -#endif +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the + * vmalloc area - the default is 128m. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; - /* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. - */ - else if (!memcmp(from, "highmem=", 8)) - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; - - /* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. - */ - else if (!memcmp(from, "vmalloc=", 8)) - __VMALLOC_RESERVE = memparse(from+8, &from); - - next_char: - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - if (userdef) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + __VMALLOC_RESERVE = memparse(arg, &arg); + return 0; } +early_param("vmalloc", parse_vmalloc); /* * reservetop=size reserves a hole at the top of the kernel address space which @@ -1189,6 +1064,14 @@ static unsigned long __init setup_memory(void) } printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); @@ -1200,22 +1083,20 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - unsigned int max_dma, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = highend_pfn - low; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn}; + add_active_range(0, 0, highend_pfn); +#else + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn}; + add_active_range(0, 0, max_low_pfn); #endif - } - free_area_init(zones_size); + + free_area_init_nodes(max_zone_pfns); } #else extern unsigned long __init setup_memory(void); @@ -1385,7 +1266,7 @@ static int __init request_standard_resources(void) request_resource(&iomem_resource, &video_ram_resource); /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); return 0; } @@ -1518,17 +1399,15 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; - parse_cmdline_early(cmdline_p); + parse_early_param(); -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); } -#endif + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; max_low_pfn = setup_memory(); @@ -1557,7 +1436,7 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH - generic_apic_probe(*cmdline_p); + generic_apic_probe(); #endif if (efi_enabled) efi_map_memmap(); @@ -1569,9 +1448,11 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif +#ifdef CONFIG_PCI #ifdef CONFIG_X86_IO_APIC check_acpi_pci(); /* Checks more than just ACPI actually */ #endif +#endif #ifdef CONFIG_ACPI acpi_boot_init(); diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 465188e2d701..1b080ab8a49f 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -700,3 +700,30 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, return 0; } EXPORT_SYMBOL(smp_call_function_single); + +static int convert_apicid_to_cpu(int apic_id) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) { + if (x86_cpu_to_apicid[i] == apic_id) + return i; + } + return -1; +} + +int safe_smp_processor_id(void) +{ + int apicid, cpuid; + + if (!boot_cpu_has(X86_FEATURE_APIC)) + return 0; + + apicid = hard_smp_processor_id(); + if (apicid == BAD_APICID) + return 0; + + cpuid = convert_apicid_to_cpu(apicid); + + return cpuid >= 0 ? cpuid : 0; +} diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index efe07990e7fc..4bb8b77cd65b 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -102,6 +102,8 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); +u8 apicid_2_node[MAX_APICID]; + /* * Trampoline 80x86 program as an array. */ @@ -177,6 +179,9 @@ static void __devinit smp_store_cpu_info(int id) */ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (num_possible_cpus() == 1) + goto valid_k7; + /* Athlon 660/661 is valid. */ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) goto valid_k7; @@ -607,6 +612,7 @@ extern struct { /* which logical CPUs are on which nodes */ cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; +EXPORT_SYMBOL(node_2_cpu_mask); /* which node each logical CPU is on */ int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); @@ -951,6 +957,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); + x86_cpu_to_apicid[cpu] = apicid; /* * This grunge runs the startup process for * the targeted processor. @@ -1055,7 +1062,7 @@ static void __cpuinit do_warm_boot_cpu(void *p) static int __cpuinit __smp_prepare_cpu(int cpu) { - DECLARE_COMPLETION(done); + DECLARE_COMPLETION_ONSTACK(done); struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; @@ -1376,7 +1383,8 @@ int __cpu_disable(void) */ if (cpu == 0) return -EBUSY; - + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ local_irq_enable(); @@ -1490,3 +1498,16 @@ void __init smp_intr_init(void) /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } + +/* + * If the BIOS enumerates physical processors before logical, + * maxcpus=N at enumeration-time can be used to disable HT. + */ +static int __init parse_maxcpus(char *arg) +{ + extern unsigned int maxcpus; + + maxcpus = simple_strtoul(arg, NULL, 0); + return 0; +} +early_param("maxcpus", parse_maxcpus); diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 83db411b3aa7..f7e735c077c3 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -30,6 +30,7 @@ #include <linux/nodemask.h> #include <asm/srat.h> #include <asm/topology.h> +#include <asm/smp.h> /* * proximity macros and definitions @@ -54,8 +55,7 @@ struct node_memory_chunk_s { static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; static int num_memory_chunks; /* total number of memory chunks */ -static int zholes_size_init; -static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; +static u8 __initdata apicid_to_pxm[MAX_APICID]; extern void * boot_ioremap(unsigned long, unsigned long); @@ -71,6 +71,8 @@ static void __init parse_cpu_affinity_structure(char *p) /* mark this node as "seen" in node bitmap */ BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain); + apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain; + printk("CPU 0x%02X in proximity domain 0x%02X\n", cpu_affinity->apic_id, cpu_affinity->proximity_domain); } @@ -135,47 +137,6 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -/* Take a chunk of pages from page frame cstart to cend and count the number - * of pages in each zone, returned via zones[]. - */ -static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, - unsigned long *zones) -{ - unsigned long max_dma; - extern unsigned long max_low_pfn; - - int z; - unsigned long rend; - - /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide - * similarly scoped information and should be handled in a consistant - * manner. - */ - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - /* Split the hole into the zones in which it falls. Repeatedly - * take the segment in which the remaining hole starts, round it - * to the end of that zone. - */ - memset(zones, 0, MAX_NR_ZONES * sizeof(long)); - while (cstart < cend) { - if (cstart < max_dma) { - z = ZONE_DMA; - rend = (cend < max_dma)? cend : max_dma; - - } else if (cstart < max_low_pfn) { - z = ZONE_NORMAL; - rend = (cend < max_low_pfn)? cend : max_low_pfn; - - } else { - z = ZONE_HIGHMEM; - rend = cend; - } - zones[z] += rend - cstart; - cstart = rend; - } -} - /* * The SRAT table always lists ascending addresses, so can always * assume that the first "start" address that you see is the real @@ -220,7 +181,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); - memset(zholes_size, 0, sizeof(zholes_size)); num_memory_chunks = 0; while (p < end) { @@ -279,11 +239,15 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) printk("Number of logical nodes in system = %d\n", num_online_nodes()); printk("Number of memory chunks in system = %d\n", num_memory_chunks); + for (i = 0; i < MAX_APICID; i++) + apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); + for (j = 0; j < num_memory_chunks; j++){ struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); node_read_chunk(chunk->nid, chunk); + add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); } for_each_online_node(nid) { @@ -392,57 +356,7 @@ int __init get_memcfg_from_srat(void) return acpi20_parse_srat((struct acpi_table_srat *)header); } out_err: + remove_all_active_ranges(); printk("failed to get NUMA memory information from SRAT table\n"); return 0; } - -/* For each node run the memory list to determine whether there are - * any memory holes. For each hole determine which ZONE they fall - * into. - * - * NOTE#1: this requires knowledge of the zone boundries and so - * _cannot_ be performed before those are calculated in setup_memory. - * - * NOTE#2: we rely on the fact that the memory chunks are ordered by - * start pfn number during setup. - */ -static void __init get_zholes_init(void) -{ - int nid; - int c; - int first; - unsigned long end = 0; - - for_each_online_node(nid) { - first = 1; - for (c = 0; c < num_memory_chunks; c++){ - if (node_memory_chunk[c].nid == nid) { - if (first) { - end = node_memory_chunk[c].end_pfn; - first = 0; - - } else { - /* Record any gap between this chunk - * and the previous chunk on this node - * against the zones it spans. - */ - chunk_to_zones(end, - node_memory_chunk[c].start_pfn, - &zholes_size[nid * MAX_NR_ZONES]); - } - } - } - } -} - -unsigned long * __init get_zholes_size(int nid) -{ - if (!zholes_size_init) { - zholes_size_init++; - get_zholes_init(); - } - if (nid >= MAX_NUMNODES || !node_online(nid)) - printk("%s: nid = %d is invalid/offline. num_online_nodes = %d", - __FUNCTION__, nid, num_online_nodes()); - return &zholes_size[nid * MAX_NR_ZONES]; -} diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c deleted file mode 100644 index e62a037ab399..000000000000 --- a/arch/i386/kernel/stacktrace.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * arch/i386/kernel/stacktrace.c - * - * Stack trace management functions - * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> - */ -#include <linux/sched.h> -#include <linux/stacktrace.h> - -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) -{ - return p > (void *)tinfo && - p < (void *)tinfo + THREAD_SIZE - 3; -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer: - */ -static inline unsigned long -save_context_stack(struct stack_trace *trace, unsigned int skip, - struct thread_info *tinfo, unsigned long *stack, - unsigned long ebp) -{ - unsigned long addr; - -#ifdef CONFIG_FRAME_POINTER - while (valid_stack_ptr(tinfo, (void *)ebp)) { - addr = *(unsigned long *)(ebp + 4); - if (!skip) - trace->entries[trace->nr_entries++] = addr; - else - skip--; - if (trace->nr_entries >= trace->max_entries) - break; - /* - * break out of recursive entries (such as - * end_of_stack_stop_unwind_function): - */ - if (ebp == *(unsigned long *)ebp) - break; - - ebp = *(unsigned long *)ebp; - } -#else - while (valid_stack_ptr(tinfo, stack)) { - addr = *stack++; - if (__kernel_text_address(addr)) { - if (!skip) - trace->entries[trace->nr_entries++] = addr; - else - skip--; - if (trace->nr_entries >= trace->max_entries) - break; - } - } -#endif - - return ebp; -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. - */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) -{ - unsigned long ebp; - unsigned long *stack = &ebp; - - WARN_ON(trace->nr_entries || !trace->max_entries); - - if (!task || task == current) { - /* Grab ebp right from our regs: */ - asm ("movl %%ebp, %0" : "=r" (ebp)); - } else { - /* ebp is the last reg pushed by switch_to(): */ - ebp = *(unsigned long *) task->thread.esp; - } - - while (1) { - struct thread_info *context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - - ebp = save_context_stack(trace, skip, context, stack, ebp); - stack = (unsigned long *)context->previous_esp; - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) - break; - trace->entries[trace->nr_entries++] = ULONG_MAX; - if (trace->nr_entries >= trace->max_entries) - break; - } -} - diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index 8fdb1fb17a5f..4048397f1740 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -21,6 +21,7 @@ #include <linux/utsname.h> #include <asm/uaccess.h> +#include <asm/unistd.h> #include <asm/ipc.h> /* @@ -210,7 +211,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name) if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -226,16 +227,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); - error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); - error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); - error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); - error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); - error |= __put_user(0,name->machine+__OLD_UTS_LEN); + error = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + error |= __put_user(0, name->sysname + __OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + error |= __put_user(0, name->nodename + __OLD_UTS_LEN); + error |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + error |= __put_user(0, name->release + __OLD_UTS_LEN); + error |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + error |= __put_user(0, name->version + __OLD_UTS_LEN); + error |= __copy_to_user(&name->machine, &utsname()->machine, + __OLD_UTS_LEN); + error |= __put_user(0, name->machine + __OLD_UTS_LEN); up_read(&uts_sem); @@ -243,3 +249,17 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) return error; } + + +/* + * Do a system call from kernel instead of calling sys_execve so we + * end up with proper pt_regs. + */ +int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +{ + long __res; + asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" + : "=a" (__res) + : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); + return __res; +} diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d4775398..7e639f78b0b9 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -317,3 +317,4 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_getcpu diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 1302e4ab3c4f..58a2d5582419 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -76,8 +76,6 @@ int pit_latch_buggy; /* extern */ unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); -extern unsigned long wall_jiffies; - DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -130,18 +128,33 @@ static int set_rtc_mmss(unsigned long nowtime) int timer_ack; -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) +#ifdef CONFIG_SMP + if (!user_mode_vm(regs) && in_lock_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->ebp + 4); - +#else + unsigned long *sp; + if ((regs->xcs & 3) == 0) + sp = (unsigned long *)®s->esp; + else + sp = (unsigned long *)regs->esp; + /* Return address is either directly at stack pointer + or above a saved eflags. Eflags has bits 22-31 zero, + kernel addresses don't. */ + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; +#endif + } +#endif return pc; } EXPORT_SYMBOL(profile_pc); -#endif /* * This is the same as the above, except we _also_ save the current @@ -314,7 +327,6 @@ static int timer_resume(struct sys_device *dev) do_settimeofday(&ts); write_seqlock_irqsave(&xtime_lock, flags); jiffies_64 += sleep_length; - wall_jiffies += sleep_length; write_sequnlock_irqrestore(&xtime_lock, flags); touch_softlockup_watchdog(); return 0; diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index e2e281d4bcc8..07d6da36a825 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -28,6 +28,7 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/nodemask.h> +#include <linux/mmzone.h> #include <asm/cpu.h> static struct i386_cpu cpu_devices[NR_CPUS]; @@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); #endif /*CONFIG_HOTPLUG_CPU*/ - - -#ifdef CONFIG_NUMA -#include <linux/mmzone.h> - static int __init topology_init(void) { int i; +#ifdef CONFIG_NUMA for_each_online_node(i) register_one_node(i); +#endif /* CONFIG_NUMA */ for_each_present_cpu(i) arch_register_cpu(i); return 0; } -#else /* !CONFIG_NUMA */ - -static int __init topology_init(void) -{ - int i; - - for_each_present_cpu(i) - arch_register_cpu(i); - return 0; -} - -#endif /* CONFIG_NUMA */ - subsys_initcall(topology_init); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4fcc6690be99..00489b706d27 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -28,6 +28,7 @@ #include <linux/kprobes.h> #include <linux/kexec.h> #include <linux/unwind.h> +#include <linux/uaccess.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -40,7 +41,6 @@ #include <asm/processor.h> #include <asm/system.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/atomic.h> #include <asm/debugreg.h> @@ -51,11 +51,14 @@ #include <asm/smp.h> #include <asm/arch_hooks.h> #include <asm/kdebug.h> +#include <asm/stacktrace.h> #include <linux/module.h> #include "mach_traps.h" +int panic_on_unrecovered_nmi; + asmlinkage int system_call(void); struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, @@ -118,26 +121,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } -/* - * Print one address/symbol entries per line. - */ -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) -{ - printk(" [<%08lx>] ", addr); - - print_symbol("%s\n", addr); -} - static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - char *log_lvl) + struct stacktrace_ops *ops, void *data) { unsigned long addr; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + ops->address(data, addr); /* * break out of recursive entries (such as * end_of_stack_stop_unwind_function): @@ -150,30 +143,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + ops->address(data, addr); } #endif return ebp; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + static asmlinkage int -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +dump_trace_unwind(struct unwind_frame_info *info, void *data) { + struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; while (unwind(info) == 0 && UNW_PC(info)) { n++; - print_addr_and_symbol(UNW_PC(info), log_lvl); + oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; } return n; } -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl) +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, + struct stacktrace_ops *ops, void *data) { - unsigned long ebp; + unsigned long ebp = 0; if (!task) task = current; @@ -181,54 +181,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, if (call_trace >= 0) { int unw_ret = 0; struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; if (regs) { if (unwind_init_frame_info(&info, task, regs) == 0) - unw_ret = show_trace_unwind(&info, log_lvl); + unw_ret = dump_trace_unwind(&info, &oad); } else if (task == current) - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); else { if (unwind_init_blocked(&info, task) == 0) - unw_ret = show_trace_unwind(&info, log_lvl); + unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - print_symbol("DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - printk("Leftover inexact backtrace:\n"); + ops->warning(data, "Leftover inexact backtrace:\n"); stack = (void *)UNW_SP(&info); + if (!stack) + return; + ebp = UNW_FP(&info); } else - printk("Full inexact backtrace again:\n"); + ops->warning(data, "Full inexact backtrace again:\n"); } else if (call_trace >= 1) return; else - printk("Full inexact backtrace again:\n"); + ops->warning(data, "Full inexact backtrace again:\n"); } else - printk("Inexact backtrace:\n"); + ops->warning(data, "Inexact backtrace:\n"); + } + if (!stack) { + unsigned long dummy; + stack = &dummy; + if (task && task != current) + stack = (unsigned long *)task->thread.esp; } - if (task == current) { - /* Grab ebp right from our regs */ - asm ("movl %%ebp, %0" : "=r" (ebp) : ); - } else { - /* ebp is the last reg pushed by switch_to */ - ebp = *(unsigned long *) task->thread.esp; +#ifdef CONFIG_FRAME_POINTER + if (!ebp) { + if (task == current) { + /* Grab ebp right from our regs */ + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } else { + /* ebp is the last reg pushed by switch_to */ + ebp = *(unsigned long *) task->thread.esp; + } } +#endif while (1) { struct thread_info *context; context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = print_context_stack(context, stack, ebp, log_lvl); + ebp = print_context_stack(context, stack, ebp, ops, data); + /* Should be after the line below, but somewhere + in early boot context comes out corrupted and we + can't reference it -AK */ + if (ops->stack(data, "IRQ") < 0) + break; stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk("%s =======================\n", log_lvl); } } +EXPORT_SYMBOL(dump_trace); + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) +static int print_trace_stack(void *data, char *name) +{ + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr) +{ + printk("%s [<%08lx>] ", (char *)data, addr); + print_symbol("%s\n", addr); +} + +static struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long * stack, char *log_lvl) +{ + dump_trace(task, regs, stack, &print_trace_ops, log_lvl); + printk("%s =======================\n", log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long * stack) { show_trace_log_lvl(task, regs, stack, ""); } @@ -291,12 +353,13 @@ void show_registers(struct pt_regs *regs) ss = regs->xss & 0xffff; } print_modules(); - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" - "EFLAGS: %08lx (%s %.*s) \n", + printk(KERN_EMERG "CPU: %d\n" + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, system_utsname.release, - (int)strcspn(system_utsname.version, " "), - system_utsname.version); + print_tainted(), regs->eflags, init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); @@ -348,7 +411,7 @@ static void handle_BUG(struct pt_regs *regs) if (eip < PAGE_OFFSET) return; - if (__get_user(ud2, (unsigned short __user *)eip)) + if (probe_kernel_address((unsigned short __user *)eip, ud2)) return; if (ud2 != 0x0b0f) return; @@ -361,7 +424,8 @@ static void handle_BUG(struct pt_regs *regs) char *file; char c; - if (__get_user(line, (unsigned short __user *)(eip + 2))) + if (probe_kernel_address((unsigned short __user *)(eip + 2), + line)) break; if (__get_user(file, (char * __user *)(eip + 4)) || (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) @@ -634,18 +698,24 @@ gp_in_kernel: } } -static void mem_parity_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " - "to continue\n"); + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " + "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You probably have a hardware problem with your RAM " "chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); } -static void io_check_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +io_check_error(unsigned char reason, struct pt_regs * regs) { unsigned long i; @@ -661,7 +731,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { #ifdef CONFIG_MCA /* Might actually be able to figure out what the guilty party @@ -671,15 +742,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) return; } #endif - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - printk("Dazed and confused, but trying to continue\n"); - printk("Do you have a strange power saving mode enabled?\n"); + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " + "CPU %d.\n", reason, smp_processor_id()); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } static DEFINE_SPINLOCK(nmi_print_lock); -void die_nmi (struct pt_regs *regs, const char *msg) +void __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) @@ -711,7 +785,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static __kprobes void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -728,12 +802,12 @@ static void default_do_nmi(struct pt_regs * regs) * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. */ - if (nmi_watchdog) { - nmi_watchdog_tick(regs); + if (nmi_watchdog_tick(regs, reason)) return; - } + if (!do_nmi_callback(regs, smp_processor_id())) #endif - unknown_nmi_error(reason, regs); + unknown_nmi_error(reason, regs); + return; } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -749,14 +823,7 @@ static void default_do_nmi(struct pt_regs * regs) reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) -{ - return 0; -} - -static nmi_callback_t nmi_callback = dummy_nmi_callback; - -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; @@ -766,25 +833,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - if (!rcu_dereference(nmi_callback)(regs, cpu)) - default_do_nmi(regs); + default_do_nmi(regs); nmi_exit(); } -void set_nmi_callback(nmi_callback_t callback) -{ - vmalloc_sync_all(); - rcu_assign_pointer(nmi_callback, callback); -} -EXPORT_SYMBOL_GPL(set_nmi_callback); - -void unset_nmi_callback(void) -{ - nmi_callback = dummy_nmi_callback; -} -EXPORT_SYMBOL_GPL(unset_nmi_callback); - #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { @@ -1124,20 +1177,6 @@ void __init trap_init_f00f_bug(void) } #endif -#define _set_gate(gate_addr,type,dpl,addr,seg) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" ((seg) << 16)); \ -} while (0) - - /* * This needs to use 'idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the @@ -1146,7 +1185,7 @@ do { \ */ void set_intr_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); } /* @@ -1154,22 +1193,22 @@ void set_intr_gate(unsigned int n, void *addr) */ static inline void set_system_intr_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); + _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS); } static void __init set_trap_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS); } static void __init set_system_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS); } static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { - _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); + _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); } diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 7e0d8dab2075..b8fa0a8b2e47 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -void tsc_init(void) +void __init tsc_init(void) { if (!cpu_has_tsc || tsc_disable) return; diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile index 914933e9ec3d..d86a548b8d54 100644 --- a/arch/i386/lib/Makefile +++ b/arch/i386/lib/Makefile @@ -4,6 +4,6 @@ lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ - bitops.o + bitops.o semaphore.o lib-$(CONFIG_X86_USE_3DNOW) += mmx.o diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index 3c0714c4b669..f6edb11364df 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -11,7 +11,6 @@ */ #include <linux/module.h> -#include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> diff --git a/arch/i386/lib/semaphore.S b/arch/i386/lib/semaphore.S new file mode 100644 index 000000000000..ef6ad9e1a609 --- /dev/null +++ b/arch/i386/lib/semaphore.S @@ -0,0 +1,216 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> + */ + +#include <linux/linkage.h> +#include <asm/rwlock.h> +#include <asm/alternative-asm.i> +#include <asm/frame.i> +#include <asm/dwarf2.h> + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %eax contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax whish is either a return + * value or just clobbered.. + */ + .section .sched.text +ENTRY(__down_failed) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed) + +ENTRY(__down_failed_interruptible) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_interruptible + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_interruptible) + +ENTRY(__down_failed_trylock) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_trylock + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_trylock) + +ENTRY(__up_wakeup) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __up + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__up_wakeup) + +/* + * rw spinlock fallbacks + */ +#ifdef CONFIG_SMP +ENTRY(__write_lock_failed) + CFI_STARTPROC simple + FRAME +2: LOCK_PREFIX + addl $ RW_LOCK_BIAS,(%eax) +1: rep; nop + cmpl $ RW_LOCK_BIAS,(%eax) + jne 1b + LOCK_PREFIX + subl $ RW_LOCK_BIAS,(%eax) + jnz 2b + ENDFRAME + ret + CFI_ENDPROC + END(__write_lock_failed) + +ENTRY(__read_lock_failed) + CFI_STARTPROC + FRAME +2: LOCK_PREFIX + incl (%eax) +1: rep; nop + cmpl $1,(%eax) + js 1b + LOCK_PREFIX + decl (%eax) + js 2b + ENDFRAME + ret + CFI_ENDPROC + END(__read_lock_failed) + +#endif + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_down_read_failed + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + calll rwsem_down_write_failed + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + CFI_STARTPROC + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call rwsem_wake + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 +1: ret + CFI_ENDPROC + END(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_downgrade_wake + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_downgrade_wake) + diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index efc7e7d5f4d0..08502fc6d0cb 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -739,7 +739,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && current->pid == 1) { + if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); blk_congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c index ef7a6e6fcb9f..33d9f93557ba 100644 --- a/arch/i386/mach-generic/bigsmp.c +++ b/arch/i386/mach-generic/bigsmp.c @@ -5,6 +5,7 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> +#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c index 845cdd0b3593..aa144d82334d 100644 --- a/arch/i386/mach-generic/es7000.c +++ b/arch/i386/mach-generic/es7000.c @@ -4,6 +4,7 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> +#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index bcd1bcfaa723..94b1fd9cbe3c 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/init.h> +#include <linux/errno.h> #include <asm/fixmap.h> #include <asm/mpspec.h> #include <asm/apicdef.h> @@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = { NULL, }; -static int cmdline_apic; +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + genapic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + return -ENOENT; +} +early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { @@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void) } } -void __init generic_apic_probe(char *command_line) +void __init generic_apic_probe(void) { - char *s; - int i; - int changed = 0; - - s = strstr(command_line, "apic="); - if (s && (s == command_line || isspace(s[-1]))) { - char *p = strchr(s, ' '), old; - if (!p) - p = strchr(s, '\0'); - old = *p; - *p = 0; - for (i = 0; !changed && apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, s+5)) { - changed = 1; + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { genapic = apic_probe[i]; + break; } } - if (!changed) - printk(KERN_ERR "Unknown genapic `%s' specified.\n", s); - *p = old; - cmdline_apic = changed; - } - for (i = 0; !changed && apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - changed = 1; - genapic = apic_probe[i]; - } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); } - /* Not visible without early console */ - if (!changed) - panic("Didn't find an APIC driver"); - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } @@ -119,7 +117,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +#ifdef CONFIG_SMP int hard_smp_processor_id(void) { return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); } +#endif diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c index b73501ddd653..f7e5d66648dc 100644 --- a/arch/i386/mach-generic/summit.c +++ b/arch/i386/mach-generic/summit.c @@ -4,6 +4,7 @@ #define APIC_DEFINITION 1 #include <linux/threads.h> #include <linux/cpumask.h> +#include <asm/smp.h> #include <asm/mpspec.h> #include <asm/genapic.h> #include <asm/fixmap.h> diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c index 828522541a88..5929f884d79b 100644 --- a/arch/i386/mach-visws/visws_apic.c +++ b/arch/i386/mach-visws/visws_apic.c @@ -1,5 +1,5 @@ /* - * linux/arch/i386/mach_visws/visws_apic.c + * linux/arch/i386/mach-visws/visws_apic.c * * Copyright (C) 1999 Bent Hagemark, Ingo Molnar * diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 6c86575ffdcb..856c73fcb7e7 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -99,6 +99,7 @@ static void do_boot_cpu(__u8 cpuid); static void do_quad_bootstrap(void); int hard_smp_processor_id(void); +int safe_smp_processor_id(void); /* Inline functions */ static inline void @@ -1247,6 +1248,12 @@ hard_smp_processor_id(void) return 0; } +int +safe_smp_processor_id(void) +{ + return hard_smp_processor_id(); +} + /* broadcast a halt to all other CPUs */ void smp_send_stop(void) diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index fb5d8b747de4..455597db84df 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -153,23 +153,7 @@ static void __init find_max_pfn_node(int nid) */ if (node_start_pfn[nid] > max_pfn) node_start_pfn[nid] = max_pfn; - if (node_start_pfn[nid] > node_end_pfn[nid]) - BUG(); -} - -/* Find the owning node for a pfn. */ -int early_pfn_to_nid(unsigned long pfn) -{ - int nid; - - for_each_node(nid) { - if (node_end_pfn[nid] == 0) - break; - if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) - return nid; - } - - return 0; + BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); } /* @@ -227,6 +211,8 @@ static unsigned long calculate_numa_remap_pages(void) unsigned long pfn; for_each_online_node(nid) { + unsigned old_end_pfn = node_end_pfn[nid]; + /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -276,6 +262,7 @@ static unsigned long calculate_numa_remap_pages(void) node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; + shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); @@ -322,6 +309,11 @@ unsigned long __init setup_memory(void) highstart_pfn = system_max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = system_max_low_pfn; + high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); @@ -364,45 +356,22 @@ void __init numa_kva_reserve(void) void __init zone_sizes_init(void) { int nid; - - - for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned long *zholes_size; - unsigned int max_dma; - - unsigned long low = max_low_pfn; - unsigned long start = node_start_pfn[nid]; - unsigned long high = node_end_pfn[nid]; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - if (node_has_online_mem(nid)){ - if (start > low) { -#ifdef CONFIG_HIGHMEM - BUG_ON(start > high); - zones_size[ZONE_HIGHMEM] = high - start; -#endif - } else { - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - BUG_ON(max_dma > low); - BUG_ON(low > high); - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - } + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn + }; + + /* If SRAT has not registered memory, register it now */ + if (find_max_pfn_with_active_regions() == 0) { + for_each_online_node(nid) { + if (node_has_online_mem(nid)) + add_active_range(nid, node_start_pfn[nid], + node_end_pfn[nid]); } - - zholes_size = get_zholes_size(nid); - - free_area_init_node(nid, NODE_DATA(nid), zones_size, start, - zholes_size); } + + free_area_init_nodes(max_zone_pfns); return; } diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c index de03c5430abc..0ce4f22a2635 100644 --- a/arch/i386/mm/extable.c +++ b/arch/i386/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) + if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index f7279468323a..2581575786c1 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -27,21 +27,24 @@ #include <asm/uaccess.h> #include <asm/desc.h> #include <asm/kdebug.h> +#include <asm/segment.h> extern void die(const char *,struct pt_regs *,long); -#ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + int register_page_fault_notifier(struct notifier_block *nb) { vmalloc_sync_all(); return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(register_page_fault_notifier); int unregister_page_fault_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); static inline int notify_page_fault(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) @@ -55,14 +58,6 @@ static inline int notify_page_fault(enum die_val val, const char *str, }; return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); } -#else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - return NOTIFY_DONE; -} -#endif - /* * Unlock any spinlocks which will prevent us from getting the @@ -119,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, } /* The standard kernel/user address space limit. */ - *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; /* By far the most common cases. */ - if (likely(seg == __USER_CS || seg == __KERNEL_CS)) + if (likely(SEGMENT_IS_FLAT_CODE(seg))) return eip; /* Check the segment exists, is within the current LDT/GDT size, @@ -436,11 +431,7 @@ good_area: write = 0; switch (error_code & 3) { default: /* 3: write, present */ -#ifdef TEST_VERIFY_AREA - if (regs->cs == KERNEL_CS) - printk("WP fault at %08lx\n", regs->eip); -#endif - /* fall through */ + /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; @@ -449,7 +440,7 @@ good_area: case 1: /* read, present */ goto bad_area; case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; } @@ -598,7 +589,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (tsk->pid == 1) { + if (is_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index b6eb4dcb8777..f9f647cdbc7b 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -38,23 +38,20 @@ void *kmap_atomic(struct page *page, enum km_type type) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM if (!pte_none(*(kmap_pte-idx))) BUG(); -#endif set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); - __flush_tlb_one(vaddr); return (void*) vaddr; } void kunmap_atomic(void *kvaddr, enum km_type type) { -#ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) { // FIXME +#ifdef CONFIG_DEBUG_HIGHMEM + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { dec_preempt_count(); preempt_check_resched(); return; @@ -62,14 +59,14 @@ void kunmap_atomic(void *kvaddr, enum km_type type) if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) BUG(); - +#endif /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - __flush_tlb_one(vaddr); -#endif + kpte_clear_flush(kmap_pte-idx, vaddr); dec_preempt_count(); preempt_check_resched(); @@ -88,7 +85,6 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); - __flush_tlb_one(vaddr); return (void*) vaddr; } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index efd0bcdac65d..167416155ee4 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; * on Enable * off Disable */ -void __init noexec_setup(const char *str) +static int __init noexec_setup(char *str) { - if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str,"off",3)) { + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; - } + } else + return -EINVAL; + + return 0; } +early_param("noexec", noexec_setup); int nx_enabled = 0; #ifdef CONFIG_X86_PAE @@ -487,6 +493,7 @@ int __init set_kernel_exec(unsigned long vaddr, int enable) pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); else pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); + pte_update_defer(&init_mm, vaddr, pte); __flush_tlb_all(); out: return ret; @@ -552,18 +559,6 @@ static void __init test_wp_bit(void) } } -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - num_physpages = highend_pfn; -#else - num_physpages = max_low_pfn; -#endif -#ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; -#endif -} - static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) @@ -574,8 +569,7 @@ void __init mem_init(void) int bad_ppro; #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif bad_ppro = ppro_with_ram_bug(); @@ -590,14 +584,6 @@ void __init mem_init(void) } #endif - set_max_mapnr_init(); - -#ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 247fde76aaed..fff08ae7b5ed 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -21,82 +21,6 @@ #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 -static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pte_t *pte; - unsigned long pfn; - - pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(pmd, addr); - if (!pte) - return -ENOMEM; - do { - BUG_ON(!pte_none(*pte)); - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); - return 0; -} - -static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pmd_t *pmd; - unsigned long next; - - phys_addr -= addr; - pmd = pmd_alloc(&init_mm, pud, addr); - if (!pmd) - return -ENOMEM; - do { - next = pmd_addr_end(addr, end); - if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, flags)) - return -ENOMEM; - } while (pmd++, addr = next, addr != end); - return 0; -} - -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pud_t *pud; - unsigned long next; - - phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); - if (!pud) - return -ENOMEM; - do { - next = pud_addr_end(addr, end); - if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, flags)) - return -ENOMEM; - } while (pud++, addr = next, addr != end); - return 0; -} - -static int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, unsigned long flags) -{ - pgd_t *pgd; - unsigned long next; - int err; - - BUG_ON(addr >= end); - flush_cache_all(); - phys_addr -= addr; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); - if (err) - break; - } while (pgd++, addr = next, addr != end); - flush_tlb_all(); - return err; -} - /* * Generic mapping function (not visible outside): */ @@ -115,6 +39,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l void __iomem * addr; struct vm_struct * area; unsigned long offset, last_addr; + pgprot_t prot; /* Don't allow wraparound or zero size */ last_addr = phys_addr + size - 1; @@ -142,6 +67,9 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l return NULL; } + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | flags); + /* * Mappings have to be page-aligned */ @@ -158,7 +86,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l area->phys_addr = phys_addr; addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long) addr, - (unsigned long) addr + size, phys_addr, flags)) { + (unsigned long) addr + size, phys_addr, prot)) { vunmap((void __force *) addr); return NULL; } diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 5f8dc8a21bd7..3700eef78743 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -17,14 +17,15 @@ #include <asm/nmi.h> #include <asm/msr.h> #include <asm/apic.h> +#include <asm/kdebug.h> #include "op_counter.h" #include "op_x86_model.h" - + static struct op_x86_model_spec const * model; static struct op_msrs cpu_msrs[NR_CPUS]; static unsigned long saved_lvtpc[NR_CPUS]; - + static int nmi_start(void); static void nmi_stop(void); @@ -82,13 +83,24 @@ static void exit_driverfs(void) #define exit_driverfs() do { } while (0) #endif /* CONFIG_PM */ - -static int nmi_callback(struct pt_regs * regs, int cpu) +static int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - return model->check_ctrs(regs, &cpu_msrs[cpu]); + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + int cpu = smp_processor_id(); + + switch(val) { + case DIE_NMI: + if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } - - + static void nmi_cpu_save_registers(struct op_msrs * msrs) { unsigned int const nr_ctrs = model->num_counters; @@ -98,15 +110,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs) unsigned int i; for (i = 0; i < nr_ctrs; ++i) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); + if (counters[i].addr){ + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } } for (i = 0; i < nr_ctrls; ++i) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + if (controls[i].addr){ + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } } } @@ -170,27 +186,29 @@ static void nmi_cpu_setup(void * dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = 0 +}; static int nmi_setup(void) { + int err=0; + if (!allocate_msrs()) return -ENOMEM; - /* We walk a thin line between law and rape here. - * We need to be careful to install our NMI handler - * without actually triggering any NMIs as this will - * break the core code horrifically. - */ - if (reserve_lapic_nmi() < 0) { + if ((err = register_die_notifier(&profile_exceptions_nb))){ free_msrs(); - return -EBUSY; + return err; } + /* We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ on_each_cpu(nmi_save_registers, NULL, 0, 1); on_each_cpu(nmi_cpu_setup, NULL, 0, 1); - set_nmi_callback(nmi_callback); nmi_enabled = 1; return 0; } @@ -205,15 +223,19 @@ static void nmi_restore_registers(struct op_msrs * msrs) unsigned int i; for (i = 0; i < nr_ctrls; ++i) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + if (controls[i].addr){ + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } } for (i = 0; i < nr_ctrs; ++i) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); + if (counters[i].addr){ + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } } } @@ -234,6 +256,7 @@ static void nmi_cpu_shutdown(void * dummy) apic_write(APIC_LVTPC, saved_lvtpc[cpu]); apic_write(APIC_LVTERR, v); nmi_restore_registers(msrs); + model->shutdown(msrs); } @@ -241,8 +264,7 @@ static void nmi_shutdown(void) { nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); - unset_nmi_callback(); - release_lapic_nmi(); + unregister_die_notifier(&profile_exceptions_nb); free_msrs(); } @@ -284,6 +306,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root) struct dentry * dir; char buf[4]; + /* quick little hack to _not_ expose a counter if it is not + * available for use. This should protect userspace app. + * NOTE: assumes 1:1 mapping here (that counters are organized + * sequentially in their struct assignment). + */ + if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + continue; + snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index 930a1127bb30..abf0ba52a635 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -17,34 +17,49 @@ #include <asm/nmi.h> #include <asm/apic.h> #include <asm/ptrace.h> +#include <asm/kdebug.h> -static int nmi_timer_callback(struct pt_regs * regs, int cpu) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - oprofile_add_sample(regs, 0); - return 1; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch(val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = 0 +}; + static int timer_start(void) { - disable_timer_nmi_watchdog(); - set_nmi_callback(nmi_timer_callback); + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; return 0; } static void timer_stop(void) { - enable_timer_nmi_watchdog(); - unset_nmi_callback(); + unregister_die_notifier(&profile_timer_exceptions_nb); synchronize_sched(); /* Allow already-started NMIs to complete. */ } int __init op_nmi_timer_init(struct oprofile_operations * ops) { - extern int nmi_active; - - if (nmi_active <= 0) + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) return -ENODEV; ops->start = timer_start; diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c index 693bdea4a52b..3057a19e4641 100644 --- a/arch/i386/oprofile/op_model_athlon.c +++ b/arch/i386/oprofile/op_model_athlon.c @@ -21,10 +21,12 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) @@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS]; static void athlon_fill_in_addresses(struct op_msrs * const msrs) { - msrs->counters[0].addr = MSR_K7_PERFCTR0; - msrs->counters[1].addr = MSR_K7_PERFCTR1; - msrs->counters[2].addr = MSR_K7_PERFCTR2; - msrs->counters[3].addr = MSR_K7_PERFCTR3; - - msrs->controls[0].addr = MSR_K7_EVNTSEL0; - msrs->controls[1].addr = MSR_K7_EVNTSEL1; - msrs->controls[2].addr = MSR_K7_EVNTSEL2; - msrs->controls[3].addr = MSR_K7_EVNTSEL3; + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } + + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } } @@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); CTRL_WRITE(low, high, msrs, i); } - + /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); @@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs, int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); @@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (!reset_value[i]) + continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } +static void athlon_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } +} struct op_x86_model_spec const op_athlon_spec = { .num_counters = NUM_COUNTERS, @@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = { .setup_ctrs = &athlon_setup_ctrs, .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, - .stop = &athlon_stop + .stop = &athlon_stop, + .shutdown = &athlon_shutdown }; diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index 7c61d357b82b..47925927b12f 100644 --- a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c @@ -32,7 +32,7 @@ #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) static unsigned int num_counters = NUM_COUNTERS_NON_HT; - +static unsigned int num_controls = NUM_CONTROLS_NON_HT; /* this has to be checked dynamically since the hyper-threadedness of a chip is discovered at @@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT; static inline void setup_num_counters(void) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (smp_num_siblings == 2){ num_counters = NUM_COUNTERS_HT2; + num_controls = NUM_CONTROLS_HT2; + } #endif } @@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT -/* All cccr we don't use. */ -static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { - MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, - MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, - MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, - MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, - MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 -}; - /* p4 event codes in libop/op_event.h are indices into this table. */ static struct p4_event_binding p4_events[NUM_EVENTS] = { @@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) @@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; static void p4_fill_in_addresses(struct op_msrs * const msrs) { unsigned int i; - unsigned int addr, stag; + unsigned int addr, cccraddr, stag; setup_num_counters(); stag = get_stagger(); - /* the counter registers we pay attention to */ + /* initialize some registers */ for (i = 0; i < num_counters; ++i) { - msrs->counters[i].addr = - p4_counters[VIRT_CTR(stag, i)].counter_address; + msrs->counters[i].addr = 0; } - - /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ - - /* 18 CCCR registers */ - for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; - addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + for (i = 0; i < num_controls; ++i) { + msrs->controls[i].addr = 0; } + /* the counter & cccr registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + addr = p4_counters[VIRT_CTR(stag, i)].counter_address; + cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; + if (reserve_perfctr_nmi(addr)){ + msrs->counters[i].addr = addr; + msrs->controls[i].addr = cccraddr; + } + } + /* 43 ESCR registers in three or four discontiguous group */ for (addr = MSR_P4_BSU_ESCR0 + stag; addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 @@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) if (boot_cpu_data.x86_model >= 0x3) { for (addr = MSR_P4_BSU_ESCR0 + stag; addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } } else { for (addr = MSR_P4_IQ_ESCR0 + stag; addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } } for (addr = MSR_P4_RAT_ESCR0 + stag; addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } for (addr = MSR_P4_MS_ESCR0 + stag; addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } for (addr = MSR_P4_IX_ESCR0 + stag; addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } /* there are 2 remaining non-contiguously located ESCRs */ if (num_counters == NUM_COUNTERS_NON_HT) { /* standard non-HT CPUs handle both remaining ESCRs*/ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; } else if (stag == 0) { /* HT CPUs give the first remainder to the even thread, as the 32nd control register */ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; } else { /* and two copies of the second to the odd thread, for the 22st and 23nd control registers */ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } } } @@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; - unsigned int addr; unsigned int stag; stag = get_stagger(); @@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); CCCR_SET_REQUIRED_BITS(low); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } - /* clear cccrs outside our concern */ - for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { - rdmsr(p4_unused_cccr[i], low, high); - CCCR_CLEAR(low); - CCCR_SET_REQUIRED_BITS(low); - wrmsr(p4_unused_cccr[i], low, high); - } - /* clear all escrs (including those outside our concern) */ - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { - wrmsr(addr, 0, 0); - } - - /* On older models clear also MSR_P4_IQ_ESCR0/1 */ - if (boot_cpu_data.x86_model < 0x3) { - wrmsr(MSR_P4_IQ_ESCR0, 0, 0); - wrmsr(MSR_P4_IQ_ESCR1, 0, 0); - } - - for (addr = MSR_P4_RAT_ESCR0 + stag; - addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - wrmsr(addr, 0, 0); - } - - for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ - wrmsr(addr, 0, 0); - } - - for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ - wrmsr(addr, 0, 0); + for (i = num_counters; i < num_controls; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + wrmsr(msrs->controls[i].addr, 0, 0); } - if (num_counters == NUM_COUNTERS_NON_HT) { - wrmsr(MSR_P4_CRU_ESCR4, 0, 0); - wrmsr(MSR_P4_CRU_ESCR5, 0, 0); - } else if (stag == 0) { - wrmsr(MSR_P4_CRU_ESCR4, 0, 0); - } else { - wrmsr(MSR_P4_CRU_ESCR5, 0, 0); - } - /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); @@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs) stag = get_stagger(); for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); CCCR_SET_DISABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); } } +static void p4_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < num_counters ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(msrs->counters[i].addr); + } + /* some of the control registers are specially reserved in + * conjunction with the counter registers (hence the starting offset). + * This saves a few bits. + */ + for (i = num_counters ; i < num_controls ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(msrs->controls[i].addr); + } +} + #ifdef CONFIG_SMP struct op_x86_model_spec const op_p4_ht2_spec = { @@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, .start = &p4_start, - .stop = &p4_stop + .stop = &p4_stop, + .shutdown = &p4_shutdown }; #endif @@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = { .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, .start = &p4_start, - .stop = &p4_stop + .stop = &p4_stop, + .shutdown = &p4_shutdown }; diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index 5c3ab4b027ad..ca2447e05e15 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c @@ -22,10 +22,12 @@ #define NUM_COUNTERS 2 #define NUM_CONTROLS 2 +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) @@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS]; static void ppro_fill_in_addresses(struct op_msrs * const msrs) { - msrs->counters[0].addr = MSR_P6_PERFCTR0; - msrs->counters[1].addr = MSR_P6_PERFCTR1; + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } - msrs->controls[0].addr = MSR_P6_EVNTSEL0; - msrs->controls[1].addr = MSR_P6_EVNTSEL1; + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } } @@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); CTRL_WRITE(low, high, msrs, i); @@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); @@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT(low, counter_config[i].event); CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; } } } @@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs, int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); @@ -118,18 +138,44 @@ static int ppro_check_ctrs(struct pt_regs * const regs, static void ppro_start(struct op_msrs const * const msrs) { unsigned int low,high; - CTRL_READ(low, high, msrs, 0); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + int i; + + for (i = 0; i < NUM_COUNTERS; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } } static void ppro_stop(struct op_msrs const * const msrs) { unsigned int low,high; - CTRL_READ(low, high, msrs, 0); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + int i; + + for (i = 0; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } +} + +static void ppro_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_P6_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); + } } @@ -140,5 +186,6 @@ struct op_x86_model_spec const op_ppro_spec = { .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, .start = &ppro_start, - .stop = &ppro_stop + .stop = &ppro_stop, + .shutdown = &ppro_shutdown }; diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h index 123b7e90a9ee..abb1aa95b979 100644 --- a/arch/i386/oprofile/op_x86_model.h +++ b/arch/i386/oprofile/op_x86_model.h @@ -40,6 +40,7 @@ struct op_x86_model_spec { struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index 62ad75c57e6a..1594d2f55c8f 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile @@ -11,4 +11,4 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o -obj-y += $(pci-y) common.o +obj-y += $(pci-y) common.o early.o diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 0a362e3aeac5..68bce194e688 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -242,6 +242,10 @@ char * __devinit pcibios_setup(char *str) acpi_noirq_set(); return NULL; } + else if (!strcmp(str, "noearly")) { + pci_probe |= PCI_PROBE_NOEARLY; + return NULL; + } #ifndef CONFIG_X86_VISWS else if (!strcmp(str, "usepirqmask")) { pci_probe |= PCI_USE_PIRQ_MASK; diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 5d81fb510375..5acf0b4743cf 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -254,7 +254,16 @@ static int __init pci_check_type2(void) return works; } -void __init pci_direct_init(void) +void __init pci_direct_init(int type) +{ + printk(KERN_INFO "PCI: Using configuration type %d\n", type); + if (type == 1) + raw_pci_ops = &pci_direct_conf1; + else + raw_pci_ops = &pci_direct_conf2; +} + +int __init pci_direct_probe(void) { struct resource *region, *region2; @@ -264,19 +273,16 @@ void __init pci_direct_init(void) if (!region) goto type2; - if (pci_check_type1()) { - printk(KERN_INFO "PCI: Using configuration type 1\n"); - raw_pci_ops = &pci_direct_conf1; - return; - } + if (pci_check_type1()) + return 1; release_resource(region); type2: if ((pci_probe & PCI_PROBE_CONF2) == 0) - return; + return 0; region = request_region(0xCF8, 4, "PCI conf2"); if (!region) - return; + return 0; region2 = request_region(0xC000, 0x1000, "PCI conf2"); if (!region2) goto fail2; @@ -284,10 +290,11 @@ void __init pci_direct_init(void) if (pci_check_type2()) { printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; - return; + return 2; } release_resource(region2); fail2: release_resource(region); + return 0; } diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c new file mode 100644 index 000000000000..713d6c866cae --- /dev/null +++ b/arch/i386/pci/early.c @@ -0,0 +1,52 @@ +#include <linux/kernel.h> +#include <linux/pci.h> +#include <asm/pci-direct.h> +#include <asm/io.h> +#include "pci.h" + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +#define PDprintk(x...) + +u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + if (v != 0xffffffff) + PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); + return v; +} + +u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) +{ + u8 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inb(0xcfc + (offset&3)); + PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); + return v; +} + +u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) +{ + u16 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inw(0xcfc + (offset&2)); + PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); + return v; +} + +void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, + u32 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outl(val, 0xcfc); +} + +int early_pci_allowed(void) +{ + return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == + PCI_PROBE_CONF1; +} diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 83c3645ccc43..b60d7e8689ed 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -393,7 +393,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); * We pretend to bring them out of full D3 state, and restore the proper * IRQ, PCI cache line size, and BARs, otherwise the device won't function * properly. In some cases, the device will generate an interrupt on - * the wrong IRQ line, causing any devices sharing the the line it's + * the wrong IRQ line, causing any devices sharing the line it's * *supposed* to use to be disabled by the kernel's IRQ debug code. */ static u16 toshiba_line_size; diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c index 51087a9d9172..d028e1b05c36 100644 --- a/arch/i386/pci/init.c +++ b/arch/i386/pci/init.c @@ -6,8 +6,13 @@ in the right sequence from here. */ static __init int pci_access_init(void) { + int type = 0; + +#ifdef CONFIG_PCI_DIRECT + type = pci_direct_probe(); +#endif #ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(); + pci_mmcfg_init(type); #endif if (raw_pci_ops) return 0; @@ -21,7 +26,7 @@ static __init int pci_access_init(void) * fails. */ #ifdef CONFIG_PCI_DIRECT - pci_direct_init(); + pci_direct_init(type); #endif return 0; } diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 4a8995c9c762..47f02af74be3 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -981,10 +981,6 @@ static void __init pcibios_fixup_irqs(void) pci_name(bridge), 'A' + pin, irq); } if (irq >= 0) { - if (use_pci_vector() && - !platform_legacy_irq(irq)) - irq = IO_APIC_VECTOR(irq); - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", pci_name(dev), 'A' + pin, irq); dev->irq = irq; @@ -1169,33 +1165,3 @@ static int pirq_enable_irq(struct pci_dev *dev) } return 0; } - -int pci_vector_resources(int last, int nr_released) -{ - int count = nr_released; - - int next = last; - int offset = (last % 8); - - while (next < FIRST_SYSTEM_VECTOR) { - next += 8; -#ifdef CONFIG_X86_64 - if (next == IA32_SYSCALL_VECTOR) - continue; -#else - if (next == SYSCALL_VECTOR) - continue; -#endif - count++; - if (next >= FIRST_SYSTEM_VECTOR) { - if (offset%8) { - next = FIRST_DEVICE_VECTOR + offset; - offset++; - continue; - } - count--; - } - } - - return count; -} diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 972180f738d9..d0c3da3aa2aa 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -67,7 +67,10 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) return 0; } -static inline void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) +/* + * This is always called under pci_config_lock + */ +static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { u32 dev_base = base | (bus << 20) | (devfn << 12); if (dev_base != mmcfg_last_accessed_device) { @@ -151,6 +154,38 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; + +static __init void pci_mmcfg_insert_resources(void) +{ +#define PCI_MMCFG_RESOURCE_NAME_LEN 19 + int i; + struct resource *res; + char *names; + unsigned num_buses; + + res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), + pci_mmcfg_config_num, GFP_KERNEL); + + if (!res) { + printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); + return; + } + + names = (void *)&res[pci_mmcfg_config_num]; + for (i = 0; i < pci_mmcfg_config_num; i++, res++) { + num_buses = pci_mmcfg_config[i].end_bus_number - + pci_mmcfg_config[i].start_bus_number + 1; + res->name = names; + snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", + pci_mmcfg_config[i].pci_segment_group_number); + res->start = pci_mmcfg_config[i].base_address; + res->end = res->start + (num_buses << 20) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + names += PCI_MMCFG_RESOURCE_NAME_LEN; + } +} + /* K8 systems have some devices (typically in the builtin northbridge) that are only accessible using type1 Normally this can be expressed in the MCFG by not listing them @@ -187,7 +222,9 @@ static __init void unreachable_devices(void) } } -void __init pci_mmcfg_init(void) + + +void __init pci_mmcfg_init(int type) { if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; @@ -198,7 +235,9 @@ void __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return; - if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + /* Only do this check when type 1 works. If it doesn't work + assume we run on a Mac and always use MCFG */ + if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address, pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, E820_RESERVED)) { printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", @@ -212,4 +251,5 @@ void __init pci_mmcfg_init(void) pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; unreachable_devices(); + pci_mmcfg_insert_resources(); } diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index bf4e79335388..1814f74569c6 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -17,6 +17,7 @@ #define PCI_PROBE_CONF2 0x0004 #define PCI_PROBE_MMCONF 0x0008 #define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 @@ -81,7 +82,9 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus, extern int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value); -extern void pci_direct_init(void); +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(void); +extern void pci_mmcfg_init(int type); extern void pcibios_sort(void); + |