diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-02-26 13:02:23 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-02-26 13:02:23 +0100 |
commit | 8e818179eb9e8f9e44d8410dd2a25077d026a08e (patch) | |
tree | 7d08afd30c95c04129c20693d974a18799caeb5a /arch/x86 | |
parent | perfcounters/powerpc: Add support for POWER5 processors (diff) | |
parent | Merge branches 'x86/apic', 'x86/defconfig', 'x86/memtest', 'x86/mm' and 'linu... (diff) | |
download | linux-8e818179eb9e8f9e44d8410dd2a25077d026a08e.tar.xz linux-8e818179eb9e8f9e44d8410dd2a25077d026a08e.zip |
Merge branch 'x86/core' into perfcounters/core
Conflicts:
arch/x86/kernel/apic/apic.c
arch/x86/kernel/irqinit_32.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
121 files changed, 1167 insertions, 5447 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8780ffde6eff..2535427020b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -285,6 +285,7 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs +if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" default y @@ -293,12 +294,36 @@ config X86_EXTENDED_PLATFORM standard PC platforms. (which covers the vast majority of systems out there.) - If you enable this option then you'll be able to select a number - of non-PC x86 platforms. + If you enable this option then you'll be able to select support + for the following (non-PC) 32 bit x86 platforms: + AMD Elan + NUMAQ (IBM/Sequent) + RDC R-321x SoC + SGI 320/540 (Visual Workstation) + Summit/EXA (IBM x440) + Unisys ES7000 IA32 series If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. +endif + +if X86_64 +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y + ---help--- + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + If you enable this option then you'll be able to select support + for the following (non-PC) 64 bit x86 platforms: + ScaleMP vSMP + SGI Ultraviolet + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. +endif # This is an alphabetically sorted list of 64 bit extended platforms # Please maintain the alphabetic order if and when there are additions @@ -397,19 +422,6 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on SMP && !PCI && BROKEN - depends on X86_32_NON_STANDARD - ---help--- - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ba4781b93890..fdb45df608b6 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -175,28 +175,8 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE - bool "Memory mapped IO tracing" - depends on DEBUG_KERNEL && PCI - select TRACING - help - Mmiotrace traces Memory Mapped I/O access and is meant for - debugging and reverse engineering. It is called from the ioremap - implementation and works via page faults. Tracing is disabled by - default and can be enabled at run-time. - - See Documentation/tracers/mmiotrace.txt. - If you are not helping to develop drivers, say N. - -config MMIOTRACE_TEST - tristate "Test module for mmiotrace" - depends on MMIOTRACE && m - help - This is a dumb module for testing mmiotrace. It is very dangerous - as it will write garbage to IO memory starting at a given address. - However, it should be safe to use on e.g. unused portion of VRAM. - - Say N, unless you absolutely know what you are doing. +config HAVE_MMIOTRACE_SUPPORT + def_bool y # # IO delay types: diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index cd48c7210016..c70eff69a1fb 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -32,7 +32,6 @@ setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o setup-y += printf.o string.o tty.o video.o video-mode.o version.o setup-$(CONFIG_X86_APM_BOOT) += apm.o -setup-$(CONFIG_X86_VOYAGER) += voyager.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index fba8e9c6a504..7c19ce8c2442 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -126,11 +126,6 @@ static void enable_a20_fast(void) int enable_a20(void) { -#ifdef CONFIG_X86_VOYAGER - /* On Voyager, a20_test() is unsafe? */ - enable_a20_kbc(); - return 0; -#else int loops = A20_ENABLE_LOOPS; int kbc_err; @@ -164,5 +159,4 @@ int enable_a20(void) } return -1; -#endif } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index cc0ef13fba7a..7b2692e897e5 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -302,9 +302,6 @@ void probe_cards(int unsafe); /* video-vesa.c */ void vesa_store_edid(void); -/* voyager.c */ -int query_voyager(void); - #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 29c5fbf08392..3a8a866fb2e2 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -25,14 +25,12 @@ #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/boot.h> #include <asm/asm-offsets.h> .section ".text.head","ax",@progbits - .globl startup_32 - -startup_32: +ENTRY(startup_32) cld /* test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ @@ -113,6 +111,8 @@ startup_32: */ leal relocated(%ebx), %eax jmp *%eax +ENDPROC(startup_32) + .section ".text" relocated: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1d5dff4123e1..ed4a82948002 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -26,8 +26,8 @@ #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/pgtable.h> -#include <asm/page.h> +#include <asm/pgtable_types.h> +#include <asm/page_types.h> #include <asm/boot.h> #include <asm/msr.h> #include <asm/processor-flags.h> @@ -35,9 +35,7 @@ .section ".text.head" .code32 - .globl startup_32 - -startup_32: +ENTRY(startup_32) cld /* test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ @@ -176,6 +174,7 @@ startup_32: /* Jump from 32bit compatibility mode into 64bit mode. */ lret +ENDPROC(startup_32) no_longmode: /* This isn't an x86-64 CPU so hang */ @@ -295,7 +294,6 @@ relocated: call decompress_kernel popq %rsi - /* * Jump to the decompressed kernel. */ diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef50c84e8b4b..11f272c6f5e9 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -8,6 +8,8 @@ * * ----------------------------------------------------------------------- */ +#include <linux/linkage.h> + /* * Memory copy routines */ @@ -15,9 +17,7 @@ .code16gcc .text - .globl memcpy - .type memcpy, @function -memcpy: +GLOBAL(memcpy) pushw %si pushw %di movw %ax, %di @@ -31,11 +31,9 @@ memcpy: popw %di popw %si ret - .size memcpy, .-memcpy +ENDPROC(memcpy) - .globl memset - .type memset, @function -memset: +GLOBAL(memset) pushw %di movw %ax, %di movzbl %dl, %eax @@ -48,52 +46,42 @@ memset: rep; stosb popw %di ret - .size memset, .-memset +ENDPROC(memset) - .globl copy_from_fs - .type copy_from_fs, @function -copy_from_fs: +GLOBAL(copy_from_fs) pushw %ds pushw %fs popw %ds call memcpy popw %ds ret - .size copy_from_fs, .-copy_from_fs +ENDPROC(copy_from_fs) - .globl copy_to_fs - .type copy_to_fs, @function -copy_to_fs: +GLOBAL(copy_to_fs) pushw %es pushw %fs popw %es call memcpy popw %es ret - .size copy_to_fs, .-copy_to_fs +ENDPROC(copy_to_fs) #if 0 /* Not currently used, but can be enabled as needed */ - - .globl copy_from_gs - .type copy_from_gs, @function -copy_from_gs: +GLOBAL(copy_from_gs) pushw %ds pushw %gs popw %ds call memcpy popw %ds ret - .size copy_from_gs, .-copy_from_gs - .globl copy_to_gs +ENDPROC(copy_from_gs) - .type copy_to_gs, @function -copy_to_gs: +GLOBAL(copy_to_gs) pushw %es pushw %gs popw %es call memcpy popw %es ret - .size copy_to_gs, .-copy_to_gs - +ENDPROC(copy_to_gs) #endif diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b993062e9a5f..7ccff4884a23 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -19,7 +19,7 @@ #include <linux/utsrelease.h> #include <asm/boot.h> #include <asm/e820.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/setup.h> #include "boot.h" #include "offsets.h" diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 197421db1af1..58f0415d3ae0 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -149,11 +149,6 @@ void main(void) /* Query MCA information */ query_mca(); - /* Voyager */ -#ifdef CONFIG_X86_VOYAGER - query_voyager(); -#endif - /* Query Intel SpeedStep (IST) information */ query_ist(); diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index 141b6e20ed31..019c17a75851 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -15,18 +15,15 @@ #include <asm/boot.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <linux/linkage.h> .text - - .globl protected_mode_jump - .type protected_mode_jump, @function - .code16 /* * void protected_mode_jump(u32 entrypoint, u32 bootparams); */ -protected_mode_jump: +GLOBAL(protected_mode_jump) movl %edx, %esi # Pointer to boot_params table xorl %ebx, %ebx @@ -47,12 +44,10 @@ protected_mode_jump: .byte 0x66, 0xea # ljmpl opcode 2: .long in_pm32 # offset .word __BOOT_CS # segment - - .size protected_mode_jump, .-protected_mode_jump +ENDPROC(protected_mode_jump) .code32 - .type in_pm32, @function -in_pm32: +GLOBAL(in_pm32) # Set up data segments for flat 32-bit mode movl %ecx, %ds movl %ecx, %es @@ -78,5 +73,4 @@ in_pm32: lldt %cx jmpl *%eax # Jump to the 32-bit entrypoint - - .size in_pm32, .-in_pm32 +ENDPROC(in_pm32) diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c deleted file mode 100644 index 433909d61e5c..000000000000 --- a/arch/x86/boot/voyager.c +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * Get the Voyager config information - */ - -#include "boot.h" - -int query_voyager(void) -{ - u8 err; - u16 es, di; - /* Abuse the apm_bios_info area for this */ - u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; - - data_ptr[0] = 0xff; /* Flag on config not found(?) */ - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=q" (err), "=r" (es), "=D" (di) - : "a" (0xffc0)); - - if (err) - return -1; /* Not Voyager */ - - set_fs(es); - copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ - return 0; -} diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 096dd5359cd9..235b81d0f6f2 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:57:57 2009 +# Tue Feb 24 15:50:58 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -197,7 +197,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set @@ -267,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -# CONFIG_X86_MCE is not set +CONFIG_X86_MCE=y +CONFIG_X86_MCE_NONFATAL=y +CONFIG_X86_MCE_P4THERMAL=y CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2efb5d5063ff..9fe5d212ab4c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:57:29 2009 +# Tue Feb 24 15:44:16 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -199,7 +199,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y @@ -267,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -# CONFIG_X86_MCE is not set +CONFIG_X86_MCE=y +CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y # CONFIG_I8K is not set CONFIG_MICROCODE=y CONFIG_MICROCODE_INTEL=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index dd77ac0cac46..588a7aa937e1 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -33,8 +33,6 @@ #include <asm/sigframe.h> #include <asm/sys_ia32.h> -#define DEBUG_SIG 0 - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ @@ -190,42 +188,47 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, /* * Do a signal return; undo the signal stack. */ +#define loadsegment_gs(v) load_gs_index(v) +#define loadsegment_fs(v) loadsegment(fs, v) +#define loadsegment_ds(v) loadsegment(ds, v) +#define loadsegment_es(v) loadsegment(es, v) + +#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; }) +#define set_user_seg(seg, v) loadsegment_##seg(v) + #define COPY(x) { \ get_user_ex(regs->x, &sc->x); \ } -#define COPY_SEG_CPL3(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp | 3; \ -} +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) + +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) #define RELOAD_SEG(seg) { \ - unsigned int cur, pre; \ - get_user_ex(pre, &sc->seg); \ - savesegment(seg, cur); \ + unsigned int pre = GET_SEG(seg); \ + unsigned int cur = get_user_seg(seg); \ pre |= 3; \ if (pre != cur) \ - loadsegment(seg, pre); \ + set_user_seg(seg, pre); \ } static int ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *pax) { - unsigned int tmpflags, gs, oldgs, err = 0; + unsigned int tmpflags, err = 0; void __user *buf; u32 tmp; /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG restore_sigcontext: " - "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", - sc, sc->err, sc->ip, sc->cs, sc->flags); -#endif - get_user_try { /* * Reload fs and gs if they have changed in the signal @@ -233,12 +236,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, * the handler, but does not clobber them at least in the * normal case. */ - get_user_ex(gs, &sc->gs); - gs |= 3; - savesegment(gs, oldgs); - if (gs != oldgs) - load_gs_index(gs); - + RELOAD_SEG(gs); RELOAD_SEG(fs); RELOAD_SEG(ds); RELOAD_SEG(es); @@ -337,17 +335,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned int mask) { - int tmp, err = 0; + int err = 0; put_user_try { - savesegment(gs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->fs); - savesegment(ds, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->ds); - savesegment(es, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->es); + put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs); + put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs); + put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds); + put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es); put_user_ex(regs->di, &sc->di); put_user_ex(regs->si, &sc->si); @@ -488,11 +482,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->cs = __USER32_CS; regs->ss = __USER32_DS; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; } @@ -574,10 +563,5 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER32_CS; regs->ss = __USER32_DS; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; } diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index dce1bf696cca..a6208dc74633 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -146,7 +146,7 @@ static inline u64 native_x2apic_icr_read(void) return val; } -extern int x2apic; +extern int x2apic, x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h deleted file mode 100644 index cbd4957838a6..000000000000 --- a/arch/x86/include/asm/arch_hooks.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _ASM_X86_ARCH_HOOKS_H -#define _ASM_X86_ARCH_HOOKS_H - -#include <linux/interrupt.h> - -/* - * linux/include/asm/arch_hooks.h - * - * define the architecture specific hooks - */ - -/* these aren't arch hooks, they are generic routines - * that can be used by the hooks */ -extern void init_ISA_irqs(void); -extern irqreturn_t timer_interrupt(int irq, void *dev_id); - -/* these are the defined hooks */ -extern void intr_init_hook(void); -extern void pre_intr_init_hook(void); -extern void pre_setup_arch_hook(void); -extern void trap_init_hook(void); -extern void pre_time_init_hook(void); -extern void time_init_hook(void); -extern void mca_nmi_hook(void); - -#endif /* _ASM_X86_ARCH_HOOKS_H */ diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091eeb1f..1a99e6c092af 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -60,4 +60,8 @@ extern struct irq_chip i8259A_chip; extern void mask_8259A(void); extern void unmask_8259A(void); +#ifdef CONFIG_X86_32 +extern void init_ISA_irqs(void); +#endif + #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4f8e820cf38f..683d0b4c00fc 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -124,10 +124,15 @@ static inline void *phys_to_virt(phys_addr_t address) /* * ISA I/O bus memory addresses are 1:1 with the physical address. + * However, we truncate the address to unsigned int to avoid undesirable + * promitions in legacy drivers. */ -#define isa_virt_to_bus (unsigned long)virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt +static inline unsigned int isa_virt_to_bus(volatile void *address) +{ + return (unsigned int)virt_to_phys(address); +} +#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) +#define isa_bus_to_virt phys_to_virt /* * However PCI ones are not necessarily 1:1 and therefore these interfaces diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index c1f06289b14b..bd46495ff7de 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -23,6 +23,12 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> +int +reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot); + +void +free_io_memtype(u64 base, unsigned long size); + void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index b07278c55e9e..8a285f356f8a 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -128,7 +128,7 @@ #ifndef __ASSEMBLY__ static inline int invalid_vm86_irq(int irq) { - return irq < 3 || irq > 15; + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; } #endif diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608af..886c9402ec45 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include <linux/types.h> #include <linux/ioctl.h> +/* Select x86 specific features in <linux/kvm.h> */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5d98d0b68ffc..9320e2a8a26a 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -52,70 +52,14 @@ #endif +#define GLOBAL(name) \ + .globl name; \ + name: + #ifdef CONFIG_X86_ALIGNMENT_16 #define __ALIGN .align 16,0x90 #define __ALIGN_STR ".align 16,0x90" #endif -/* - * to check ENTRY_X86/END_X86 and - * KPROBE_ENTRY_X86/KPROBE_END_X86 - * unbalanced-missed-mixed appearance - */ -#define __set_entry_x86 .set ENTRY_X86_IN, 0 -#define __unset_entry_x86 .set ENTRY_X86_IN, 1 -#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 -#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 - -#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" - -#define __check_entry_x86 \ - .ifdef ENTRY_X86_IN; \ - .ifeq ENTRY_X86_IN; \ - __macro_err_x86; \ - .abort; \ - .endif; \ - .endif - -#define __check_kprobe_x86 \ - .ifdef KPROBE_X86_IN; \ - .ifeq KPROBE_X86_IN; \ - __macro_err_x86; \ - .abort; \ - .endif; \ - .endif - -#define __check_entry_kprobe_x86 \ - __check_entry_x86; \ - __check_kprobe_x86 - -#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 - -#define ENTRY_X86(name) \ - __check_entry_kprobe_x86; \ - __set_entry_x86; \ - .globl name; \ - __ALIGN; \ - name: - -#define END_X86(name) \ - __unset_entry_x86; \ - __check_entry_kprobe_x86; \ - .size name, .-name - -#define KPROBE_ENTRY_X86(name) \ - __check_entry_kprobe_x86; \ - __set_kprobe_x86; \ - .pushsection .kprobes.text, "ax"; \ - .globl name; \ - __ALIGN; \ - name: - -#define KPROBE_END_X86(name) \ - __unset_kprobe_x86; \ - __check_entry_kprobe_x86; \ - .size name, .-name; \ - .popsection - #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h deleted file mode 100644 index 9e5a459fd15b..000000000000 --- a/arch/x86/include/asm/mach-voyager/do_timer.h +++ /dev/null @@ -1,17 +0,0 @@ -/* defines for inline arch setup functions */ -#include <linux/clockchips.h> - -#include <asm/voyager.h> -#include <asm/i8253.h> - -/** - * do_timer_interrupt_hook - hook into timer tick - * - * Call the pit clock event handler. see asm/i8253.h - **/ -static inline void do_timer_interrupt_hook(void) -{ - global_clock_event->event_handler(global_clock_event); - voyager_timer_interrupt(); -} - diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h deleted file mode 100644 index ae52624b5937..000000000000 --- a/arch/x86/include/asm/mach-voyager/entry_arch.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: James.Bottomley@HansenPartnership.com - * - * linux/arch/i386/voyager/entry_arch.h - * - * This file builds the VIC and QIC CPI gates - */ - -/* initialise the voyager interrupt gates - * - * This uses the macros in irq.h to set up assembly jump gates. The - * calls are then redirected to the same routine with smp_ prefixed */ -BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT) -BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT) -BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0); - -/* do all the QIC interrupts */ -BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI); -BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI); -BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI); -BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI); -BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI); -BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI); diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h deleted file mode 100644 index 71729ca05cd7..000000000000 --- a/arch/x86/include/asm/mach-voyager/setup_arch.h +++ /dev/null @@ -1,12 +0,0 @@ -#include <asm/voyager.h> -#include <asm/setup.h> -#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ - (&boot_params.apm_bios_info)) - -/* Hook to call BIOS initialisation function */ - -/* for voyager, pass the voyager BIOS/SUS info area to the detection - * routines */ - -#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO); - diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca5..105fb90a0635 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9e..a29f48c2a322 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index b5486aaf36ec..f1e4a79a6e41 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -33,12 +33,10 @@ /* 44=32+12, the limit we can fit into an unsigned long pfn */ #define __PHYSICAL_MASK_SHIFT 44 #define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 3 #else /* !CONFIG_X86_PAE */ #define __PHYSICAL_MASK_SHIFT 32 #define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 2 #endif /* CONFIG_X86_PAE */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index bc73af3eda9c..d38c91b70248 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define PAGETABLE_LEVELS 4 - #define THREAD_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 2c52ff767584..2d625da6603c 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -16,12 +16,6 @@ (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ -#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) - -/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ -#define PTE_FLAGS_MASK (~PTE_PFN_MASK) - #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 9709fdff6615..b0e70056838e 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -15,4 +15,7 @@ extern int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type); extern int free_memtype(u64 start, u64 end); +extern int kernel_map_sync_memtype(u64 base, unsigned long size, + unsigned long flag); + #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 09ae67efcebd..daacc23e3fb9 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -17,6 +17,7 @@ typedef union { #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 +#define PAGETABLE_LEVELS 2 /* * traditional i386 two-level paging structure: @@ -25,6 +26,7 @@ typedef union { #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 + /* * the i386 is two-level, so we don't really have any * PMD directory physically. diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index bcc89625ebe5..1bd5876c8649 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -24,6 +24,8 @@ typedef union { #define SHARED_KERNEL_PMD 1 #endif +#define PAGETABLE_LEVELS 3 + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2f59135c6f2a..fbf42b8e0383 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -18,6 +18,7 @@ typedef struct { pteval_t pte; } pte_t; #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 +#define PAGETABLE_LEVELS 4 /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 9dafe87be2de..4d258ad76a0f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -173,6 +173,12 @@ #include <linux/types.h> +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) + typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dabab1a19ddd..c7a98f738210 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); #endif #endif /* X86_64 */ -extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; @@ -862,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x) * User space process size: 3GB (default). */ #define TASK_SIZE PAGE_OFFSET +#define TASK_SIZE_MAX TASK_SIZE #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP @@ -921,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. @@ -930,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); 0xc0000000 : 0xFFFFe000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX TASK_SIZE64 +#define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 8029369cd6f4..66801cb72f69 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -13,6 +13,7 @@ struct mpc_cpu; struct mpc_bus; struct mpc_oemtable; + struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); @@ -33,6 +34,14 @@ struct x86_quirks { int (*update_apic)(void); }; +extern void x86_quirk_pre_intr_init(void); +extern void x86_quirk_intr_init(void); + +extern void x86_quirk_trap_init(void); + +extern void x86_quirk_pre_time_init(void); +extern void x86_quirk_time_init(void); + #endif /* __ASSEMBLY__ */ #ifdef __i386__ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 258ef730aaa4..7043408f6904 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -82,7 +82,7 @@ asmlinkage long sys_iopl(unsigned int, struct pt_regs *); /* kernel/signal_64.c */ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); -asmlinkage long sys_rt_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 2bb6a835c453..a81195eaa2b3 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -3,6 +3,7 @@ #include <linux/init.h> #include <linux/pm.h> #include <linux/percpu.h> +#include <linux/interrupt.h> #define TICK_SIZE (tick_nsec / 1000) @@ -12,6 +13,7 @@ unsigned long native_calibrate_tsc(void); #ifdef CONFIG_X86_32 extern int timer_ack; extern int recalibrate_cpu_khz(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id); #endif /* CONFIG_X86_32 */ extern int no_timer_check; diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 5e06259e90e5..a0ba61386972 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) } static __always_inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { might_fault(); if (__builtin_constant_p(n)) { @@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, - unsigned long n) + unsigned long n, unsigned long total) { return __copy_from_user_ll_nocache_nozero(to, from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 84210c479fca..dcaa0404cf7b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -189,17 +189,28 @@ extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); static inline int __copy_from_user_nocache(void *dst, const void __user *src, - unsigned size) + unsigned size, unsigned long total) { might_sleep(); - return __copy_user_nocache(dst, src, size, 1); + /* + * In practice this limit means that large file write()s + * which get chunked to 4K copies get handled via + * non-temporal stores here. Smaller writes get handled + * via regular __copy_from_user(): + */ + if (likely(total >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 1); + else + return __copy_from_user(dst, src, size); } static inline int __copy_from_user_inatomic_nocache(void *dst, - const void __user *src, - unsigned size) + const void __user *src, unsigned size, unsigned total) { - return __copy_user_nocache(dst, src, size, 0); + if (likely(total >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 0); + else + return __copy_from_user_inatomic(dst, src, size); } unsigned long diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h deleted file mode 100644 index 53100f353612..000000000000 --- a/arch/x86/include/asm/vic.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager Interrupt Controller */ - -/* The eight CPI vectors. To activate a CPI, you write a bit mask - * corresponding to the processor set to be interrupted into the - * relevant register. That set of CPUs will then be interrupted with - * the CPI */ -static const int VIC_CPI_Registers[] = - {0xFC00, 0xFC01, 0xFC08, 0xFC09, - 0xFC10, 0xFC11, 0xFC18, 0xFC19 }; - -#define VIC_PROC_WHO_AM_I 0xfc29 -# define QUAD_IDENTIFIER 0xC0 -# define EIGHT_SLOT_IDENTIFIER 0xE0 -#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72 -#define VIC_CPI_BASE_REGISTER 0xFC41 -#define VIC_PROCESSOR_ID 0xFC21 -# define VIC_CPU_MASQUERADE_ENABLE 0x8 - -#define VIC_CLAIM_REGISTER_0 0xFC38 -#define VIC_CLAIM_REGISTER_1 0xFC39 -#define VIC_REDIRECT_REGISTER_0 0xFC60 -#define VIC_REDIRECT_REGISTER_1 0xFC61 -#define VIC_PRIORITY_REGISTER 0xFC20 - -#define VIC_PRIMARY_MC_BASE 0xFC48 -#define VIC_SECONDARY_MC_BASE 0xFC49 - -#define QIC_PROCESSOR_ID 0xFC71 -# define QIC_CPUID_ENABLE 0x08 - -#define QIC_VIC_CPI_BASE_REGISTER 0xFC79 -#define QIC_CPI_BASE_REGISTER 0xFC7A - -#define QIC_MASK_REGISTER0 0xFC80 -/* NOTE: these are masked high, enabled low */ -# define QIC_PERF_TIMER 0x01 -# define QIC_LPE 0x02 -# define QIC_SYS_INT 0x04 -# define QIC_CMN_INT 0x08 -/* at the moment, just enable CMN_INT, disable SYS_INT */ -# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */)) -#define QIC_MASK_REGISTER1 0xFC81 -# define QIC_BOOT_CPI_MASK 0xFE -/* Enable CPI's 1-6 inclusive */ -# define QIC_CPI_ENABLE 0x81 - -#define QIC_INTERRUPT_CLEAR0 0xFC8A -#define QIC_INTERRUPT_CLEAR1 0xFC8B - -/* this is where we place the CPI vectors */ -#define VIC_DEFAULT_CPI_BASE 0xC0 -/* this is where we place the QIC CPI vectors */ -#define QIC_DEFAULT_CPI_BASE 0xD0 - -#define VIC_BOOT_INTERRUPT_MASK 0xfe - -extern void smp_vic_timer_interrupt(void); diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h deleted file mode 100644 index c1635d43616f..000000000000 --- a/arch/x86/include/asm/voyager.h +++ /dev/null @@ -1,571 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager system */ - -#undef VOYAGER_DEBUG -#undef VOYAGER_CAT_DEBUG - -#ifdef VOYAGER_DEBUG -#define VDEBUG(x) printk x -#else -#define VDEBUG(x) -#endif - -/* There are three levels of voyager machine: 3,4 and 5. The rule is - * if it's less than 3435 it's a Level 3 except for a 3360 which is - * a level 4. A 3435 or above is a Level 5 */ -#define VOYAGER_LEVEL5_AND_ABOVE 0x3435 -#define VOYAGER_LEVEL4 0x3360 - -/* The L4 DINO ASIC */ -#define VOYAGER_DINO 0x43 - -/* voyager ports in standard I/O space */ -#define VOYAGER_MC_SETUP 0x96 - - -#define VOYAGER_CAT_CONFIG_PORT 0x97 -# define VOYAGER_CAT_DESELECT 0xff -#define VOYAGER_SSPB_RELOCATION_PORT 0x98 - -/* Valid CAT controller commands */ -/* start instruction register cycle */ -#define VOYAGER_CAT_IRCYC 0x01 -/* start data register cycle */ -#define VOYAGER_CAT_DRCYC 0x02 -/* move to execute state */ -#define VOYAGER_CAT_RUN 0x0F -/* end operation */ -#define VOYAGER_CAT_END 0x80 -/* hold in idle state */ -#define VOYAGER_CAT_HOLD 0x90 -/* single step an "intest" vector */ -#define VOYAGER_CAT_STEP 0xE0 -/* return cat controller to CLEMSON mode */ -#define VOYAGER_CAT_CLEMSON 0xFF - -/* the default cat command header */ -#define VOYAGER_CAT_HEADER 0x7F - -/* the range of possible CAT module ids in the system */ -#define VOYAGER_MIN_MODULE 0x10 -#define VOYAGER_MAX_MODULE 0x1f - -/* The voyager registers per asic */ -#define VOYAGER_ASIC_ID_REG 0x00 -#define VOYAGER_ASIC_TYPE_REG 0x01 -/* the sub address registers can be made auto incrementing on reads */ -#define VOYAGER_AUTO_INC_REG 0x02 -# define VOYAGER_AUTO_INC 0x04 -# define VOYAGER_NO_AUTO_INC 0xfb -#define VOYAGER_SUBADDRDATA 0x03 -#define VOYAGER_SCANPATH 0x05 -# define VOYAGER_CONNECT_ASIC 0x01 -# define VOYAGER_DISCONNECT_ASIC 0xfe -#define VOYAGER_SUBADDRLO 0x06 -#define VOYAGER_SUBADDRHI 0x07 -#define VOYAGER_SUBMODSELECT 0x08 -#define VOYAGER_SUBMODPRESENT 0x09 - -#define VOYAGER_SUBADDR_LO 0xff -#define VOYAGER_SUBADDR_HI 0xffff - -/* the maximum size of a scan path -- used to form instructions */ -#define VOYAGER_MAX_SCAN_PATH 0x100 -/* the biggest possible register size (in bytes) */ -#define VOYAGER_MAX_REG_SIZE 4 - -/* Total number of possible modules (including submodules) */ -#define VOYAGER_MAX_MODULES 16 -/* Largest number of asics per module */ -#define VOYAGER_MAX_ASICS_PER_MODULE 7 - -/* the CAT asic of each module is always the first one */ -#define VOYAGER_CAT_ID 0 -#define VOYAGER_PSI 0x1a - -/* voyager instruction operations and registers */ -#define VOYAGER_READ_CONFIG 0x1 -#define VOYAGER_WRITE_CONFIG 0x2 -#define VOYAGER_BYPASS 0xff - -typedef struct voyager_asic { - __u8 asic_addr; /* ASIC address; Level 4 */ - __u8 asic_type; /* ASIC type */ - __u8 asic_id; /* ASIC id */ - __u8 jtag_id[4]; /* JTAG id */ - __u8 asic_location; /* Location within scan path; start w/ 0 */ - __u8 bit_location; /* Location within bit stream; start w/ 0 */ - __u8 ireg_length; /* Instruction register length */ - __u16 subaddr; /* Amount of sub address space */ - struct voyager_asic *next; /* Next asic in linked list */ -} voyager_asic_t; - -typedef struct voyager_module { - __u8 module_addr; /* Module address */ - __u8 scan_path_connected; /* Scan path connected */ - __u16 ee_size; /* Size of the EEPROM */ - __u16 num_asics; /* Number of Asics */ - __u16 inst_bits; /* Instruction bits in the scan path */ - __u16 largest_reg; /* Largest register in the scan path */ - __u16 smallest_reg; /* Smallest register in the scan path */ - voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */ - struct voyager_module *submodule; /* Submodule pointer */ - struct voyager_module *next; /* Next module in linked list */ -} voyager_module_t; - -typedef struct voyager_eeprom_hdr { - __u8 module_id[4]; - __u8 version_id; - __u8 config_id; - __u16 boundry_id; /* boundary scan id */ - __u16 ee_size; /* size of EEPROM */ - __u8 assembly[11]; /* assembly # */ - __u8 assembly_rev; /* assembly rev */ - __u8 tracer[4]; /* tracer number */ - __u16 assembly_cksum; /* asm checksum */ - __u16 power_consump; /* pwr requirements */ - __u16 num_asics; /* number of asics */ - __u16 bist_time; /* min. bist time */ - __u16 err_log_offset; /* error log offset */ - __u16 scan_path_offset;/* scan path offset */ - __u16 cct_offset; - __u16 log_length; /* length of err log */ - __u16 xsum_end; /* offset to end of - checksum */ - __u8 reserved[4]; - __u8 sflag; /* starting sentinal */ - __u8 part_number[13]; /* prom part number */ - __u8 version[10]; /* version number */ - __u8 signature[8]; - __u16 eeprom_chksum; - __u32 data_stamp_offset; - __u8 eflag ; /* ending sentinal */ -} __attribute__((packed)) voyager_eprom_hdr_t; - - - -#define VOYAGER_EPROM_SIZE_OFFSET \ - ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size))) -#define VOYAGER_XSUM_END_OFFSET 0x2a - -/* the following three definitions are for internal table layouts - * in the module EPROMs. We really only care about the IDs and - * offsets */ -typedef struct voyager_sp_table { - __u8 asic_id; - __u8 bypass_flag; - __u16 asic_data_offset; - __u16 config_data_offset; -} __attribute__((packed)) voyager_sp_table_t; - -typedef struct voyager_jtag_table { - __u8 icode[4]; - __u8 runbist[4]; - __u8 intest[4]; - __u8 samp_preld[4]; - __u8 ireg_len; -} __attribute__((packed)) voyager_jtt_t; - -typedef struct voyager_asic_data_table { - __u8 jtag_id[4]; - __u16 length_bsr; - __u16 length_bist_reg; - __u32 bist_clk; - __u16 subaddr_bits; - __u16 seed_bits; - __u16 sig_bits; - __u16 jtag_offset; -} __attribute__((packed)) voyager_at_t; - -/* Voyager Interrupt Controller (VIC) registers */ - -/* Base to add to Cross Processor Interrupts (CPIs) when triggering - * the CPU IRQ line */ -/* register defines for the WCBICs (one per processor) */ -#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */ -#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */ -#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */ -#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */ -#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */ -#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */ -#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */ -#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */ - - -/* top of memory registers */ -#define VOYAGER_WCBIC_TOM_L 0x4 -#define VOYAGER_WCBIC_TOM_H 0x5 - -/* register defines for Voyager Memory Contol (VMC) - * these are present on L4 machines only */ -#define VOYAGER_VMC1 0x81 -#define VOYAGER_VMC2 0x91 -#define VOYAGER_VMC3 0xa1 -#define VOYAGER_VMC4 0xb1 - -/* VMC Ports */ -#define VOYAGER_VMC_MEMORY_SETUP 0x9 -# define VMC_Interleaving 0x01 -# define VMC_4Way 0x02 -# define VMC_EvenCacheLines 0x04 -# define VMC_HighLine 0x08 -# define VMC_Start0_Enable 0x20 -# define VMC_Start1_Enable 0x40 -# define VMC_Vremap 0x80 -#define VOYAGER_VMC_BANK_DENSITY 0xa -# define VMC_BANK_EMPTY 0 -# define VMC_BANK_4MB 1 -# define VMC_BANK_16MB 2 -# define VMC_BANK_64MB 3 -# define VMC_BANK0_MASK 0x03 -# define VMC_BANK1_MASK 0x0C -# define VMC_BANK2_MASK 0x30 -# define VMC_BANK3_MASK 0xC0 - -/* Magellan Memory Controller (MMC) defines - present on L5 */ -#define VOYAGER_MMC_ASIC_ID 1 -/* the two memory modules corresponding to memory cards in the system */ -#define VOYAGER_MMC_MEMORY0_MODULE 0x14 -#define VOYAGER_MMC_MEMORY1_MODULE 0x15 -/* the Magellan Memory Address (MMA) defines */ -#define VOYAGER_MMA_ASIC_ID 2 - -/* Submodule number for the Quad Baseboard */ -#define VOYAGER_QUAD_BASEBOARD 1 - -/* ASIC defines for the Quad Baseboard */ -#define VOYAGER_QUAD_QDATA0 1 -#define VOYAGER_QUAD_QDATA1 2 -#define VOYAGER_QUAD_QABC 3 - -/* Useful areas in extended CMOS */ -#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a -#define VOYAGER_MEMORY_CLICKMAP 0xa23 -#define VOYAGER_DUMP_LOCATION 0xb1a - -/* SUS In Control bit - used to tell SUS that we don't need to be - * babysat anymore */ -#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff -# define VOYAGER_IN_CONTROL_FLAG 0x80 - -/* Voyager PSI defines */ -#define VOYAGER_PSI_STATUS_REG 0x08 -# define PSI_DC_FAIL 0x01 -# define PSI_MON 0x02 -# define PSI_FAULT 0x04 -# define PSI_ALARM 0x08 -# define PSI_CURRENT 0x10 -# define PSI_DVM 0x20 -# define PSI_PSCFAULT 0x40 -# define PSI_STAT_CHG 0x80 - -#define VOYAGER_PSI_SUPPLY_REG 0x8000 - /* read */ -# define PSI_FAIL_DC 0x01 -# define PSI_FAIL_AC 0x02 -# define PSI_MON_INT 0x04 -# define PSI_SWITCH_OFF 0x08 -# define PSI_HX_OFF 0x10 -# define PSI_SECURITY 0x20 -# define PSI_CMOS_BATT_LOW 0x40 -# define PSI_CMOS_BATT_FAIL 0x80 - /* write */ -# define PSI_CLR_SWITCH_OFF 0x13 -# define PSI_CLR_HX_OFF 0x14 -# define PSI_CLR_CMOS_BATT_FAIL 0x17 - -#define VOYAGER_PSI_MASK 0x8001 -# define PSI_MASK_MASK 0x10 - -#define VOYAGER_PSI_AC_FAIL_REG 0x8004 -#define AC_FAIL_STAT_CHANGE 0x80 - -#define VOYAGER_PSI_GENERAL_REG 0x8007 - /* read */ -# define PSI_SWITCH_ON 0x01 -# define PSI_SWITCH_ENABLED 0x02 -# define PSI_ALARM_ENABLED 0x08 -# define PSI_SECURE_ENABLED 0x10 -# define PSI_COLD_RESET 0x20 -# define PSI_COLD_START 0x80 - /* write */ -# define PSI_POWER_DOWN 0x10 -# define PSI_SWITCH_DISABLE 0x01 -# define PSI_SWITCH_ENABLE 0x11 -# define PSI_CLEAR 0x12 -# define PSI_ALARM_DISABLE 0x03 -# define PSI_ALARM_ENABLE 0x13 -# define PSI_CLEAR_COLD_RESET 0x05 -# define PSI_SET_COLD_RESET 0x15 -# define PSI_CLEAR_COLD_START 0x07 -# define PSI_SET_COLD_START 0x17 - - - -struct voyager_bios_info { - __u8 len; - __u8 major; - __u8 minor; - __u8 debug; - __u8 num_classes; - __u8 class_1; - __u8 class_2; -}; - -/* The following structures and definitions are for the Kernel/SUS - * interface these are needed to find out how SUS initialised any Quad - * boards in the system */ - -#define NUMBER_OF_MC_BUSSES 2 -#define SLOTS_PER_MC_BUS 8 -#define MAX_CPUS 16 /* 16 way CPU system */ -#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */ -#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */ -#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */ -#define NUMBER_OF_POS_REGS 8 - -typedef struct { - __u8 MC_Slot; - __u8 POS_Values[NUMBER_OF_POS_REGS]; -} __attribute__((packed)) MC_SlotInformation_t; - -struct QuadDescription { - __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields - * will be zero except for slot */ - __u8 StructureVersion; - __u32 CPI_BaseAddress; - __u32 LARC_BankSize; - __u32 LocalMemoryStateBits; - __u8 Slot; /* Processor slots 1 - 4 */ -} __attribute__((packed)); - -struct ProcBoardInfo { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOfBoards; - struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; -} __attribute__((packed)); - -struct CacheDescription { - __u8 Level; - __u32 TotalSize; - __u16 LineSize; - __u8 Associativity; - __u8 CacheType; - __u8 WriteType; - __u8 Number_CPUs_SharedBy; - __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; - -} __attribute__((packed)); - -struct CPU_Description { - __u8 CPU_HardwareId; - char *FRU_String; - __u8 NumberOfCacheLevels; - struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; -} __attribute__((packed)); - -struct CPU_Info { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOf_CPUs; - struct CPU_Description CPU_Data[MAX_CPUS]; -} __attribute__((packed)); - - -/* - * This structure will be used by SUS and the OS. - * The assumption about this structure is that no blank space is - * packed in it by our friend the compiler. - */ -typedef struct { - __u8 Mailbox_SUS; /* Written to by SUS to give - commands/response to the OS */ - __u8 Mailbox_OS; /* Written to by the OS to give - commands/response to SUS */ - __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the - interface SUS supports */ - __u8 OS_MailboxVersion; /* Tells SUS which iteration of the - interface the OS supports */ - __u32 OS_Flags; /* Flags set by the OS as info for - SUS */ - __u32 SUS_Flags; /* Flags set by SUS as info - for the OS */ - __u32 WatchDogPeriod; /* Watchdog period (in seconds) which - the DP uses to see if the OS - is dead */ - __u32 WatchDogCount; /* Updated by the OS on every tic. */ - __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS - where to stuff the SUS error log - on a dump */ - MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS]; - /* Storage for MCA POS data */ - /* All new SECOND_PASS_INTERFACE fields added from this point */ - struct ProcBoardInfo *BoardData; - struct CPU_Info *CPU_Data; - /* All new fields must be added from this point */ -} Voyager_KernelSUS_Mbox_t; - -/* structure for finding the right memory address to send a QIC CPI to */ -struct voyager_qic_cpi { - /* Each cache line (32 bytes) can trigger a cpi. The cpi - * read/write may occur anywhere in the cache line---pick the - * middle to be safe */ - struct { - __u32 pad1[3]; - __u32 cpi; - __u32 pad2[4]; - } qic_cpi[8]; -}; - -struct voyager_status { - __u32 power_fail:1; - __u32 switch_off:1; - __u32 request_from_kernel:1; -}; - -struct voyager_psi_regs { - __u8 cat_id; - __u8 cat_dev; - __u8 cat_control; - __u8 subaddr; - __u8 dummy4; - __u8 checkbit; - __u8 subaddr_low; - __u8 subaddr_high; - __u8 intstatus; - __u8 stat1; - __u8 stat3; - __u8 fault; - __u8 tms; - __u8 gen; - __u8 sysconf; - __u8 dummy15; -}; - -struct voyager_psi_subregs { - __u8 supply; - __u8 mask; - __u8 present; - __u8 DCfail; - __u8 ACfail; - __u8 fail; - __u8 UPSfail; - __u8 genstatus; -}; - -struct voyager_psi { - struct voyager_psi_regs regs; - struct voyager_psi_subregs subregs; -}; - -struct voyager_SUS { -#define VOYAGER_DUMP_BUTTON_NMI 0x1 -#define VOYAGER_SUS_VALID 0x2 -#define VOYAGER_SYSINT_COMPLETE 0x3 - __u8 SUS_mbox; -#define VOYAGER_NO_COMMAND 0x0 -#define VOYAGER_IGNORE_DUMP 0x1 -#define VOYAGER_DO_DUMP 0x2 -#define VOYAGER_SYSINT_HANDSHAKE 0x3 -#define VOYAGER_DO_MEM_DUMP 0x4 -#define VOYAGER_SYSINT_WAS_RECOVERED 0x5 - __u8 kernel_mbox; -#define VOYAGER_MAILBOX_VERSION 0x10 - __u8 SUS_version; - __u8 kernel_version; -#define VOYAGER_OS_HAS_SYSINT 0x1 -#define VOYAGER_OS_IN_PROGRESS 0x2 -#define VOYAGER_UPDATING_WDPERIOD 0x4 - __u32 kernel_flags; -#define VOYAGER_SUS_BOOTING 0x1 -#define VOYAGER_SUS_IN_PROGRESS 0x2 - __u32 SUS_flags; - __u32 watchdog_period; - __u32 watchdog_count; - __u32 SUS_errorlog; - /* lots of system configuration stuff under here */ -}; - -/* Variables exported by voyager_smp */ -extern __u32 voyager_extended_vic_processors; -extern __u32 voyager_allowed_boot_processors; -extern __u32 voyager_quad_processors; -extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS]; -extern struct voyager_SUS *voyager_SUS; - -/* variables exported always */ -extern struct task_struct *voyager_thread; -extern int voyager_level; -extern struct voyager_status voyager_status; - -/* functions exported by the voyager and voyager_smp modules */ -extern int voyager_cat_readb(__u8 module, __u8 asic, int reg); -extern void voyager_cat_init(void); -extern void voyager_detect(struct voyager_bios_info *); -extern void voyager_trap_init(void); -extern void voyager_setup_irqs(void); -extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length); -extern void voyager_smp_intr_init(void); -extern __u8 voyager_extended_cmos_read(__u16 cmos_address); -extern void voyager_smp_dump(void); -extern void voyager_timer_interrupt(void); -extern void smp_local_timer_interrupt(void); -extern void voyager_power_off(void); -extern void smp_voyager_power_off(void *dummy); -extern void voyager_restart(void); -extern void voyager_cat_power_off(void); -extern void voyager_cat_do_common_interrupt(void); -extern void voyager_handle_nmi(void); -extern void voyager_smp_intr_init(void); -/* Commands for the following are */ -#define VOYAGER_PSI_READ 0 -#define VOYAGER_PSI_WRITE 1 -#define VOYAGER_PSI_SUBREAD 2 -#define VOYAGER_PSI_SUBWRITE 3 -extern void voyager_cat_psi(__u8, __u16, __u8 *); - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 3355973b12ac..580b4e296010 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -3,8 +3,8 @@ */ #include <asm/segment.h> #include <asm/msr-index.h> -#include <asm/page.h> -#include <asm/pgtable.h> +#include <asm/page_types.h> +#include <asm/pgtable_types.h> #include <asm/processor-flags.h> .code16 diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index a12e6a9fb659..8ded418b0593 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -1,7 +1,7 @@ .section .text.page_aligned #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> +#include <asm/page_types.h> # Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a70..8ea5164cbd04 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -1,8 +1,8 @@ .text #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/pgtable.h> -#include <asm/page.h> +#include <asm/pgtable_types.h> +#include <asm/page_types.h> #include <asm/msr.h> #include <asm/asm-offsets.h> @@ -13,7 +13,6 @@ * Hooray, we are in Long 64-bit mode (but still running in low memory) */ ENTRY(wakeup_long64) -wakeup_long64: movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax @@ -34,16 +33,12 @@ wakeup_long64: movq saved_rip, %rax jmp *%rax +ENDPROC(wakeup_long64) bogus_64_magic: jmp bogus_64_magic - .align 2 - .p2align 4,,15 -.globl do_suspend_lowlevel - .type do_suspend_lowlevel,@function -do_suspend_lowlevel: -.LFB5: +ENTRY(do_suspend_lowlevel) subq $8, %rsp xorl %eax, %eax call save_processor_state @@ -67,7 +62,7 @@ do_suspend_lowlevel: pushfq popq pt_regs_flags(%rax) - movq $.L97, saved_rip(%rip) + movq $resume_point, saved_rip(%rip) movq %rsp, saved_rsp movq %rbp, saved_rbp @@ -78,14 +73,12 @@ do_suspend_lowlevel: addq $8, %rsp movl $3, %edi xorl %eax, %eax - jmp acpi_enter_sleep_state -.L97: - .p2align 4,,7 -.L99: - .align 4 - movl $24, %eax - movw %ax, %ds + call acpi_enter_sleep_state + /* in case something went wrong, restore the machine status and go on */ + jmp resume_point + .align 4 +resume_point: /* We don't restore %rax, it must be 0 anyway */ movq $saved_context, %rax movq saved_context_cr4(%rax), %rbx @@ -117,12 +110,9 @@ do_suspend_lowlevel: xorl %eax, %eax addq $8, %rsp jmp restore_processor_state -.LFE5: -.Lfe5: - .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel - +ENDPROC(do_suspend_lowlevel) + .data -ALIGN ENTRY(saved_rbp) .quad 0 ENTRY(saved_rsi) .quad 0 ENTRY(saved_rdi) .quad 0 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a84ac7b570e6..6907b8e85d52 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { - unsigned long flags; char *vaddr; int nr_pages = 2; struct page *pages[2]; int i; + might_sleep(); if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) nr_pages = 1; vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); BUG_ON(!vaddr); - local_irq_save(flags); + local_irq_disable(); memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); + local_irq_enable(); vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d1bf032ba26f..4732768c5348 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -35,7 +35,6 @@ #include <linux/mm.h> #include <asm/perf_counter.h> -#include <asm/arch_hooks.h> #include <asm/pgalloc.h> #include <asm/atomic.h> #include <asm/mpspec.h> @@ -840,7 +839,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -1269,14 +1268,7 @@ void __cpuinit end_local_APIC_setup(void) #ifdef CONFIG_X86_X2APIC void check_x2apic(void) { - int msr, msr2; - - if (!cpu_has_x2apic) - return; - - rdmsr(MSR_IA32_APICBASE, msr, msr2); - - if (msr & X2APIC_ENABLE) { + if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); x2apic_preenabled = x2apic = 1; } diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index c9ec90742e9f..3a730fa574bb 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -35,7 +35,6 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <asm/acpi.h> -#include <asm/arch_hooks.h> #include <asm/e820.h> #include <asm/setup.h> diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 70935dd904db..e7c163661c77 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,9 +50,16 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { - if (!intr_remapping_enabled) - apic = &apic_flat; + if (x2apic && (apic != &apic_x2apic_phys && +#ifdef CONFIG_X86_UV + apic != &apic_x2apic_uv_x && +#endif + apic != &apic_x2apic_cluster)) { + if (x2apic_phys) + apic = &apic_x2apic_phys; + else + apic = &apic_x2apic_cluster; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index cfe7b09015d8..32838b57a945 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -48,7 +48,7 @@ #include <linux/gfp.h> #include <linux/smp.h> -static inline unsigned summit_get_apic_id(unsigned long x) +static unsigned summit_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; } @@ -58,7 +58,7 @@ static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) default_send_IPI_mask_sequence_logical(mask, vector); } -static inline void summit_send_IPI_allbutself(int vector) +static void summit_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); @@ -67,7 +67,7 @@ static inline void summit_send_IPI_allbutself(int vector) summit_send_IPI_mask(&mask, vector); } -static inline void summit_send_IPI_all(int vector) +static void summit_send_IPI_all(int vector) { summit_send_IPI_mask(&cpu_online_map, vector); } @@ -82,8 +82,8 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int -summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, + char *productid) { if (!strncmp(oem, "IBM ENSW", 8) && (!strncmp(productid, "VIGIL SMP", 9) @@ -98,7 +98,7 @@ summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) } /* Hook from generic ACPI tables.c */ -static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) @@ -186,7 +186,7 @@ static inline int is_WPEG(struct rio_detail *rio){ #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *summit_target_cpus(void) +static const cpumask_t *summit_target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing @@ -195,19 +195,18 @@ static inline const cpumask_t *summit_target_cpus(void) return &cpumask_of_cpu(0); } -static inline unsigned long -summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long summit_check_apicid_present(int bit) +static unsigned long summit_check_apicid_present(int bit) { return 1; } -static inline void summit_init_apic_ldr(void) +static void summit_init_apic_ldr(void) { unsigned long val, id; int count = 0; @@ -234,18 +233,18 @@ static inline void summit_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int summit_apic_id_registered(void) +static int summit_apic_id_registered(void) { return 1; } -static inline void summit_setup_apic_routing(void) +static void summit_setup_apic_routing(void) { printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", nr_ioapics); } -static inline int summit_apicid_to_node(int logical_apicid) +static int summit_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; @@ -266,7 +265,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu) #endif } -static inline int summit_cpu_present_to_apicid(int mps_cpu) +static int summit_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); @@ -274,28 +273,23 @@ static inline int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t -summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); } -static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) +static physid_mask_t summit_apicid_to_cpu_present(int apicid) { return physid_mask_of_physid(0); } -static inline void summit_setup_portio_remap(void) -{ -} - -static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) { return 1; } -static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpus_found = 0; int num_bits_set; @@ -303,12 +297,10 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) int cpu; num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ if (num_bits_set >= nr_cpu_ids) - return 0xFF; + return BAD_APICID; /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): + * The cpus in the mask must all be on the apic cluster. */ cpu = first_cpu(*cpumask); apicid = summit_cpu_to_logical_apicid(cpu); @@ -318,9 +310,9 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) int new_apicid = summit_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); + printk("%s: Not a valid mask!\n", __func__); - return 0xFF; + return BAD_APICID; } apicid = apicid | new_apicid; cpus_found++; @@ -330,8 +322,7 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int -summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, +static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { int apicid = summit_cpu_to_logical_apicid(0); @@ -356,7 +347,7 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) +static int summit_phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4e39d9ad4d52..354b9c45601d 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -14,10 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic) - return 1; - - return 0; + return x2apic_enabled(); } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index d2d52eb9f7ea..5bcb174409bc 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -10,7 +10,7 @@ #include <asm/apic.h> #include <asm/ipi.h> -static int x2apic_phys; +int x2apic_phys; static int set_x2apic_phys_mode(char *arg) { @@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && x2apic_phys) - return 1; - - return 0; + if (x2apic_phys) + return x2apic_enabled(); + else + return 0; } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 37ba5f85b718..10033fe718e0 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1192,6 +1192,7 @@ static int suspend(int vetoable) device_suspend(PMSG_SUSPEND); local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); @@ -1208,6 +1209,7 @@ static int suspend(int vetoable) if (err != APM_SUCCESS) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 0 : -EIO; + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); device_resume(PMSG_RESUME); @@ -1228,6 +1230,7 @@ static void standby(void) local_irq_disable(); device_power_down(PMSG_SUSPEND); + sysdev_suspend(PMSG_SUSPEND); local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); @@ -1235,6 +1238,7 @@ static void standby(void) apm_error("standby", err); local_irq_disable(); + sysdev_resume(); device_power_up(PMSG_RESUME); local_irq_enable(); } diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c2f930d86640..41ab3f064cb1 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy) } /* Enable Enhanced PowerSaver */ rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - val |= 1 << 16; + if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; wrmsrl(MSR_IA32_MISC_ENABLE, val); /* Can be locked at 0 */ rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { + if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index fb039cd345d8..6428aa17b40e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - rc = powernow_k8_cpu_init_acpi(data); - if (rc) { + if (powernow_k8_cpu_init_acpi(data)) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - goto err_out; + kfree(data); + return -ENODEV; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - goto err_out; + kfree(data); + return -ENODEV; } rc = find_psb_table(data); if (rc) { - goto err_out; + kfree(data); + return -ENODEV; } /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index f08998278a3a..c9f1fdc02830 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) enable it if not. */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { - l |= (1<<16); + if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); wrmsr(MSR_IA32_MISC_ENABLE, l, h); /* check to see if it stuck */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { + if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); return -ENODEV; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7aeef1d327b1..25c559ba8d54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -146,10 +146,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { + if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ + lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 1c838032fd37..fe79985ce0f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code) * If we know that the error was in user space, send a * SIGBUS. Otherwise, panic if tolerance is low. * - * do_exit() takes an awful lot of locks and has a slight + * force_sig() takes an awful lot of locks and has a slight * risk of deadlocking. */ if (user_space) { - do_exit(SIGBUS); + force_sig(SIGBUS, current); } else if (panic_on_oops || tolerant < 2) { mce_panic("Uncorrected machine check", &panicm, mcestart); @@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) } -static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) +static void mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable); static int mce_resume(struct sys_device *dev) { mce_init(NULL); + mce_cpu_features(¤t_cpu_data); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 4772e91e8246..9817506dd469 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr) } /* cpu init entry point, called from mce.c with preempt off */ -void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +void mce_amd_feature_init(struct cpuinfo_x86 *c) { unsigned int bank, block; unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 5e8c79e748a6..aa5e287c98e0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -31,7 +31,7 @@ asmlinkage void smp_thermal_interrupt(void) irq_exit(); } -static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -49,13 +49,13 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); - if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } - if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) tm2 = 1; if (h & APIC_VECTOR_MASK) { @@ -73,7 +73,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); @@ -85,7 +85,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) +void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 9b60fce09f75..f53bdcbaf382 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; /* -EBUSY */ @@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) vendor_thermal_interrupt = intel_thermal_interrupt; rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e85826829cf2..508bec1cee27 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) */ void __init reserve_early(u64 start, u64 end, char *name) { + if (start >= end) + return; + drop_overlaps_that_are_ok(start, end); __reserve_early(start, end, name, 0); } diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index ef00bb77d7e4..fbe66e626c09 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S @@ -6,7 +6,7 @@ */ #include <linux/linkage.h> -#include <asm/page.h> +#include <asm/page_types.h> /* * efi_call_phys(void *, ...) is a function with variable parameters. @@ -113,6 +113,7 @@ ENTRY(efi_call_phys) movl (%edx), %ecx pushl %ecx ret +ENDPROC(efi_call_phys) .previous .data diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S index 99b47d48c9f4..4c07ccab8146 100644 --- a/arch/x86/kernel/efi_stub_64.S +++ b/arch/x86/kernel/efi_stub_64.S @@ -41,6 +41,7 @@ ENTRY(efi_call0) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call0) ENTRY(efi_call1) SAVE_XMM @@ -50,6 +51,7 @@ ENTRY(efi_call1) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call1) ENTRY(efi_call2) SAVE_XMM @@ -59,6 +61,7 @@ ENTRY(efi_call2) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call2) ENTRY(efi_call3) SAVE_XMM @@ -69,6 +72,7 @@ ENTRY(efi_call3) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call3) ENTRY(efi_call4) SAVE_XMM @@ -80,6 +84,7 @@ ENTRY(efi_call4) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call4) ENTRY(efi_call5) SAVE_XMM @@ -92,6 +97,7 @@ ENTRY(efi_call5) addq $48, %rsp RESTORE_XMM ret +ENDPROC(efi_call5) ENTRY(efi_call6) SAVE_XMM @@ -107,3 +113,4 @@ ENTRY(efi_call6) addq $48, %rsp RESTORE_XMM ret +ENDPROC(efi_call6) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e99206831459..899e8938e79f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -47,7 +47,7 @@ #include <asm/errno.h> #include <asm/segment.h> #include <asm/smp.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/desc.h> #include <asm/percpu.h> #include <asm/dwarf2.h> @@ -1359,7 +1359,7 @@ nmi_espfix_stack: CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) + addl $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 pushl 16(%esp) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 860afce9660a..24c7031e23ca 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -48,7 +48,7 @@ #include <asm/unistd.h> #include <asm/thread_info.h> #include <asm/hw_irq.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/irqflags.h> #include <asm/paravirt.h> #include <asm/ftrace.h> @@ -77,20 +77,17 @@ ENTRY(ftrace_caller) movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi -.globl ftrace_call -ftrace_call: +GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME #ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: +GLOBAL(ftrace_graph_call) jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +GLOBAL(ftrace_stub) retq END(ftrace_caller) @@ -110,8 +107,7 @@ ENTRY(mcount) jnz ftrace_graph_caller #endif -.globl ftrace_stub -ftrace_stub: +GLOBAL(ftrace_stub) retq trace: @@ -148,9 +144,7 @@ ENTRY(ftrace_graph_caller) retq END(ftrace_graph_caller) - -.globl return_to_handler -return_to_handler: +GLOBAL(return_to_handler) subq $80, %rsp movq %rax, (%rsp) @@ -188,6 +182,7 @@ return_to_handler: ENTRY(native_usergs_sysret64) swapgs sysretq +ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ @@ -633,16 +628,14 @@ tracesys: * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. */ - .globl int_ret_from_sys_call - .globl int_with_check -int_ret_from_sys_call: +GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ -int_with_check: +GLOBAL(int_with_check) LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl TI_flags(%rcx),%edx diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 2a0aad7718d5..c32ca19d591a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -11,8 +11,8 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> -#include <asm/pgtable.h> +#include <asm/page_types.h> +#include <asm/pgtable_types.h> #include <asm/desc.h> #include <asm/cache.h> #include <asm/thread_info.h> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2e648e3a5ea4..54b29bb24e71 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -329,8 +329,6 @@ early_idt_ripmsg: #endif /* CONFIG_EARLY_PRINTK */ .previous -.balign PAGE_SIZE - #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ ENTRY(name) @@ -419,7 +417,7 @@ ENTRY(phys_base) .section .bss, "aw", @nobits .align L1_CACHE_BYTES ENTRY(idt_table) - .skip 256 * 16 + .skip IDT_ENTRIES * 16 .section .bss.page_aligned, "aw", @nobits .align PAGE_SIZE diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 11d5093eb281..df89102bef80 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -22,7 +22,6 @@ #include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> -#include <asm/arch_hooks.h> #include <asm/i8259.h> /* diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 520e6c1c5d22..f3e11cb295c4 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -18,7 +18,7 @@ #include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> -#include <asm/arch_hooks.h> +#include <asm/setup.h> #include <asm/i8259.h> #include <asm/traps.h> @@ -181,8 +181,8 @@ void __init native_init_IRQ(void) { int i; - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); apic_intr_init(); @@ -201,10 +201,11 @@ void __init native_init_IRQ(void) if (!acpi_ioapic) setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in - * the architecture specific gates) + /* + * Call quirks after call gates are initialised (usually add in + * the architecture specific gates): */ - intr_init_hook(); + x86_quirk_intr_init(); /* * External FPU? Set up irq13 if so, for diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 652fce6d2cce..137f2e8132df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -19,7 +19,6 @@ #include <linux/clocksource.h> #include <linux/kvm_para.h> #include <asm/pvclock.h> -#include <asm/arch_hooks.h> #include <asm/msr.h> #include <asm/apic.h> #include <linux/percpu.h> diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 37f420018a41..f5fc8c781a62 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one( static void machine_kexec_prepare_page_tables(struct kimage *image) { void *control_page; - pmd_t *pmd = 0; + pmd_t *pmd = NULL; control_page = page_address(image->control_code_page); #ifdef CONFIG_X86_PAE diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 2dc183758be3..845d80ce1ef1 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -51,7 +51,6 @@ #include <linux/ioport.h> #include <asm/uaccess.h> #include <linux/init.h> -#include <asm/arch_hooks.h> static unsigned char which_scsi; @@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void) * adapter was responsible for the error. */ bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); - - mca_nmi_hook(); -} /* mca_handle_nmi */ +} diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 7f4d2586972e..37cb1bda1baf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -710,13 +710,22 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * of physical memory; so that simply reserving * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. + * also need to make sure physptr is below than + * max_low_pfn + * we don't need reserve the area above max_low_pfn */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->physptr + size > end) - size = end - mpf->physptr; -#endif + + if (mpf->physptr < end) { + if (mpf->physptr + size > end) + size = end - mpf->physptr; + reserve_bootmem_generic(mpf->physptr, size, + BOOTMEM_DEFAULT); + } +#else reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); +#endif } return 1; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6dc4dca255e4..63dd358d8ee1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -28,7 +28,6 @@ #include <asm/paravirt.h> #include <asm/desc.h> #include <asm/setup.h> -#include <asm/arch_hooks.h> #include <asm/pgtable.h> #include <asm/time.h> #include <asm/pgalloc.h> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index fec79ad85dc6..646da41a620a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -111,9 +111,6 @@ void cpu_idle(void) check_pgt_cache(); rmb(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, 0); - if (cpu_is_offline(cpu)) play_dead(); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d2f7cd5b2c83..fb2159a5c817 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task) if (test_tsk_thread_flag(task, TIF_IA32)) return IA32_PAGE_OFFSET - 3; #endif - return TASK_SIZE64 - 7; + return TASK_SIZE_MAX - 7; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index a160f3119725..2064d0aa8d28 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,7 +7,7 @@ */ #include <linux/linkage.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/kexec.h> #include <asm/processor-flags.h> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index b0bbdd4829c9..d32cfb27a479 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -7,10 +7,10 @@ */ #include <linux/linkage.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/kexec.h> #include <asm/processor-flags.h> -#include <asm/pgtable.h> +#include <asm/pgtable_types.h> /* * Must be relocatable PIC code callable as a C function diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ebef80055795..5b85759e7972 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -74,8 +74,9 @@ #include <asm/e820.h> #include <asm/mpspec.h> #include <asm/setup.h> -#include <asm/arch_hooks.h> #include <asm/efi.h> +#include <asm/timer.h> +#include <asm/i8259.h> #include <asm/sections.h> #include <asm/dmi.h> #include <asm/io_apic.h> @@ -668,7 +669,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); - pre_setup_arch_hook(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif @@ -988,7 +988,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors * * Description: * Perform any necessary interrupt initialisation prior to setting up @@ -996,7 +996,7 @@ void __init setup_arch(char **cmdline_p) * interrupts should be initialised here if the machine emulates a PC * in any way. **/ -void __init pre_intr_init_hook(void) +void __init x86_quirk_pre_intr_init(void) { if (x86_quirks->arch_pre_intr_init) { if (x86_quirks->arch_pre_intr_init()) @@ -1006,7 +1006,7 @@ void __init pre_intr_init_hook(void) } /** - * intr_init_hook - post gate setup interrupt initialisation + * x86_quirk_intr_init - post gate setup interrupt initialisation * * Description: * Fill in any interrupts that may have been left out by the general @@ -1014,7 +1014,7 @@ void __init pre_intr_init_hook(void) * than the devices on the I/O bus (like APIC interrupts in intel MP * systems) are started here. **/ -void __init intr_init_hook(void) +void __init x86_quirk_intr_init(void) { if (x86_quirks->arch_intr_init) { if (x86_quirks->arch_intr_init()) @@ -1023,25 +1023,13 @@ void __init intr_init_hook(void) } /** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On Voyager - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - -/** - * trap_init_hook - initialise system specific traps + * x86_quirk_trap_init - initialise system specific traps * * Description: * Called as the final act of trap_init(). Used in VISWS to initialise * the various board specific APIC traps. **/ -void __init trap_init_hook(void) +void __init x86_quirk_trap_init(void) { if (x86_quirks->arch_trap_init) { if (x86_quirks->arch_trap_init()) @@ -1051,29 +1039,29 @@ void __init trap_init_hook(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; /** - * pre_time_init_hook - do any specific initialisations before. + * x86_quirk_pre_time_init - do any specific initialisations before. * **/ -void __init pre_time_init_hook(void) +void __init x86_quirk_pre_time_init(void) { if (x86_quirks->arch_pre_time_init) x86_quirks->arch_pre_time_init(); } /** - * time_init_hook - do any specific initialisations for the system timer. + * x86_quirk_time_init - do any specific initialisations for the system timer. * * Description: * Must plug the system timer interrupt source at HZ into the IRQ listed * in irq_vectors.h:TIMER_IRQ **/ -void __init time_init_hook(void) +void __init x86_quirk_time_init(void) { if (x86_quirks->arch_time_init) { /* @@ -1088,25 +1076,4 @@ void __init time_init_hook(void) irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Architecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* - * If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - pr_warning("NMI generated from unknown source!\n"); -} -#endif /* CONFIG_MCA */ - #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 764c74e871f2..5c5d87f0b2e1 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -33,7 +33,7 @@ #include <linux/time.h> #include <linux/mca.h> -#include <asm/arch_hooks.h> +#include <asm/setup.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/timer.h> @@ -118,7 +118,7 @@ void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - time_init_hook(); + x86_quirk_time_init(); } /* @@ -131,7 +131,7 @@ void __init hpet_time_init(void) */ void __init time_init(void) { - pre_time_init_hook(); + x86_quirk_pre_time_init(); tsc_init(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index e6e695acd725..241ec3923f61 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_NONE, .name = "timer" }; diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index d8ccc3c6552f..66d874e5404c 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -29,7 +29,7 @@ #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> +#include <asm/page_types.h> /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 95a012a4664e..cddfb8d386b9 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -25,8 +25,8 @@ */ #include <linux/linkage.h> -#include <asm/pgtable.h> -#include <asm/page.h> +#include <asm/pgtable_types.h> +#include <asm/page_types.h> #include <asm/msr.h> #include <asm/segment.h> #include <asm/processor-flags.h> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c85a86cb7fb1..1dba866967e2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -61,7 +61,7 @@ #include <asm/proto.h> #else #include <asm/processor-flags.h> -#include <asm/arch_hooks.h> +#include <asm/setup.h> #include <asm/traps.h> #include "cpu/mcheck/mce.h" @@ -942,7 +942,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_BADSTK; - info.si_addr = 0; + info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, 32, SIGILL) == NOTIFY_STOP) return; @@ -1023,6 +1023,6 @@ void __init trap_init(void) cpu_init(); #ifdef CONFIG_X86_32 - trap_init_hook(); + x86_quirk_trap_init(); #endif } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34199d30ff46..191a876e9e87 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -24,7 +24,6 @@ #include <asm/visws/cobalt.h> #include <asm/visws/piix4.h> -#include <asm/arch_hooks.h> #include <asm/io_apic.h> #include <asm/fixmap.h> #include <asm/reboot.h> diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a4791ef412d1..33a788d5879c 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -28,7 +28,6 @@ #include <asm/vmi.h> #include <asm/vmi_time.h> -#include <asm/arch_hooks.h> #include <asm/apicdef.h> #include <asm/apic.h> #include <asm/timer.h> @@ -202,7 +201,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) static struct irqaction vmi_clock_action = { .name = "vmi-timer", .handler = vmi_timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, .mask = CPU_MASK_ALL, }; @@ -283,10 +282,12 @@ void __devinit vmi_time_ap_init(void) #endif /** vmi clocksource */ +static struct clocksource clocksource_vmi; static cycle_t read_real_cycles(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); + return max(ret, clocksource_vmi.cycle_last); } static struct clocksource clocksource_vmi = { diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 3eba7f7bac05..0d860963f268 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -12,7 +12,7 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/cache.h> #include <asm/boot.h> diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 087a7f2c639b..fbfced6f6800 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -6,7 +6,7 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/asm-offsets.h> -#include <asm/page.h> +#include <asm/page_types.h> #undef i386 /* in case the preprocessor is a 32bit one */ diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e665d1c623ca..72bd275a9b5c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) hrtimer_add_expires_ns(&pt->timer, pt->period); pt->scheduled = hrtimer_get_expires_ns(&pt->timer); if (pt->period) - ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); + ps->channels[0].count_load_time = ktime_get(); return (pt->period == 0 ? 0 : 1); } diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index c019b8edcdb7..cf17ed52f6fb 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - kvm_apic_timer_intr_post(vcpu, vec); - /* TODO: PIT, RTC etc. */ -} -EXPORT_SYMBOL_GPL(kvm_timer_intr_post); - void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2bf32a03ceec..82579ee538d0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s); -void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index afac68c0815c..f0b67f2cdd69 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -35,6 +35,12 @@ #include "kvm_cache_regs.h" #include "irq.h" +#ifndef CONFIG_X86_64 +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) +#else +#define mod_64(x, y) ((x) % (y)) +#endif + #define PRId64 "d" #define PRIx64 "llx" #define PRIu64 "u" @@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic) static u32 apic_get_tmcct(struct kvm_lapic *apic) { - u64 counter_passed; - ktime_t passed, now; + ktime_t remaining; + s64 ns; u32 tmcct; ASSERT(apic != NULL); - now = apic->timer.dev.base->get_time(); - tmcct = apic_get_reg(apic, APIC_TMICT); - /* if initial count is 0, current count should also be 0 */ - if (tmcct == 0) + if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - if (unlikely(ktime_to_ns(now) <= - ktime_to_ns(apic->timer.last_update))) { - /* Wrap around */ - passed = ktime_add(( { - (ktime_t) { - .tv64 = KTIME_MAX - - (apic->timer.last_update).tv64}; } - ), now); - apic_debug("time elapsed\n"); - } else - passed = ktime_sub(now, apic->timer.last_update); - - counter_passed = div64_u64(ktime_to_ns(passed), - (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); - - if (counter_passed > tmcct) { - if (unlikely(!apic_lvtt_period(apic))) { - /* one-shot timers stick at 0 until reset */ - tmcct = 0; - } else { - /* - * periodic timers reset to APIC_TMICT when they - * hit 0. The while loop simulates this happening N - * times. (counter_passed %= tmcct) would also work, - * but might be slower or not work on 32-bit?? - */ - while (counter_passed > tmcct) - counter_passed -= tmcct; - tmcct -= counter_passed; - } - } else { - tmcct -= counter_passed; - } + remaining = hrtimer_expires_remaining(&apic->timer.dev); + if (ktime_to_ns(remaining) < 0) + remaining = ktime_set(0, 0); + + ns = mod_64(ktime_to_ns(remaining), apic->timer.period); + tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); return tmcct; } @@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now = apic->timer.dev.base->get_time(); - apic->timer.last_update = now; - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->timer.divide_count; atomic_set(&apic->timer.pending, 0); @@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) } } -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) - apic->timer.last_update = ktime_add_ns( - apic->timer.last_update, - apic->timer.period); -} - int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 81858881287e..45ab6ee71209 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -12,7 +12,6 @@ struct kvm_lapic { atomic_t pending; s64 period; /* unit: ns */ u32 divide_count; - ktime_t last_update; struct hrtimer dev; } timer; struct kvm_vcpu *vcpu; @@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); -void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 83f11c7474a1..2d4477c71473 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; if (mt_mask) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); + if (!kvm_is_mmio_pfn(pfn)) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + mt_mask |= VMX_EPT_IGMT_BIT; + } else + mt_mask = MTRR_TYPE_UNCACHABLE << + kvm_x86_ops->get_mt_mask_shift(); spte |= mt_mask; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1452851ae258..a9e769e4e251 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* Okay, we can deliver the interrupt: grab it and update PIC state. */ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); - kvm_timer_intr_post(vcpu, intr_vector); out: update_cr8_intercept(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6259d7467648..7611af576829 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: + vmx_load_host_state(to_vmx(vcpu)); msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; @@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) } if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); if (kvm_cpu_has_interrupt(vcpu)) enable_irq_window(vcpu); } @@ -3687,8 +3687,7 @@ static int __init vmx_init(void) if (vm_need_ept()) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK | - VMX_EPT_IGMT_BIT); + VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a2406..758b7a155ae9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: - case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: case KVM_CAP_MP_STATE: @@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOMMU: r = iommu_found(); break; + case KVM_CAP_CLOCKSOURCE: + r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC); + break; default: r = 0; break; @@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm) } -void kvm_arch_destroy_vm(struct kvm *kvm) +void kvm_arch_sync_events(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index c70e12b1a637..8dab8f7844d3 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -3,7 +3,6 @@ config LGUEST_GUEST select PARAVIRT depends on X86_32 depends on !X86_PAE - depends on !X86_VOYAGER select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index ad374003742f..51f1504cddd9 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -28,7 +28,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile deleted file mode 100644 index 15c250b371d3..000000000000 --- a/arch/x86/mach-voyager/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux kernel. -# - -EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := setup.o voyager_basic.o voyager_thread.o - -obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c deleted file mode 100644 index 66b7eb57d8e4..000000000000 --- a/arch/x86/mach-voyager/setup.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <asm/arch_hooks.h> -#include <asm/voyager.h> -#include <asm/e820.h> -#include <asm/io.h> -#include <asm/setup.h> -#include <asm/cpu.h> - -void __init pre_intr_init_hook(void) -{ - init_ISA_irqs(); -} - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - -void __init intr_init_hook(void) -{ -#ifdef CONFIG_SMP - voyager_smp_intr_init(); -#endif - - setup_irq(2, &irq2); -} - -static void voyager_disable_tsc(void) -{ - /* Voyagers run their CPUs from independent clocks, so disable - * the TSC code because we can't sync them */ - setup_clear_cpu_cap(X86_FEATURE_TSC); -} - -void __init pre_setup_arch_hook(void) -{ - voyager_disable_tsc(); -} - -void __init pre_time_init_hook(void) -{ - voyager_disable_tsc(); -} - -void __init trap_init_hook(void) -{ -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -void __init time_init_hook(void) -{ - irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); - setup_irq(0, &irq0); -} - -/* Hook for machine specific memory setup. */ - -char *__init machine_specific_memory_setup(void) -{ - char *who; - int new_nr; - - who = "NOT VOYAGER"; - - if (voyager_level == 5) { - __u32 addr, length; - int i; - - who = "Voyager-SUS"; - - e820.nr_map = 0; - for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { - e820_add_region(addr, length, E820_RAM); - } - return who; - } else if (voyager_level == 4) { - __u32 tom; - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - /* select the DINO config space */ - outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); - /* Read DINO top of memory register */ - tom = ((inb(catbase + 0x4) & 0xf0) << 16) - + ((inb(catbase + 0x5) & 0x7f) << 24); - - if (inb(catbase) != VOYAGER_DINO) { - printk(KERN_ERR - "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); - tom = (boot_params.screen_info.ext_mem_k) << 10; - } - who = "Voyager-TOM"; - e820_add_region(0, 0x9f000, E820_RAM); - /* map from 1M to top of memory */ - e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, - E820_RAM); - /* FIXME: Should check the ASICs to see if I need to - * take out the 8M window. Just do it at the moment - * */ - e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, - E820_RESERVED); - return who; - } - - return default_machine_specific_memory_setup(); -} diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c deleted file mode 100644 index 46d6f8067690..000000000000 --- a/arch/x86/mach-voyager/voyager_basic.c +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the voyager specific routines for getting - * initialisation of the architecture to function. For additional - * features see: - * - * voyager_cat.c - Voyager CAT bus interface - * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/reboot.h> -#include <linux/sysrq.h> -#include <linux/smp.h> -#include <linux/nodemask.h> -#include <asm/io.h> -#include <asm/voyager.h> -#include <asm/vic.h> -#include <linux/pm.h> -#include <asm/tlbflush.h> -#include <asm/arch_hooks.h> -#include <asm/i8253.h> - -/* - * Power off function, if any - */ -void (*pm_power_off) (void); -EXPORT_SYMBOL(pm_power_off); - -int voyager_level = 0; - -struct voyager_SUS *voyager_SUS = NULL; - -#ifdef CONFIG_SMP -static void voyager_dump(int dummy1, struct tty_struct *dummy3) -{ - /* get here via a sysrq */ - voyager_smp_dump(); -} - -static struct sysrq_key_op sysrq_voyager_dump_op = { - .handler = voyager_dump, - .help_msg = "Voyager", - .action_msg = "Dump Voyager Status", -}; -#endif - -void voyager_detect(struct voyager_bios_info *bios) -{ - if (bios->len != 0xff) { - int class = (bios->class_1 << 8) - | (bios->class_2 & 0xff); - - printk("Voyager System detected.\n" - " Class %x, Revision %d.%d\n", - class, bios->major, bios->minor); - if (class == VOYAGER_LEVEL4) - voyager_level = 4; - else if (class < VOYAGER_LEVEL5_AND_ABOVE) - voyager_level = 3; - else - voyager_level = 5; - printk(" Architecture Level %d\n", voyager_level); - if (voyager_level < 4) - printk - ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); - /* install the power off handler */ - pm_power_off = voyager_power_off; -#ifdef CONFIG_SMP - register_sysrq_key('v', &sysrq_voyager_dump_op); -#endif - } else { - printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); - } -} - -void voyager_system_interrupt(int cpl, void *dev_id) -{ - printk("Voyager: detected system interrupt\n"); -} - -/* Routine to read information from the extended CMOS area */ -__u8 voyager_extended_cmos_read(__u16 addr) -{ - outb(addr & 0xff, 0x74); - outb((addr >> 8) & 0xff, 0x75); - return inb(0x76); -} - -/* internal definitions for the SUS Click Map of memory */ - -#define CLICK_ENTRIES 16 -#define CLICK_SIZE 4096 /* click to byte conversion for Length */ - -typedef struct ClickMap { - struct Entry { - __u32 Address; - __u32 Length; - } Entry[CLICK_ENTRIES]; -} ClickMap_t; - -/* This routine is pretty much an awful hack to read the bios clickmap by - * mapping it into page 0. There are usually three regions in the map: - * Base Memory - * Extended Memory - * zero length marker for end of map - * - * Returns are 0 for failure and 1 for success on extracting region. - */ -int __init voyager_memory_detect(int region, __u32 * start, __u32 * length) -{ - int i; - int retval = 0; - __u8 cmos[4]; - ClickMap_t *map; - unsigned long map_addr; - unsigned long old; - - if (region >= CLICK_ENTRIES) { - printk("Voyager: Illegal ClickMap region %d\n", region); - return 0; - } - - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = - voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); - - map_addr = *(unsigned long *)cmos; - - /* steal page 0 for this */ - old = pg0[0]; - pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); - local_flush_tlb(); - /* now clear everything out but page 0 */ - map = (ClickMap_t *) (map_addr & (~PAGE_MASK)); - - /* zero length is the end of the clickmap */ - if (map->Entry[region].Length != 0) { - *length = map->Entry[region].Length * CLICK_SIZE; - *start = map->Entry[region].Address; - retval = 1; - } - - /* replace the mapping */ - pg0[0] = old; - local_flush_tlb(); - return retval; -} - -/* voyager specific handling code for timer interrupts. Used to hand - * off the timer tick to the SMP code, since the VIC doesn't have an - * internal timer (The QIC does, but that's another story). */ -void voyager_timer_interrupt(void) -{ - if ((jiffies & 0x3ff) == 0) { - - /* There seems to be something flaky in either - * hardware or software that is resetting the timer 0 - * count to something much higher than it should be - * This seems to occur in the boot sequence, just - * before root is mounted. Therefore, every 10 - * seconds or so, we sanity check the timer zero count - * and kick it back to where it should be. - * - * FIXME: This is the most awful hack yet seen. I - * should work out exactly what is interfering with - * the timer count settings early in the boot sequence - * and swiftly introduce it to something sharp and - * pointy. */ - __u16 val; - - spin_lock(&i8253_lock); - - outb_p(0x00, 0x43); - val = inb_p(0x40); - val |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - - if (val > LATCH) { - printk - ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", - val); - spin_lock(&i8253_lock); - outb(0x34, 0x43); - outb_p(LATCH & 0xff, 0x40); /* LSB */ - outb(LATCH >> 8, 0x40); /* MSB */ - spin_unlock(&i8253_lock); - } - } -#ifdef CONFIG_SMP - smp_vic_timer_interrupt(); -#endif -} - -void voyager_power_off(void) -{ - printk("VOYAGER Power Off\n"); - - if (voyager_level == 5) { - voyager_cat_power_off(); - } else if (voyager_level == 4) { - /* This doesn't apparently work on most L4 machines, - * but the specs say to do this to get automatic power - * off. Unfortunately, if it doesn't power off the - * machine, it ends up doing a cold restart, which - * isn't really intended, so comment out the code */ -#if 0 - int port; - - /* enable the voyager Configuration Space */ - outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP); - /* the port for the power off flag is an offset from the - floating base */ - port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; - /* set the power off flag */ - outb(inb(port) | 0x1, port); -#endif - } - /* and wait for it to happen */ - local_irq_disable(); - for (;;) - halt(); -} - -/* copied from process.c */ -static inline void kb_wait(void) -{ - int i; - - for (i = 0; i < 0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; -} - -void machine_shutdown(void) -{ - /* Architecture specific shutdown needed before a kexec */ -} - -void machine_restart(char *cmd) -{ - printk("Voyager Warm Restart\n"); - kb_wait(); - - if (voyager_level == 5) { - /* write magic values to the RTC to inform system that - * shutdown is beginning */ - outb(0x8f, 0x70); - outb(0x5, 0x71); - - udelay(50); - outb(0xfe, 0x64); /* pull reset low */ - } else if (voyager_level == 4) { - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - __u8 basebd = inb(VOYAGER_MC_SETUP); - - outb(basebd | 0x08, VOYAGER_MC_SETUP); - outb(0x02, catbase + 0x21); - } - local_irq_disable(); - for (;;) - halt(); -} - -void machine_emergency_restart(void) -{ - /*for now, just hook this to a warm restart */ - machine_restart(NULL); -} - -void mca_nmi_hook(void) -{ - __u8 dumpval __maybe_unused = inb(0xf823); - __u8 swnmi __maybe_unused = inb(0xf813); - - /* FIXME: assume dump switch pressed */ - /* check to see if the dump switch was pressed */ - VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); - /* clear swnmi */ - outb(0xff, 0xf813); - /* tell SUS to ignore dump */ - if (voyager_level == 5 && voyager_SUS != NULL) { - if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { - voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; - voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; - udelay(1000); - voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; - voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; - } - } - printk(KERN_ERR - "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", - smp_processor_id()); - show_stack(NULL, NULL); - show_state(); -} - -void machine_halt(void) -{ - /* treat a halt like a power off */ - machine_power_off(); -} - -void machine_power_off(void) -{ - if (pm_power_off) - pm_power_off(); -} diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c deleted file mode 100644 index 2ad598c104af..000000000000 --- a/arch/x86/mach-voyager/voyager_cat.c +++ /dev/null @@ -1,1197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the logic for manipulating the CAT bus - * in a level 5 machine. - * - * The CAT bus is a serial configuration and test bus. Its primary - * uses are to probe the initial configuration of the system and to - * diagnose error conditions when a system interrupt occurs. The low - * level interface is fairly primitive, so most of this file consists - * of bit shift manipulations to send and receive packets on the - * serial bus */ - -#include <linux/types.h> -#include <linux/completion.h> -#include <linux/sched.h> -#include <asm/voyager.h> -#include <asm/vic.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/delay.h> -#include <asm/io.h> - -#ifdef VOYAGER_CAT_DEBUG -#define CDEBUG(x) printk x -#else -#define CDEBUG(x) -#endif - -/* the CAT command port */ -#define CAT_CMD (sspb + 0xe) -/* the CAT data port */ -#define CAT_DATA (sspb + 0xd) - -/* the internal cat functions */ -static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits); -static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data, - __u16 num_bits); -static void cat_build_header(__u8 * header, const __u16 len, - const __u16 smallest_reg_bits, - const __u16 longest_reg_bits); -static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 op); -static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 * value); -static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, - __u8 pad_bits); -static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 value); -static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value); -static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, - __u16 offset, __u16 len, void *buf); -static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value); -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp); -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp); - -static inline const char *cat_module_name(int module_id) -{ - switch (module_id) { - case 0x10: - return "Processor Slot 0"; - case 0x11: - return "Processor Slot 1"; - case 0x12: - return "Processor Slot 2"; - case 0x13: - return "Processor Slot 4"; - case 0x14: - return "Memory Slot 0"; - case 0x15: - return "Memory Slot 1"; - case 0x18: - return "Primary Microchannel"; - case 0x19: - return "Secondary Microchannel"; - case 0x1a: - return "Power Supply Interface"; - case 0x1c: - return "Processor Slot 5"; - case 0x1d: - return "Processor Slot 6"; - case 0x1e: - return "Processor Slot 7"; - case 0x1f: - return "Processor Slot 8"; - default: - return "Unknown Module"; - } -} - -static int sspb = 0; /* stores the super port location */ -int voyager_8slot = 0; /* set to true if a 51xx monster */ - -voyager_module_t *voyager_cat_list; - -/* the I/O port assignments for the VIC and QIC */ -static struct resource vic_res = { - .name = "Voyager Interrupt Controller", - .start = 0xFC00, - .end = 0xFC6F -}; -static struct resource qic_res = { - .name = "Quad Interrupt Controller", - .start = 0xFC70, - .end = 0xFCFF -}; - -/* This function is used to pack a data bit stream inside a message. - * It writes num_bits of the data buffer in msg starting at start_bit. - * Note: This function assumes that any unused bit in the data stream - * is set to zero so that the ors will work correctly */ -static void -cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - __u16 residue = (num_bits % BITS_PER_BYTE) + offset; - int i; - - /* adjust if we have more than a byte of residue */ - if (residue >= BITS_PER_BYTE) { - residue -= BITS_PER_BYTE; - len++; - } - - /* clear out the bits. We assume here that if len==0 then - * residue >= offset. This is always true for the catbus - * operations */ - msg[byte] &= 0xff << (BITS_PER_BYTE - offset); - msg[byte++] |= data[0] >> offset; - if (len == 0) - return; - for (i = 1; i < len; i++) - msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset)) - | (data[i] >> offset); - if (residue != 0) { - __u8 mask = 0xff >> residue; - __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset) - | (data[i] >> offset); - - last_byte &= ~mask; - msg[byte] &= mask; - msg[byte] |= last_byte; - } - return; -} - -/* unpack the data again (same arguments as cat_pack()). data buffer - * must be zero populated. - * - * Function: given a message string move to start_bit and copy num_bits into - * data (starting at bit 0 in data). - */ -static void -cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - const __u8 last_bits = num_bits % BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - int i; - - if (last_bits != 0) - len++; - - /* special case: want < 8 bits from msg and we can get it from - * a single byte of the msg */ - if (len == 0 && BITS_PER_BYTE - offset >= num_bits) { - data[0] = msg[byte] << offset; - data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); - return; - } - for (i = 0; i < len; i++) { - /* this annoying if has to be done just in case a read of - * msg one beyond the array causes a panic */ - if (offset != 0) { - data[i] = msg[byte++] << offset; - data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); - } else { - data[i] = msg[byte++]; - } - } - /* do we need to truncate the final byte */ - if (last_bits != 0) { - data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits); - } - return; -} - -static void -cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits, - const __u16 longest_reg_bits) -{ - int i; - __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; - __u8 *last_byte = &header[len - 1]; - - if (start_bit == 0) - start_bit = 1; /* must have at least one bit in the hdr */ - - for (i = 0; i < len; i++) - header[i] = 0; - - for (i = start_bit; i > 0; i--) - *last_byte = ((*last_byte) << 1) + 1; - -} - -static int -cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op) -{ - __u8 parity, inst, inst_buf[4] = { 0 }; - __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; - __u16 ibytes, hbytes, padbits; - int i; - - /* - * Parity is the parity of the register number + 1 (READ_REGISTER - * and WRITE_REGISTER always add '1' to the number of bits == 1) - */ - parity = (__u8) (1 + (reg & 0x01) + - ((__u8) (reg & 0x02) >> 1) + - ((__u8) (reg & 0x04) >> 2) + - ((__u8) (reg & 0x08) >> 3)) % 2; - - inst = ((parity << 7) | (reg << 2) | op); - - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - printk - ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(inst, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); - return 1; - } - return 0; - } - ibytes = modp->inst_bits / BITS_PER_BYTE; - if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - ibytes++; - } - hbytes = modp->largest_reg / BITS_PER_BYTE; - if (modp->largest_reg % BITS_PER_BYTE) - hbytes++; - CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); - /* initialise the instruction sequence to 0xff */ - for (i = 0; i < ibytes + hbytes; i++) - iseq[i] = 0xff; - cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); - cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); - inst_buf[0] = inst; - inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); - cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("ins = 0x%x, iseq: ", inst); - for (i = 0; i < ibytes + hbytes; i++) - printk("0x%x ", iseq[i]); - printk("\n"); -#endif - if (cat_shiftout(iseq, ibytes, hbytes, padbits)) { - CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); - return 1; - } - CDEBUG(("CAT SHIFTOUT DONE\n")); - return 0; -} - -static int -cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - *value = inb(CAT_DATA); - outb(0xAA, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); - return 1; - } - return 0; - } else { - __u16 sbits = modp->num_asics - 1 + asicp->ireg_length; - __u16 sbytes = sbits / BITS_PER_BYTE; - __u16 tbytes; - __u8 string[VOYAGER_MAX_SCAN_PATH], - trailer[VOYAGER_MAX_REG_SIZE]; - __u8 padbits; - int i; - - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - - if ((padbits = sbits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - sbytes++; - } - tbytes = asicp->ireg_length / BITS_PER_BYTE; - if (asicp->ireg_length % BITS_PER_BYTE) - tbytes++; - CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", - tbytes, sbytes, padbits)); - cat_build_header(trailer, tbytes, 1, asicp->ireg_length); - - for (i = tbytes - 1; i >= 0; i--) { - outb(trailer[i], CAT_DATA); - string[sbytes + i] = inb(CAT_DATA); - } - - for (i = sbytes - 1; i >= 0; i--) { - outb(0xaa, CAT_DATA); - string[i] = inb(CAT_DATA); - } - *value = 0; - cat_unpack(string, - padbits + (tbytes * BITS_PER_BYTE) + - asicp->asic_location, value, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("value=0x%x, string: ", *value); - for (i = 0; i < tbytes + sbytes; i++) - printk("0x%x ", string[i]); - printk("\n"); -#endif - - /* sanity check the rest of the return */ - for (i = 0; i < tbytes; i++) { - __u8 input = 0; - - cat_unpack(string, padbits + (i * BITS_PER_BYTE), - &input, BITS_PER_BYTE); - if (trailer[i] != input) { - CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); - return 1; - } - } - CDEBUG(("cat_getdata DONE\n")); - return 0; - } -} - -static int -cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) -{ - int i; - - for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--) - outb(data[i], CAT_DATA); - - for (i = header_bytes - 1; i >= 0; i--) { - __u8 header = 0; - __u8 input; - - outb(data[i], CAT_DATA); - input = inb(CAT_DATA); - CDEBUG(("cat_shiftout: returned 0x%x\n", input)); - cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, - &header, BITS_PER_BYTE); - if (input != header) { - CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); - return 1; - } - } - return 0; -} - -static int -cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value) -{ - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(value, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) { - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - } - - return 0; - } else { - __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; - __u16 dbytes = - (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE; - __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], - hseq[VOYAGER_MAX_REG_SIZE]; - int i; - - if ((padbits = (modp->num_asics - 1 - + asicp->ireg_length) % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - dbytes++; - } - if (asicp->ireg_length % BITS_PER_BYTE) - hbytes++; - - cat_build_header(hseq, hbytes, 1, asicp->ireg_length); - - for (i = 0; i < dbytes + hbytes; i++) - dseq[i] = 0xff; - CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", - dbytes, hbytes, padbits)); - cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, - hseq, hbytes * BITS_PER_BYTE); - cat_pack(dseq, asicp->asic_location, &value, - asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("dseq "); - for (i = 0; i < hbytes + dbytes; i++) { - printk("0x%x ", dseq[i]); - } - printk("\n"); -#endif - return cat_shiftout(dseq, dbytes, hbytes, padbits); - } -} - -static int -cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG)) - return 1; - return cat_senddata(modp, asicp, reg, value); -} - -static int -cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG)) - return 1; - return cat_getdata(modp, asicp, reg, value); -} - -static int -cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len) -{ - __u8 val; - - if (len > 1) { - /* set auto increment */ - __u8 newval; - - if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { - CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", - val)); - newval = val | VOYAGER_AUTO_INC; - if (newval != val) { - if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { - CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - } - } - if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); - return 1; - } - if (asicp->subaddr > VOYAGER_SUBADDR_LO) { - if (cat_write - (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); - return 1; - } - cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, - val)); - } - cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); - return 0; -} - -static int -cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - /* FIXME: need special actions for VOYAGER_CAT_ID here */ - if (asicp->asic_id == VOYAGER_CAT_ID) { - CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); - /* FIXME -- This is supposed to be handled better - * There is a problem writing to the cat asic in the - * PSI. The 30us delay seems to work, though */ - udelay(30); - } - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - printk("cat_subwrite: cat_subaddrsetup FAILED\n"); - return retval; - } - - if (cat_sendinst - (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { - printk("cat_subwrite: cat_sendinst FAILED\n"); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) { - printk - ("cat_subwrite: cat_sendata element at %d FAILED\n", - i); - return 1; - } - } - return 0; -} -static int -cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); - return retval; - } - - if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { - CDEBUG(("cat_subread: cat_sendinst failed\n")); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) { - CDEBUG(("cat_subread: cat_getdata element %d failed\n", - i)); - return 1; - } - } - return 0; -} - -/* buffer for storing EPROM data read in during initialisation */ -static __initdata __u8 eprom_buf[0xFFFF]; -static voyager_module_t *voyager_initial_module; - -/* Initialise the cat bus components. We assume this is called by the - * boot cpu *after* all memory initialisation has been done (so we can - * use kmalloc) but before smp initialisation, so we can probe the SMP - * configuration and pick up necessary information. */ -void __init voyager_cat_init(void) -{ - voyager_module_t **modpp = &voyager_initial_module; - voyager_asic_t **asicpp; - voyager_asic_t *qabc_asic = NULL; - int i, j; - unsigned long qic_addr = 0; - __u8 qabc_data[0x20]; - __u8 num_submodules, val; - voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0]; - - __u8 cmos[4]; - unsigned long addr; - - /* initiallise the SUS mailbox */ - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i); - addr = *(unsigned long *)cmos; - if ((addr & 0xff000000) != 0xff000000) { - printk(KERN_ERR - "Voyager failed to get SUS mailbox (addr = 0x%lx\n", - addr); - } else { - static struct resource res; - - res.name = "voyager SUS"; - res.start = addr; - res.end = addr + 0x3ff; - - request_resource(&iomem_resource, &res); - voyager_SUS = (struct voyager_SUS *) - ioremap(addr, 0x400); - printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n", - voyager_SUS->SUS_version); - voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; - voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; - } - - /* clear the processor counts */ - voyager_extended_vic_processors = 0; - voyager_quad_processors = 0; - - printk("VOYAGER: beginning CAT bus probe\n"); - /* set up the SuperSet Port Block which tells us where the - * CAT communication port is */ - sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; - VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); - - /* now find out if were 8 slot or normal */ - if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) - == EIGHT_SLOT_IDENTIFIER) { - voyager_8slot = 1; - printk(KERN_NOTICE - "Voyager: Eight slot 51xx configuration detected\n"); - } - - for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) { - __u8 input; - int asic; - __u16 eprom_size; - __u16 sp_offset; - - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(i, VOYAGER_CAT_CONFIG_PORT); - - /* check the presence of the module */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - /* stream series of alternating 1's and 0's to stimulate - * response */ - outb(0xAA, CAT_DATA); - input = inb(CAT_DATA); - outb(VOYAGER_CAT_END, CAT_CMD); - if (input != VOYAGER_CAT_HEADER) { - continue; - } - CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, - cat_module_name(i))); - *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */ - if (*modpp == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset(*modpp, 0, sizeof(voyager_module_t)); - /* need temporary asic for cat_subread. It will be - * filled in correctly later */ - (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */ - if ((*modpp)->asic == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); - (*modpp)->asic->asic_id = VOYAGER_CAT_ID; - (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; - (*modpp)->module_addr = i; - (*modpp)->scan_path_connected = 0; - if (i == VOYAGER_PSI) { - /* Exception leg for modules with no EEPROM */ - printk("Module \"%s\"\n", cat_module_name(i)); - continue; - } - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", - cat_module_name(i), eprom_hdr->version_id, - *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics); - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* All we really care about are the Quad cards. We - * identify them because they are in a processor slot - * and have only four asics */ - if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) { - modpp = &((*modpp)->next); - continue; - } - /* Now we know it's in a processor slot, does it have - * a quad baseboard submodule */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, - &num_submodules); - /* lowest two bits, active low */ - num_submodules = ~(0xfc | num_submodules); - CDEBUG(("VOYAGER CAT: %d submodules present\n", - num_submodules)); - if (num_submodules == 0) { - /* fill in the dyadic extended processors */ - __u8 cpu = i & 0x07; - - printk("Module \"%s\": Dyadic Processor Card\n", - cat_module_name(i)); - voyager_extended_vic_processors |= (1 << cpu); - cpu += 4; - voyager_extended_vic_processors |= (1 << cpu); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - - /* now we want to read the asics on the first submodule, - * which should be the quad base board */ - - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val); - CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); - val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; - cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); - - outb(VOYAGER_CAT_END, CAT_CMD); - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - /* Now do everything for the QBB submodule 1 */ - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* get rid of the dummy CAT asic and read the real one */ - kfree((*modpp)->asic); - for (asic = 0; asic < (*modpp)->num_asics; asic++) { - int j; - voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */ - voyager_sp_table_t *sp_table; - voyager_at_t *asic_table; - voyager_jtt_t *jtag_table; - - if (asicp == NULL) { - printk - ("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - asicpp = &(asicp->next); - asicp->asic_location = asic; - sp_table = - (voyager_sp_table_t *) (eprom_buf + sp_offset); - asicp->asic_id = sp_table->asic_id; - asic_table = - (voyager_at_t *) (eprom_buf + - sp_table->asic_data_offset); - for (j = 0; j < 4; j++) - asicp->jtag_id[j] = asic_table->jtag_id[j]; - jtag_table = - (voyager_jtt_t *) (eprom_buf + - asic_table->jtag_offset); - asicp->ireg_length = jtag_table->ireg_len; - asicp->bit_location = (*modpp)->inst_bits; - (*modpp)->inst_bits += asicp->ireg_length; - if (asicp->ireg_length > (*modpp)->largest_reg) - (*modpp)->largest_reg = asicp->ireg_length; - if (asicp->ireg_length < (*modpp)->smallest_reg || - (*modpp)->smallest_reg == 0) - (*modpp)->smallest_reg = asicp->ireg_length; - CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", - asicp->asic_id, asicp->ireg_length, - asicp->bit_location)); - if (asicp->asic_id == VOYAGER_QUAD_QABC) { - CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); - qabc_asic = asicp; - } - sp_offset += sizeof(voyager_sp_table_t); - } - CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg)); - /* OK, now we have the QUAD ASICs set up, use them. - * we need to: - * - * 1. Find the Memory area for the Quad CPIs. - * 2. Find the Extended VIC processor - * 3. Configure a second extended VIC processor (This - * cannot be done for the 51xx. - * */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_connect(*modpp, (*modpp)->asic); - CDEBUG(("CAT CONNECTED!!\n")); - cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); - qic_addr = qabc_data[5] << 8; - qic_addr = (qic_addr | qabc_data[6]) << 8; - qic_addr = (qic_addr | qabc_data[7]) << 8; - printk - ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", - cat_module_name(i), qic_addr, qabc_data[8]); -#if 0 /* plumbing fails---FIXME */ - if ((qabc_data[8] & 0xf0) == 0) { - /* FIXME: 32 way 8 CPU slot monster cannot be - * plumbed this way---need to check for it */ - - printk("Plumbing second Extended Quad Processor\n"); - /* second VIC line hardwired to Quad CPU 1 */ - qabc_data[8] |= 0x20; - cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); -#ifdef VOYAGER_CAT_DEBUG - /* verify plumbing */ - cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); - if ((qabc_data[8] & 0xf0) == 0) { - CDEBUG(("PLUMBING FAILED: 0x%x\n", - qabc_data[8])); - } -#endif - } -#endif - - { - struct resource *res = - kzalloc(sizeof(struct resource), GFP_KERNEL); - res->name = kmalloc(128, GFP_KERNEL); - sprintf((char *)res->name, "Voyager %s Quad CPI", - cat_module_name(i)); - res->start = qic_addr; - res->end = qic_addr + 0x3ff; - request_resource(&iomem_resource, res); - } - - qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400); - - for (j = 0; j < 4; j++) { - __u8 cpu; - - if (voyager_8slot) { - /* 8 slot has a different mapping, - * each slot has only one vic line, so - * 1 cpu in each slot must be < 8 */ - cpu = (i & 0x07) + j * 8; - } else { - cpu = (i & 0x03) + j * 4; - } - if ((qabc_data[8] & (1 << j))) { - voyager_extended_vic_processors |= (1 << cpu); - } - if (qabc_data[8] & (1 << (j + 4))) { - /* Second SET register plumbed: Quad - * card has two VIC connected CPUs. - * Secondary cannot be booted as a VIC - * CPU */ - voyager_extended_vic_processors |= (1 << cpu); - voyager_allowed_boot_processors &= - (~(1 << cpu)); - } - - voyager_quad_processors |= (1 << cpu); - voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *) - (qic_addr + (j << 8)); - CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu, - (unsigned long)voyager_quad_cpi_addr[cpu])); - } - outb(VOYAGER_CAT_END, CAT_CMD); - - *asicpp = NULL; - modpp = &((*modpp)->next); - } - *modpp = NULL; - printk - ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", - voyager_extended_vic_processors, voyager_quad_processors, - voyager_allowed_boot_processors); - request_resource(&ioport_resource, &vic_res); - if (voyager_quad_processors) - request_resource(&ioport_resource, &qic_res); - /* set up the front power switch */ -} - -int voyager_cat_readb(__u8 module, __u8 asic, int reg) -{ - return 0; -} - -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (!modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_disconnect: ASIC is not CAT\n")); - return 1; - } - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); - return err; - } - val &= VOYAGER_DISCONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 0; - - return 0; -} - -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_connect: ASIC is not CAT\n")); - return 1; - } - - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_connect: failed to read SCANPATH\n")); - return err; - } - val |= VOYAGER_CONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_connect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 1; - - return 0; -} - -void voyager_cat_power_off(void) -{ - /* Power the machine off by writing to the PSI over the CAT - * bus */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - CDEBUG(("PSI STATUS 0x%x\n", data)); - /* These two writes are power off prep and perform */ - data = PSI_CLEAR; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - data = PSI_POWER_DOWN; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); -} - -struct voyager_status voyager_status = { 0 }; - -void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data) -{ - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - switch (cmd) { - case VOYAGER_PSI_READ: - cat_read(&psi, &psi_asic, reg, data); - break; - case VOYAGER_PSI_WRITE: - cat_write(&psi, &psi_asic, reg, *data); - break; - case VOYAGER_PSI_SUBREAD: - cat_subread(&psi, &psi_asic, reg, 1, data); - break; - case VOYAGER_PSI_SUBWRITE: - cat_subwrite(&psi, &psi_asic, reg, 1, data); - break; - default: - printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); - break; - } - outb(VOYAGER_CAT_END, CAT_CMD); -} - -void voyager_cat_do_common_interrupt(void) -{ - /* This is caused either by a memory parity error or something - * in the PSI */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - struct voyager_psi psi_reg; - int i; - re_read: - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status. NOTE: Need to read *all* the PSI regs here - * otherwise the cmn int will be reasserted */ - for (i = 0; i < sizeof(psi_reg.regs); i++) { - cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); - if ((psi_reg.regs.checkbit & 0x02) == 0) { - psi_reg.regs.checkbit |= 0x02; - cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); - printk("VOYAGER RE-READ PSI\n"); - goto re_read; - } - outb(VOYAGER_CAT_RUN, CAT_CMD); - for (i = 0; i < sizeof(psi_reg.subregs); i++) { - /* This looks strange, but the PSI doesn't do auto increment - * correctly */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, - 1, &((__u8 *) & psi_reg.subregs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); -#ifdef VOYAGER_CAT_DEBUG - printk("VOYAGER PSI: "); - for (i = 0; i < sizeof(psi_reg.regs); i++) - printk("%02x ", ((__u8 *) & psi_reg.regs)[i]); - printk("\n "); - for (i = 0; i < sizeof(psi_reg.subregs); i++) - printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]); - printk("\n"); -#endif - if (psi_reg.regs.intstatus & PSI_MON) { - /* switch off or power fail */ - - if (psi_reg.subregs.supply & PSI_SWITCH_OFF) { - if (voyager_status.switch_off) { - printk(KERN_ERR - "Voyager front panel switch turned off again---Immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else { - printk(KERN_ERR - "Voyager front panel switch turned off\n"); - voyager_status.switch_off = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - /* Tell the hardware we're taking care of the - * shutdown, otherwise it will power the box off - * within 3 seconds of the switch being pressed and, - * which is much more important to us, continue to - * assert the common interrupt */ - data = PSI_CLR_SWITCH_OFF; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - } else { - - VDEBUG(("Voyager ac fail reg 0x%x\n", - psi_reg.subregs.ACfail)); - if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) { - /* No further update */ - return; - } -#if 0 - /* Don't bother trying to find out who failed. - * FIXME: This probably makes the code incorrect on - * anything other than a 345x */ - for (i = 0; i < 5; i++) { - if (psi_reg.subregs.ACfail & (1 << i)) { - break; - } - } - printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i); -#endif - /* DON'T do this: it shuts down the AC PSI - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_MASK_MASK | i; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - */ - printk(KERN_ERR "Voyager AC power failure\n"); - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_COLD_START; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - voyager_status.power_fail = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - - } else if (psi_reg.regs.intstatus & PSI_FAULT) { - /* Major fault! */ - printk(KERN_ERR - "Voyager PSI Detected major fault, immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM - | PSI_CURRENT | PSI_DVM - | PSI_PSCFAULT | PSI_STAT_CHG)) { - /* other psi fault */ - - printk(KERN_WARNING "Voyager PSI status 0x%x\n", data); - /* clear the PSI fault */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0); - outb(VOYAGER_CAT_END, CAT_CMD); - } -} diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c deleted file mode 100644 index 98e3c2bc7563..000000000000 --- a/arch/x86/mach-voyager/voyager_smp.c +++ /dev/null @@ -1,1805 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file provides all the same external entries as smp.c but uses - * the voyager hal to provide the functionality - */ -#include <linux/cpu.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/kernel_stat.h> -#include <linux/delay.h> -#include <linux/mc146818rtc.h> -#include <linux/cache.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/bootmem.h> -#include <linux/completion.h> -#include <asm/desc.h> -#include <asm/voyager.h> -#include <asm/vic.h> -#include <asm/mtrr.h> -#include <asm/pgalloc.h> -#include <asm/tlbflush.h> -#include <asm/arch_hooks.h> -#include <asm/trampoline.h> - -/* TLB state -- visible externally, indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 }; - -/* CPU IRQ affinity -- set to all ones initially */ -static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = - {[0 ... NR_CPUS-1] = ~0UL }; - -/* per CPU data structure (for /proc/cpuinfo et al), visible externally - * indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - -/* physical ID of the CPU used to boot the system */ -unsigned char boot_cpu_id; - -/* The memory line addresses for the Quad CPIs */ -struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; - -/* The masks for the Extended VIC processors, filled in by cat_init */ -__u32 voyager_extended_vic_processors = 0; - -/* Masks for the extended Quad processors which cannot be VIC booted */ -__u32 voyager_allowed_boot_processors = 0; - -/* The mask for the Quad Processors (both extended and non-extended) */ -__u32 voyager_quad_processors = 0; - -/* Total count of live CPUs, used in process.c to display - * the CPU information and in irq.c for the per CPU irq - * activity count. Finally exported by i386_ksyms.c */ -static int voyager_extended_cpus = 1; - -/* Used for the invalidate map that's also checked in the spinlock */ -static volatile unsigned long smp_invalidate_needed; - -/* Bitmask of CPUs present in the system - exported by i386_syms.c, used - * by scheduler but indexed physically */ -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; - -/* The internal functions */ -static void send_CPI(__u32 cpuset, __u8 cpi); -static void ack_CPI(__u8 cpi); -static int ack_QIC_CPI(__u8 cpi); -static void ack_special_QIC_CPI(__u8 cpi); -static void ack_VIC_CPI(__u8 cpi); -static void send_CPI_allbutself(__u8 cpi); -static void mask_vic_irq(unsigned int irq); -static void unmask_vic_irq(unsigned int irq); -static unsigned int startup_vic_irq(unsigned int irq); -static void enable_local_vic_irq(unsigned int irq); -static void disable_local_vic_irq(unsigned int irq); -static void before_handle_vic_irq(unsigned int irq); -static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); -static void ack_vic_irq(unsigned int irq); -static void vic_enable_cpi(void); -static void do_boot_cpu(__u8 cpuid); -static void do_quad_bootstrap(void); -static void initialize_secondary(void); - -int hard_smp_processor_id(void); -int safe_smp_processor_id(void); - -/* Inline functions */ -static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi) -{ - voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = - (smp_processor_id() << 16) + cpi; -} - -static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) { -#ifdef VOYAGER_DEBUG - if (!cpu_online(cpu)) - VDEBUG(("CPU%d sending cpi %d to CPU%d not in " - "cpu_online_map\n", - hard_smp_processor_id(), cpi, cpu)); -#endif - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - } - } -} - -static inline void wrapper_smp_local_timer_interrupt(void) -{ - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); -} - -static inline void send_one_CPI(__u8 cpu, __u8 cpi) -{ - if (voyager_quad_processors & (1 << cpu)) - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - else - send_CPI(1 << cpu, cpi); -} - -static inline void send_CPI_allbutself(__u8 cpi) -{ - __u8 cpu = smp_processor_id(); - __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu); - send_CPI(mask, cpi); -} - -static inline int is_cpu_quad(void) -{ - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER); -} - -static inline int is_cpu_extended(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors & (1 << cpu)); -} - -static inline int is_cpu_vic_boot(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors - & voyager_allowed_boot_processors & (1 << cpu)); -} - -static inline void ack_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CPU_BOOT_CPI: - if (is_cpu_quad() && !is_cpu_vic_boot()) - ack_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - case VIC_SYS_INT: - case VIC_CMN_INT: - /* These are slightly strange. Even on the Quad card, - * They are vectored as VIC CPIs */ - if (is_cpu_quad()) - ack_special_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - default: - printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi); - break; - } -} - -/* local variables */ - -/* The VIC IRQ descriptors -- these look almost identical to the - * 8259 IRQs except that masks and things must be kept per processor - */ -static struct irq_chip vic_chip = { - .name = "VIC", - .startup = startup_vic_irq, - .mask = mask_vic_irq, - .unmask = unmask_vic_irq, - .set_affinity = set_vic_irq_affinity, -}; - -/* used to count up as CPUs are brought on line (starts at 0) */ -static int cpucount = 0; - -/* The per cpu profile stuff - used in smp_local_timer_interrupt */ -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - -/* the map used to check if a CPU has booted */ -static __u32 cpu_booted_map; - -/* the synchronize flag used to hold all secondary CPUs spinning in - * a tight loop until the boot sequence is ready for them */ -static cpumask_t smp_commenced_mask = CPU_MASK_NONE; - -/* This is for the new dynamic CPU boot code */ - -/* The per processor IRQ masks (these are usually kept in sync) */ -static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; - -/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */ -static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Lock for enable/disable of VIC interrupts */ -static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock); - -/* The boot processor is correctly set up in PC mode when it - * comes up, but the secondaries need their master/slave 8259 - * pairs initializing correctly */ - -/* Interrupt counters (per cpu) and total - used to try to - * even up the interrupt handling routines */ -static long vic_intr_total = 0; -static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 }; -static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Since we can only use CPI0, we fake all the other CPIs */ -static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned; - -/* debugging routine to read the isr of the cpu's pic */ -static inline __u16 vic_read_isr(void) -{ - __u16 isr; - - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - - return isr; -} - -static __init void qic_setup(void) -{ - if (!is_cpu_quad()) { - /* not a quad, no setup */ - return; - } - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - - if (is_cpu_extended()) { - /* the QIC duplicate of the VIC base register */ - outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER); - outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER); - - /* FIXME: should set up the QIC timer and memory parity - * error vectors here */ - } -} - -static __init void vic_setup_pic(void) -{ - outb(1, VIC_REDIRECT_REGISTER_1); - /* clear the claim registers for dynamic routing */ - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - outb(0, VIC_PRIORITY_REGISTER); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - /* Now initiallise the master PIC belonging to this CPU by - * sending the four ICWs */ - - /* ICW1: level triggered, ICW4 needed */ - outb(0x19, 0x20); - - /* ICW2: vector base */ - outb(FIRST_EXTERNAL_VECTOR, 0x21); - - /* ICW3: slave at line 2 */ - outb(0x04, 0x21); - - /* ICW4: 8086 mode */ - outb(0x01, 0x21); - - /* now the same for the slave PIC */ - - /* ICW1: level trigger, ICW4 needed */ - outb(0x19, 0xA0); - - /* ICW2: slave vector base */ - outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1); - - /* ICW3: slave ID */ - outb(0x02, 0xA1); - - /* ICW4: 8086 mode */ - outb(0x01, 0xA1); -} - -static void do_quad_bootstrap(void) -{ - if (is_cpu_quad() && is_cpu_vic_boot()) { - int i; - unsigned long flags; - __u8 cpuid = hard_smp_processor_id(); - - local_irq_save(flags); - - for (i = 0; i < 4; i++) { - /* FIXME: this would be >>3 &0x7 on the 32 way */ - if (((cpuid >> 2) & 0x03) == i) - /* don't lower our own mask! */ - continue; - - /* masquerade as local Quad CPU */ - outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); - /* enable the startup CPI */ - outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); - /* restore cpu id */ - outb(0, QIC_PROCESSOR_ID); - } - local_irq_restore(flags); - } -} - -void prefill_possible_map(void) -{ - /* This is empty on voyager because we need a much - * earlier detection which is done in find_smp_config */ -} - -/* Set up all the basic stuff: read the SMP config and make all the - * SMP information reflect only the boot cpu. All others will be - * brought on-line later. */ -void __init find_smp_config(void) -{ - int i; - - boot_cpu_id = hard_smp_processor_id(); - - printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); - - /* initialize the CPU structures (moved from smp_boot_cpus) */ - for (i = 0; i < nr_cpu_ids; i++) - cpu_irq_affinity[i] = ~0; - cpu_online_map = cpumask_of_cpu(boot_cpu_id); - - /* The boot CPU must be extended */ - voyager_extended_vic_processors = 1 << boot_cpu_id; - /* initially, all of the first 8 CPUs can boot */ - voyager_allowed_boot_processors = 0xff; - /* set up everything for just this CPU, we can alter - * this as we start the other CPUs later */ - /* now get the CPU disposition from the extended CMOS */ - cpus_addr(phys_cpu_present_map)[0] = - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 2) << 16; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 3) << 24; - init_cpu_possible(&phys_cpu_present_map); - printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", - cpus_addr(phys_cpu_present_map)[0]); - /* Here we set up the VIC to enable SMP */ - /* enable the CPIs by writing the base vector to their register */ - outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); - outb(1, VIC_REDIRECT_REGISTER_1); - /* set the claim registers for static routing --- Boot CPU gets - * all interrupts untill all other CPUs started */ - outb(0xff, VIC_CLAIM_REGISTER_0); - outb(0xff, VIC_CLAIM_REGISTER_1); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - - /* Finally tell the firmware that we're driving */ - outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG, - VOYAGER_SUS_IN_CONTROL_PORT); - - current_thread_info()->cpu = boot_cpu_id; - percpu_write(cpu_number, boot_cpu_id); -} - -/* - * The bootstrap kernel entry code has set these up. Save them - * for a given CPU, id is physical */ -void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - - identify_secondary_cpu(c); -} - -/* Routine initially called when a non-boot CPU is brought online */ -static void __init start_secondary(void *unused) -{ - __u8 cpuid = hard_smp_processor_id(); - - cpu_init(); - - /* OK, we're in the routine */ - ack_CPI(VIC_CPU_BOOT_CPI); - - /* setup the 8259 master slave pair belonging to this CPU --- - * we won't actually receive any until the boot CPU - * relinquishes it's static routing mask */ - vic_setup_pic(); - - qic_setup(); - - if (is_cpu_quad() && !is_cpu_vic_boot()) { - /* clear the boot CPI */ - __u8 dummy; - - dummy = - voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; - printk("read dummy %d\n", dummy); - } - - /* lower the mask to receive CPIs */ - vic_enable_cpi(); - - VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); - - notify_cpu_starting(cpuid); - - /* enable interrupts */ - local_irq_enable(); - - /* get our bogomips */ - calibrate_delay(); - - /* save our processor parameters */ - smp_store_cpu_info(cpuid); - - /* if we're a quad, we may need to bootstrap other CPUs */ - do_quad_bootstrap(); - - /* FIXME: this is rather a poor hack to prevent the CPU - * activating softirqs while it's supposed to be waiting for - * permission to proceed. Without this, the new per CPU stuff - * in the softirqs will fail */ - local_irq_disable(); - cpu_set(cpuid, cpu_callin_map); - - /* signal that we're done */ - cpu_booted_map = 1; - - while (!cpu_isset(cpuid, smp_commenced_mask)) - rep_nop(); - local_irq_enable(); - - local_flush_tlb(); - - cpu_set(cpuid, cpu_online_map); - wmb(); - cpu_idle(); -} - -/* Routine to kick start the given CPU and wait for it to report ready - * (or timeout in startup). When this routine returns, the requested - * CPU is either fully running and configured or known to be dead. - * - * We call this routine sequentially 1 CPU at a time, so no need for - * locking */ - -static void __init do_boot_cpu(__u8 cpu) -{ - struct task_struct *idle; - int timeout; - unsigned long flags; - int quad_boot = (1 << cpu) & voyager_quad_processors - & ~(voyager_extended_vic_processors - & voyager_allowed_boot_processors); - - /* This is the format of the CPI IDT gate (in real mode) which - * we're hijacking to boot the CPU */ - union IDTFormat { - struct seg { - __u16 Offset; - __u16 Segment; - } idt; - __u32 val; - } hijack_source; - - __u32 *hijack_vector; - __u32 start_phys_address = setup_trampoline(); - - /* There's a clever trick to this: The linux trampoline is - * compiled to begin at absolute location zero, so make the - * address zero but have the data segment selector compensate - * for the actual address */ - hijack_source.idt.Offset = start_phys_address & 0x000F; - hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF; - - cpucount++; - alternatives_smp_switch(1); - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU%d", cpu); - idle->thread.ip = (unsigned long)start_secondary; - /* init_tasks (in sched.c) is indexed logically */ - stack_start.sp = (void *)idle->thread.sp; - - per_cpu(current_task, cpu) = idle; - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - irq_ctx_init(cpu); - - /* Note: Don't modify initial ss override */ - VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, - (unsigned long)hijack_source.val, hijack_source.idt.Segment, - hijack_source.idt.Offset, stack_start.sp)); - - /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); - flush_tlb_all(); - - if (quad_boot) { - printk("CPU %d: non extended Quad boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } else { - printk("CPU%d: extended VIC boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - /* VIC errata, may also receive interrupt at this address */ - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + - VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } - /* All non-boot CPUs start with interrupts fully masked. Need - * to lower the mask of the CPI we're about to send. We do - * this in the VIC by masquerading as the processor we're - * about to boot and lowering its interrupt mask */ - local_irq_save(flags); - if (quad_boot) { - send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); - } else { - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - /* here we're altering registers belonging to `cpu' */ - - outb(VIC_BOOT_INTERRUPT_MASK, 0x21); - /* now go back to our original identity */ - outb(boot_cpu_id, VIC_PROCESSOR_ID); - - /* and boot the CPU */ - - send_CPI((1 << cpu), VIC_CPU_BOOT_CPI); - } - cpu_booted_map = 0; - local_irq_restore(flags); - - /* now wait for it to become ready (or timeout) */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_booted_map) - break; - udelay(100); - } - /* reset the page table */ - zap_low_mappings(); - - if (cpu_booted_map) { - VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n", - cpu, smp_processor_id())); - - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - wmb(); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_present_map); - } else { - printk("CPU%d FAILED TO BOOT: ", cpu); - if (* - ((volatile unsigned char *)phys_to_virt(start_phys_address)) - == 0xA5) - printk("Stuck.\n"); - else - printk("Not responding.\n"); - - cpucount--; - } -} - -void __init smp_boot_cpus(void) -{ - int i; - - /* CAT BUS initialisation must be done after the memory */ - /* FIXME: The L4 has a catbus too, it just needs to be - * accessed in a totally different way */ - if (voyager_level == 5) { - voyager_cat_init(); - - /* now that the cat has probed the Voyager System Bus, sanity - * check the cpu map */ - if (((voyager_quad_processors | voyager_extended_vic_processors) - & cpus_addr(phys_cpu_present_map)[0]) != - cpus_addr(phys_cpu_present_map)[0]) { - /* should panic */ - printk("\n\n***WARNING*** " - "Sanity check of CPU present map FAILED\n"); - } - } else if (voyager_level == 4) - voyager_extended_vic_processors = - cpus_addr(phys_cpu_present_map)[0]; - - /* this sets up the idle task to run on the current cpu */ - voyager_extended_cpus = 1; - /* Remove the global_irq_holder setting, it triggers a BUG() on - * schedule at the moment */ - //global_irq_holder = boot_cpu_id; - - /* FIXME: Need to do something about this but currently only works - * on CPUs with a tsc which none of mine have. - smp_tune_scheduling(); - */ - smp_store_cpu_info(boot_cpu_id); - /* setup the jump vector */ - initial_code = (unsigned long)initialize_secondary; - printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data(boot_cpu_id)); - - if (is_cpu_quad()) { - /* booting on a Quad CPU */ - printk("VOYAGER SMP: Boot CPU is Quad\n"); - qic_setup(); - do_quad_bootstrap(); - } - - /* enable our own CPIs */ - vic_enable_cpi(); - - cpu_set(boot_cpu_id, cpu_online_map); - cpu_set(boot_cpu_id, cpu_callout_map); - - /* loop over all the extended VIC CPUs and boot them. The - * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { - if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) - continue; - do_boot_cpu(i); - /* This udelay seems to be needed for the Quad boots - * don't remove unless you know what you're doing */ - udelay(1000); - } - /* we could compute the total bogomips here, but why bother?, - * Code added from smpboot.c */ - { - unsigned long bogosum = 0; - - for_each_online_cpu(i) - bogosum += cpu_data(i).loops_per_jiffy; - printk(KERN_INFO "Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - cpucount + 1, bogosum / (500000 / HZ), - (bogosum / (5000 / HZ)) % 100); - } - voyager_extended_cpus = hweight32(voyager_extended_vic_processors); - printk("VOYAGER: Extended (interrupt handling CPUs): " - "%d, non-extended: %d\n", voyager_extended_cpus, - num_booting_cpus() - voyager_extended_cpus); - /* that's it, switch to symmetric mode */ - outb(0, VIC_PRIORITY_REGISTER); - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus())); -} - -/* Reload the secondary CPUs task structure (this function does not - * return ) */ -static void __init initialize_secondary(void) -{ -#if 0 - // AC kernels only - set_current(hard_get_current()); -#endif - - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile ("movl %0,%%esp\n\t" - "jmp *%1"::"r" (current->thread.sp), - "r"(current->thread.ip)); -} - -/* handle a Voyager SYS_INT -- If we don't, the base board will - * panic the system. - * - * System interrupts occur because some problem was detected on the - * various busses. To find out what you have to probe all the - * hardware via the CAT bus. FIXME: At the moment we do nothing. */ -void smp_vic_sys_interrupt(struct pt_regs *regs) -{ - ack_CPI(VIC_SYS_INT); - printk("Voyager SYSTEM INTERRUPT\n"); -} - -/* Handle a voyager CMN_INT; These interrupts occur either because of - * a system status change or because a single bit memory error - * occurred. FIXME: At the moment, ignore all this. */ -void smp_vic_cmn_interrupt(struct pt_regs *regs) -{ - static __u8 in_cmn_int = 0; - static DEFINE_SPINLOCK(cmn_int_lock); - - /* common ints are broadcast, so make sure we only do this once */ - _raw_spin_lock(&cmn_int_lock); - if (in_cmn_int) - goto unlock_end; - - in_cmn_int++; - _raw_spin_unlock(&cmn_int_lock); - - VDEBUG(("Voyager COMMON INTERRUPT\n")); - - if (voyager_level == 5) - voyager_cat_do_common_interrupt(); - - _raw_spin_lock(&cmn_int_lock); - in_cmn_int = 0; - unlock_end: - _raw_spin_unlock(&cmn_int_lock); - ack_CPI(VIC_CMN_INT); -} - -/* - * Reschedule call back. Nothing to do, all the work is done - * automatically when we return from the interrupt. */ -static void smp_reschedule_interrupt(void) -{ - /* do nothing */ -} - -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -static inline void voyager_leave_mm(unsigned long cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} - -/* - * Invalidate call-back - */ -static void smp_invalidate_interrupt(void) -{ - __u8 cpu = smp_processor_id(); - - if (!test_bit(cpu, &smp_invalidate_needed)) - return; - /* This will flood messages. Don't uncomment unless you see - * Problems with cross cpu invalidation - VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", - smp_processor_id())); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - voyager_leave_mm(cpu); - } - smp_mb__before_clear_bit(); - clear_bit(cpu, &smp_invalidate_needed); - smp_mb__after_clear_bit(); -} - -/* All the new flush operations for 2.4 */ - -/* This routine is called with a physical cpu mask */ -static void -voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm, - unsigned long va) -{ - int stuck = 50000; - - if (!cpumask) - BUG(); - if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) - BUG(); - if (cpumask & (1 << smp_processor_id())) - BUG(); - if (!mm) - BUG(); - - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - atomic_set_mask(cpumask, &smp_invalidate_needed); - /* - * We have to send the CPI only to - * CPUs affected. - */ - send_CPI(cpumask, VIC_INVALIDATE_CPI); - - while (smp_invalidate_needed) { - mb(); - if (--stuck == 0) { - printk("***WARNING*** Stuck doing invalidate CPI " - "(CPU%d)\n", smp_processor_id()); - break; - } - } - - /* Uncomment only to debug invalidation problems - VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); - */ - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - local_flush_tlb(); - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - voyager_leave_mm(smp_processor_id()); - } - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - voyager_leave_mm(smp_processor_id()); - } - - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} - -EXPORT_SYMBOL(flush_tlb_page); - -/* enable the requested IRQs */ -static void smp_enable_irq_interrupt(void) -{ - __u8 irq; - __u8 cpu = get_cpu(); - - VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, - vic_irq_enable_mask[cpu])); - - spin_lock(&vic_irq_lock); - for (irq = 0; irq < 16; irq++) { - if (vic_irq_enable_mask[cpu] & (1 << irq)) - enable_local_vic_irq(irq); - } - vic_irq_enable_mask[cpu] = 0; - spin_unlock(&vic_irq_lock); - - put_cpu_no_resched(); -} - -/* - * CPU halt call-back - */ -static void smp_stop_cpu_function(void *dummy) -{ - VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id())); - cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); - for (;;) - halt(); -} - -/* execute a thread on a new CPU. The function to be called must be - * previously set up. This is used to schedule a function for - * execution on all CPUs - set up the function then broadcast a - * function_interrupt CPI to come here on each CPU */ -static void smp_call_function_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -static void smp_call_function_single_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_single_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -/* Sorry about the name. In an APIC based system, the APICs - * themselves are programmed to send a timer interrupt. This is used - * by linux to reschedule the processor. Voyager doesn't have this, - * so we use the system clock to interrupt one processor, which in - * turn, broadcasts a timer CPI to all the others --- we receive that - * CPI here. We don't use this actually for counting so losing - * ticks doesn't matter - * - * FIXME: For those CPUs which actually have a local APIC, we could - * try to use it to trigger this interrupt instead of having to - * broadcast the timer tick. Unfortunately, all my pentium DYADs have - * no local APIC, so I can't do this - * - * This function is currently a placeholder and is unused in the code */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -/* All of the QUAD interrupt GATES */ -void smp_qic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - ack_QIC_CPI(QIC_TIMER_CPI); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -void smp_qic_invalidate_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_INVALIDATE_CPI); - smp_invalidate_interrupt(); -} - -void smp_qic_reschedule_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_RESCHEDULE_CPI); - smp_reschedule_interrupt(); -} - -void smp_qic_enable_irq_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_ENABLE_IRQ_CPI); - smp_enable_irq_interrupt(); -} - -void smp_qic_call_function_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_CPI); - smp_call_function_interrupt(); -} - -void smp_qic_call_function_single_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI); - smp_call_function_single_interrupt(); -} - -void smp_vic_cpi_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - __u8 cpu = smp_processor_id(); - - if (is_cpu_quad()) - ack_QIC_CPI(VIC_CPI_LEVEL0); - else - ack_VIC_CPI(VIC_CPI_LEVEL0); - - if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu])) - wrapper_smp_local_timer_interrupt(); - if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu])) - smp_invalidate_interrupt(); - if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu])) - smp_reschedule_interrupt(); - if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu])) - smp_enable_irq_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_single_interrupt(); - set_irq_regs(old_regs); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) - voyager_leave_mm(cpu); -} - -/* flush the TLB of every active CPU in the system */ -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, 0, 1); -} - -/* send a reschedule CPI to one CPU by physical CPU number*/ -static void voyager_smp_send_reschedule(int cpu) -{ - send_one_CPI(cpu, VIC_RESCHEDULE_CPI); -} - -int hard_smp_processor_id(void) -{ - __u8 i; - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) - return cpumask & 0x1F; - - for (i = 0; i < 8; i++) { - if (cpumask & (1 << i)) - return i; - } - printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask); - return 0; -} - -int safe_smp_processor_id(void) -{ - return hard_smp_processor_id(); -} - -/* broadcast a halt to all other CPUs */ -static void voyager_smp_send_stop(void) -{ - smp_call_function(smp_stop_cpu_function, NULL, 1); -} - -/* this function is triggered in time.c when a clock tick fires - * we need to re-broadcast the tick to all CPUs */ -void smp_vic_timer_interrupt(void) -{ - send_CPI_allbutself(VIC_TIMER_CPI); - smp_local_timer_interrupt(); -} - -/* local (per CPU) timer interrupt. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - long weight; - - profile_tick(CPU_PROFILING); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - /* FIXME: need to update the vic timer tick here */ - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - - update_process_times(user_mode_vm(get_irq_regs())); - } - - if (((1 << cpu) & voyager_extended_vic_processors) == 0) - /* only extended VIC processors participate in - * interrupt distribution */ - return; - - /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ - - if ((++vic_tick[cpu] & 0x7) != 0) - return; - /* get here every 16 ticks (about every 1/6 of a second) */ - - /* Change our priority to give someone else a chance at getting - * the IRQ. The algorithm goes like this: - * - * In the VIC, the dynamically routed interrupt is always - * handled by the lowest priority eligible (i.e. receiving - * interrupts) CPU. If >1 eligible CPUs are equal lowest, the - * lowest processor number gets it. - * - * The priority of a CPU is controlled by a special per-CPU - * VIC priority register which is 3 bits wide 0 being lowest - * and 7 highest priority.. - * - * Therefore we subtract the average number of interrupts from - * the number we've fielded. If this number is negative, we - * lower the activity count and if it is positive, we raise - * it. - * - * I'm afraid this still leads to odd looking interrupt counts: - * the totals are all roughly equal, but the individual ones - * look rather skewed. - * - * FIXME: This algorithm is total crap when mixed with SMP - * affinity code since we now try to even up the interrupt - * counts when an affinity binding is keeping them on a - * particular CPU*/ - weight = (vic_intr_count[cpu] * voyager_extended_cpus - - vic_intr_total) >> 4; - weight += 4; - if (weight > 7) - weight = 7; - if (weight < 0) - weight = 0; - - outb((__u8) weight, VIC_PRIORITY_REGISTER); - -#ifdef VOYAGER_DEBUG - if ((vic_tick[cpu] & 0xFFF) == 0) { - /* print this message roughly every 25 secs */ - printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", - cpu, vic_tick[cpu], weight); - } -#endif -} - -/* setup the profiling timer */ -int setup_profiling_timer(unsigned int multiplier) -{ - int i; - - if ((!multiplier)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. - */ - for (i = 0; i < nr_cpu_ids; ++i) - per_cpu(prof_multiplier, i) = multiplier; - - return 0; -} - -/* This is a bit of a mess, but forced on us by the genirq changes - * there's no genirq handler that really does what voyager wants - * so hack it up with the simple IRQ handler */ -static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) -{ - before_handle_vic_irq(irq); - handle_simple_irq(irq, desc); - after_handle_vic_irq(irq); -} - -/* The CPIs are handled in the per cpu 8259s, so they must be - * enabled to be received: FIX: enabling the CPIs in the early - * boot sequence interferes with bug checking; enable them later - * on in smp_init */ -#define VIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) -#define QIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) - -void __init voyager_smp_intr_init(void) -{ - int i; - - /* initialize the per cpu irq mask to all disabled */ - for (i = 0; i < nr_cpu_ids; i++) - vic_irq_mask[i] = 0xFFFF; - - VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); - - VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); - VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); - - QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); - QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); - QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); - QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); - QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); - - /* now put the VIC descriptor into the first 48 IRQs - * - * This is for later: first 16 correspond to PC IRQs; next 16 - * are Primary MC IRQs and final 16 are Secondary MC IRQs */ - for (i = 0; i < 48; i++) - set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq); -} - -/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per - * processor to receive CPI */ -static void send_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - __u32 quad_cpuset = (cpuset & voyager_quad_processors); - - if (cpi < VIC_START_FAKE_CPI) { - /* fake CPI are only used for booting, so send to the - * extended quads as well---Quads must be VIC booted */ - outb((__u8) (cpuset), VIC_CPI_Registers[cpi]); - return; - } - if (quad_cpuset) - send_QIC_CPI(quad_cpuset, cpi); - cpuset &= ~quad_cpuset; - cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ - if (cpuset == 0) - return; - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) - set_bit(cpi, &vic_cpi_mailbox[cpu]); - } - if (cpuset) - outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]); -} - -/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and - * set the cache line to shared by reading it. - * - * DON'T make this inline otherwise the cache line read will be - * optimised away - * */ -static int ack_QIC_CPI(__u8 cpi) -{ - __u8 cpu = hard_smp_processor_id(); - - cpi &= 7; - - outb(1 << cpi, QIC_INTERRUPT_CLEAR1); - return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi; -} - -static void ack_special_QIC_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CMN_INT: - outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); - break; - case VIC_SYS_INT: - outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); - break; - } - /* also clear at the VIC, just in case (nop for non-extended proc) */ - ack_VIC_CPI(cpi); -} - -/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ -static void ack_VIC_CPI(__u8 cpi) -{ -#ifdef VOYAGER_DEBUG - unsigned long flags; - __u16 isr; - __u8 cpu = smp_processor_id(); - - local_irq_save(flags); - isr = vic_read_isr(); - if ((isr & (1 << (cpi & 7))) == 0) { - printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); - } -#endif - /* send specific EOI; the two system interrupts have - * bit 4 set for a separate vector but behave as the - * corresponding 3 bit intr */ - outb_p(0x60 | (cpi & 7), 0x20); - -#ifdef VOYAGER_DEBUG - if ((vic_read_isr() & (1 << (cpi & 7))) != 0) { - printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); - } - local_irq_restore(flags); -#endif -} - -/* cribbed with thanks from irq.c */ -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) -#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) - -static unsigned int startup_vic_irq(unsigned int irq) -{ - unmask_vic_irq(irq); - - return 0; -} - -/* The enable and disable routines. This is where we run into - * conflicting architectural philosophy. Fundamentally, the voyager - * architecture does not expect to have to disable interrupts globally - * (the IRQ controllers belong to each CPU). The processor masquerade - * which is used to start the system shouldn't be used in a running OS - * since it will cause great confusion if two separate CPUs drive to - * the same IRQ controller (I know, I've tried it). - * - * The solution is a variant on the NCR lazy SPL design: - * - * 1) To disable an interrupt, do nothing (other than set the - * IRQ_DISABLED flag). This dares the interrupt actually to arrive. - * - * 2) If the interrupt dares to come in, raise the local mask against - * it (this will result in all the CPU masks being raised - * eventually). - * - * 3) To enable the interrupt, lower the mask on the local CPU and - * broadcast an Interrupt enable CPI which causes all other CPUs to - * adjust their masks accordingly. */ - -static void unmask_vic_irq(unsigned int irq) -{ - /* linux doesn't to processor-irq affinity, so enable on - * all CPUs we know about */ - int cpu = smp_processor_id(), real_cpu; - __u16 mask = (1 << irq); - __u32 processorList = 0; - unsigned long flags; - - VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n", - irq, cpu, cpu_irq_affinity[cpu])); - spin_lock_irqsave(&vic_irq_lock, flags); - for_each_online_cpu(real_cpu) { - if (!(voyager_extended_vic_processors & (1 << real_cpu))) - continue; - if (!(cpu_irq_affinity[real_cpu] & mask)) { - /* irq has no affinity for this CPU, ignore */ - continue; - } - if (real_cpu == cpu) { - enable_local_vic_irq(irq); - } else if (vic_irq_mask[real_cpu] & mask) { - vic_irq_enable_mask[real_cpu] |= mask; - processorList |= (1 << real_cpu); - } - } - spin_unlock_irqrestore(&vic_irq_lock, flags); - if (processorList) - send_CPI(processorList, VIC_ENABLE_IRQ_CPI); -} - -static void mask_vic_irq(unsigned int irq) -{ - /* lazy disable, do nothing */ -} - -static void enable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = ~(1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - vic_irq_mask[cpu] &= mask; - if (vic_irq_mask[cpu] == old_mask) - return; - - VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -static void disable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = (1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - if (irq == 7) - return; - - vic_irq_mask[cpu] |= mask; - if (old_mask == vic_irq_mask[cpu]) - return; - - VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -/* The VIC is level triggered, so the ack can only be issued after the - * interrupt completes. However, we do Voyager lazy interrupt - * handling here: It is an extremely expensive operation to mask an - * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If - * this interrupt actually comes in, then we mask and ack here to push - * the interrupt off to another CPU */ -static void before_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - __u8 cpu = smp_processor_id(); - - _raw_spin_lock(&vic_irq_lock); - vic_intr_total++; - vic_intr_count[cpu]++; - - if (!(cpu_irq_affinity[cpu] & (1 << irq))) { - /* The irq is not in our affinity mask, push it off - * onto another CPU */ - VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d " - "on cpu %d\n", irq, cpu)); - disable_local_vic_irq(irq); - /* set IRQ_INPROGRESS to prevent the handler in irq.c from - * actually calling the interrupt routine */ - desc->status |= IRQ_REPLAY | IRQ_INPROGRESS; - } else if (desc->status & IRQ_DISABLED) { - /* Damn, the interrupt actually arrived, do the lazy - * disable thing. The interrupt routine in irq.c will - * not handle a IRQ_DISABLED interrupt, so nothing more - * need be done here */ - VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", - irq, cpu)); - disable_local_vic_irq(irq); - desc->status |= IRQ_REPLAY; - } else { - desc->status &= ~IRQ_REPLAY; - } - - _raw_spin_unlock(&vic_irq_lock); -} - -/* Finish the VIC interrupt: basically mask */ -static void after_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - - _raw_spin_lock(&vic_irq_lock); - { - unsigned int status = desc->status & ~IRQ_INPROGRESS; -#ifdef VOYAGER_DEBUG - __u16 isr; -#endif - - desc->status = status; - if ((status & IRQ_DISABLED)) - disable_local_vic_irq(irq); -#ifdef VOYAGER_DEBUG - /* DEBUG: before we ack, check what's in progress */ - isr = vic_read_isr(); - if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) { - int i; - __u8 cpu = smp_processor_id(); - __u8 real_cpu; - int mask; /* Um... initialize me??? --RR */ - - printk("VOYAGER SMP: CPU%d lost interrupt %d\n", - cpu, irq); - for_each_possible_cpu(real_cpu, mask) { - - outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, - VIC_PROCESSOR_ID); - isr = vic_read_isr(); - if (isr & (1 << irq)) { - printk - ("VOYAGER SMP: CPU%d ack irq %d\n", - real_cpu, irq); - ack_vic_irq(irq); - } - outb(cpu, VIC_PROCESSOR_ID); - } - } -#endif /* VOYAGER_DEBUG */ - /* as soon as we ack, the interrupt is eligible for - * receipt by another CPU so everything must be in - * order here */ - ack_vic_irq(irq); - if (status & IRQ_REPLAY) { - /* replay is set if we disable the interrupt - * in the before_handle_vic_irq() routine, so - * clear the in progress bit here to allow the - * next CPU to handle this correctly */ - desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); - } -#ifdef VOYAGER_DEBUG - isr = vic_read_isr(); - if ((isr & (1 << irq)) != 0) - printk("VOYAGER SMP: after_handle_vic_irq() after " - "ack irq=%d, isr=0x%x\n", irq, isr); -#endif /* VOYAGER_DEBUG */ - } - _raw_spin_unlock(&vic_irq_lock); - - /* All code after this point is out of the main path - the IRQ - * may be intercepted by another CPU if reasserted */ -} - -/* Linux processor - interrupt affinity manipulations. - * - * For each processor, we maintain a 32 bit irq affinity mask. - * Initially it is set to all 1's so every processor accepts every - * interrupt. In this call, we change the processor's affinity mask: - * - * Change from enable to disable: - * - * If the interrupt ever comes in to the processor, we will disable it - * and ack it to push it off to another CPU, so just accept the mask here. - * - * Change from disable to enable: - * - * change the mask and then do an interrupt enable CPI to re-enable on - * the selected processors */ - -void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - /* Only extended processors handle interrupts */ - unsigned long real_mask; - unsigned long irq_mask = 1 << irq; - int cpu; - - real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; - - if (cpus_addr(*mask)[0] == 0) - /* can't have no CPUs to accept the interrupt -- extremely - * bad things will happen */ - return; - - if (irq == 0) - /* can't change the affinity of the timer IRQ. This - * is due to the constraint in the voyager - * architecture that the CPI also comes in on and IRQ - * line and we have chosen IRQ0 for this. If you - * raise the mask on this interrupt, the processor - * will no-longer be able to accept VIC CPIs */ - return; - - if (irq >= 32) - /* You can only have 32 interrupts in a voyager system - * (and 32 only if you have a secondary microchannel - * bus) */ - return; - - for_each_online_cpu(cpu) { - unsigned long cpu_mask = 1 << cpu; - - if (cpu_mask & real_mask) { - /* enable the interrupt for this cpu */ - cpu_irq_affinity[cpu] |= irq_mask; - } else { - /* disable the interrupt for this cpu */ - cpu_irq_affinity[cpu] &= ~irq_mask; - } - } - /* this is magic, we now have the correct affinity maps, so - * enable the interrupt. This will send an enable CPI to - * those CPUs who need to enable it in their local masks, - * causing them to correct for the new affinity . If the - * interrupt is currently globally disabled, it will simply be - * disabled again as it comes in (voyager lazy disable). If - * the affinity map is tightened to disable the interrupt on a - * cpu, it will be pushed off when it comes in */ - unmask_vic_irq(irq); -} - -static void ack_vic_irq(unsigned int irq) -{ - if (irq & 8) { - outb(0x62, 0x20); /* Specific EOI to cascade */ - outb(0x60 | (irq & 7), 0xA0); - } else { - outb(0x60 | (irq & 7), 0x20); - } -} - -/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 - * but are not vectored by it. This means that the 8259 mask must be - * lowered to receive them */ -static __init void vic_enable_cpi(void) -{ - __u8 cpu = smp_processor_id(); - - /* just take a copy of the current mask (nop for boot cpu) */ - vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; - - enable_local_vic_irq(VIC_CPI_LEVEL0); - enable_local_vic_irq(VIC_CPI_LEVEL1); - /* for sys int and cmn int */ - enable_local_vic_irq(7); - - if (is_cpu_quad()) { - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, QIC_CPI_ENABLE)); - } - - VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, vic_irq_mask[cpu])); -} - -void voyager_smp_dump() -{ - int old_cpu = smp_processor_id(), cpu; - - /* dump the interrupt masks of each processor */ - for_each_online_cpu(cpu) { - __u16 imr, isr, irr; - unsigned long flags; - - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - imr = (inb(0xa1) << 8) | inb(0x21); - outb(0x0a, 0xa0); - irr = inb(0xa0) << 8; - outb(0x0a, 0x20); - irr |= inb(0x20); - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", - cpu, vic_irq_mask[cpu], imr, irr, isr); -#if 0 - /* These lines are put in to try to unstick an un ack'd irq */ - if (isr != 0) { - int irq; - for (irq = 0; irq < 16; irq++) { - if (isr & (1 << irq)) { - printk("\tCPU%d: ack irq %d\n", - cpu, irq); - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, - VIC_PROCESSOR_ID); - ack_vic_irq(irq); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - } - } - } -#endif - } -} - -void smp_voyager_power_off(void *dummy) -{ - if (smp_processor_id() == boot_cpu_id) - voyager_power_off(); - else - smp_stop_cpu_function(NULL); -} - -static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) -{ - /* FIXME: ignore max_cpus for now */ - smp_boot_cpus(); -} - -static void __cpuinit voyager_smp_prepare_boot_cpu(void) -{ - int cpu = smp_processor_id(); - switch_to_new_gdt(cpu); - - cpu_set(cpu, cpu_online_map); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); - -} - -static int __cpuinit voyager_cpu_up(unsigned int cpu) -{ - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) - return -ENOSYS; - - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) - return -EIO; - /* Unleash the CPU! */ - cpu_set(cpu, smp_commenced_mask); - while (!cpu_online(cpu)) - mb(); - return 0; -} - -static void __init voyager_smp_cpus_done(unsigned int max_cpus) -{ - zap_low_mappings(); -} - -void __init smp_setup_processor_id(void) -{ - current_thread_info()->cpu = hard_smp_processor_id(); -} - -static void voyager_send_call_func(const struct cpumask *callmask) -{ - __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); - send_CPI(mask, VIC_CALL_FUNCTION_CPI); -} - -static void voyager_send_call_func_single(int cpu) -{ - send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); -} - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, - .smp_prepare_cpus = voyager_smp_prepare_cpus, - .cpu_up = voyager_cpu_up, - .smp_cpus_done = voyager_smp_cpus_done, - - .smp_send_stop = voyager_smp_send_stop, - .smp_send_reschedule = voyager_smp_send_reschedule, - - .send_call_func_ipi = voyager_send_call_func, - .send_call_func_single_ipi = voyager_send_call_func_single, -}; diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c deleted file mode 100644 index 15464a20fb38..000000000000 --- a/arch/x86/mach-voyager/voyager_thread.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This module provides the machine status monitor thread for the - * voyager architecture. This allows us to monitor the machine - * environment (temp, voltage, fan function) and the front panel and - * internal UPS. If a fault is detected, this thread takes corrective - * action (usually just informing init) - * */ - -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/kernel_stat.h> -#include <linux/delay.h> -#include <linux/mc146818rtc.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/kmod.h> -#include <linux/completion.h> -#include <linux/sched.h> -#include <linux/kthread.h> -#include <asm/desc.h> -#include <asm/voyager.h> -#include <asm/vic.h> -#include <asm/mtrr.h> -#include <asm/msr.h> - -struct task_struct *voyager_thread; -static __u8 set_timeout; - -static int execute(const char *string) -{ - int ret; - - char *envp[] = { - "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, - }; - char *argv[] = { - "/bin/bash", - "-c", - (char *)string, - NULL, - }; - - if ((ret = - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { - printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, - ret); - } - return ret; -} - -static void check_from_kernel(void) -{ - if (voyager_status.switch_off) { - - /* FIXME: This should be configurable via proc */ - execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); - } else if (voyager_status.power_fail) { - VDEBUG(("Voyager daemon detected AC power failure\n")); - - /* FIXME: This should be configureable via proc */ - execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); - set_timeout = 1; - } -} - -static void check_continuing_condition(void) -{ - if (voyager_status.power_fail) { - __u8 data; - voyager_cat_psi(VOYAGER_PSI_SUBREAD, - VOYAGER_PSI_AC_FAIL_REG, &data); - if ((data & 0x1f) == 0) { - /* all power restored */ - printk(KERN_NOTICE - "VOYAGER AC power restored, cancelling shutdown\n"); - /* FIXME: should be user configureable */ - execute - ("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); - set_timeout = 0; - } - } -} - -static int thread(void *unused) -{ - printk(KERN_NOTICE "Voyager starting monitor thread\n"); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); - - VDEBUG(("Voyager Daemon awoken\n")); - if (voyager_status.request_from_kernel == 0) { - /* probably awoken from timeout */ - check_continuing_condition(); - } else { - check_from_kernel(); - voyager_status.request_from_kernel = 0; - } - } -} - -static int __init voyager_thread_start(void) -{ - voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); - if (IS_ERR(voyager_thread)) { - printk(KERN_ERR - "Voyager: Failed to create system monitor thread.\n"); - return PTR_ERR(voyager_thread); - } - return 0; -} - -static void __exit voyager_thread_stop(void) -{ - kthread_stop(voyager_thread); -} - -module_init(voyager_thread_start); -module_exit(voyager_thread_stop); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 29644175490f..a03b7279efa0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1,74 +1,79 @@ /* * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. + * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. + * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ - -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mmiotrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> #include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ +#include <linux/mmiotrace.h> +#include <linux/bootmem.h> #include <linux/compiler.h> #include <linux/highmem.h> -#include <linux/bootmem.h> /* for max_low_pfn */ -#include <linux/vmalloc.h> -#include <linux/module.h> #include <linux/kprobes.h> #include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/vt_kern.h> +#include <linux/signal.h> +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/string.h> +#include <linux/module.h> #include <linux/kdebug.h> +#include <linux/errno.h> #include <linux/magic.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/tty.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm-generic/sections.h> -#include <asm/system.h> -#include <asm/desc.h> -#include <asm/segment.h> -#include <asm/pgalloc.h> -#include <asm/smp.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> +#include <asm/segment.h> +#include <asm/system.h> #include <asm/proto.h> -#include <asm-generic/sections.h> #include <asm/traps.h> +#include <asm/desc.h> /* - * Page fault error code bits - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - * bit 3 == 1 means use of reserved bit detected - * bit 4 == 1 means fault was an instruction fetch + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch */ -#define PF_PROT (1<<0) -#define PF_WRITE (1<<1) -#define PF_USER (1<<2) -#define PF_RSVD (1<<3) -#define PF_INSTR (1<<4) +enum x86_pf_error_code { + PF_PROT = 1 << 0, + PF_WRITE = 1 << 1, + PF_USER = 1 << 2, + PF_RSVD = 1 << 3, + PF_INSTR = 1 << 4, +}; + +/* + * Returns 0 if mmiotrace is disabled, or if the fault is not + * handled by mmiotrace: + */ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; -#endif return 0; } static inline int notify_page_fault(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (!user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode_vm(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -76,29 +81,76 @@ static inline int notify_page_fault(struct pt_regs *regs) } return ret; -#else - return 0; -#endif } /* - * X86_32 - * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. + * Prefetch quirks: * - * X86_64 - * Sometimes the CPU reports invalid exceptions on prefetch. - * Check that here and ignore it. + * 32-bit mode: * - * Opcode checker based on code by Richard Brunner + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. + * Check that here and ignore it. + * + * 64-bit mode: + * + * Sometimes the CPU reports invalid exceptions on prefetch. + * Check that here and ignore it. + * + * Opcode checker based on code by Richard Brunner. */ -static int is_prefetch(struct pt_regs *regs, unsigned long error_code, - unsigned long addr) +static inline int +check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, + unsigned char opcode, int *prefetch) { + unsigned char instr_hi = opcode & 0xf0; + unsigned char instr_lo = opcode & 0x0f; + + switch (instr_hi) { + case 0x20: + case 0x30: + /* + * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. + * In X86_64 long mode, the CPU will signal invalid + * opcode if some of these prefixes are present so + * X86_64 will never get here anyway + */ + return ((instr_lo & 7) == 0x6); +#ifdef CONFIG_X86_64 + case 0x40: + /* + * In AMD64 long mode 0x40..0x4F are valid REX prefixes + * Need to figure out under what instruction mode the + * instruction was issued. Could check the LDT for lm, + * but for now it's good enough to assume that long + * mode only uses well known segments or kernel. + */ + return (!user_mode(regs)) || (regs->cs == __USER_CS); +#endif + case 0x60: + /* 0x64 thru 0x67 are valid prefixes in all modes. */ + return (instr_lo & 0xC) == 0x4; + case 0xF0: + /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ + return !instr_lo || (instr_lo>>1) == 1; + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + if (probe_kernel_address(instr, opcode)) + return 0; + + *prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); + return 0; + default: + return 0; + } +} + +static int +is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) +{ + unsigned char *max_instr; unsigned char *instr; - int scan_more = 1; int prefetch = 0; - unsigned char *max_instr; /* * If it was a exec (instruction fetch) fault on NX page, then @@ -107,106 +159,170 @@ static int is_prefetch(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_INSTR) return 0; - instr = (unsigned char *)convert_ip_to_linear(current, regs); + instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; - while (scan_more && instr < max_instr) { + while (instr < max_instr) { unsigned char opcode; - unsigned char instr_hi; - unsigned char instr_lo; if (probe_kernel_address(instr, opcode)) break; - instr_hi = opcode & 0xf0; - instr_lo = opcode & 0x0f; instr++; - switch (instr_hi) { - case 0x20: - case 0x30: - /* - * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. - * In X86_64 long mode, the CPU will signal invalid - * opcode if some of these prefixes are present so - * X86_64 will never get here anyway - */ - scan_more = ((instr_lo & 7) == 0x6); - break; -#ifdef CONFIG_X86_64 - case 0x40: - /* - * In AMD64 long mode 0x40..0x4F are valid REX prefixes - * Need to figure out under what instruction mode the - * instruction was issued. Could check the LDT for lm, - * but for now it's good enough to assume that long - * mode only uses well known segments or kernel. - */ - scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); - break; -#endif - case 0x60: - /* 0x64 thru 0x67 are valid prefixes in all modes. */ - scan_more = (instr_lo & 0xC) == 0x4; - break; - case 0xF0: - /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ - scan_more = !instr_lo || (instr_lo>>1) == 1; - break; - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - scan_more = 0; - - if (probe_kernel_address(instr, opcode)) - break; - prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); + if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; - default: - scan_more = 0; - break; - } } return prefetch; } -static void force_sig_info_fault(int si_signo, int si_code, - unsigned long address, struct task_struct *tsk) +static void +force_sig_info_fault(int si_signo, int si_code, unsigned long address, + struct task_struct *tsk) { siginfo_t info; - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); } -#ifdef CONFIG_X86_64 -static int bad_address(void *p) +DEFINE_SPINLOCK(pgd_lock); +LIST_HEAD(pgd_list); + +#ifdef CONFIG_X86_32 +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) { - unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + /* + * set_pgd(pgd, *pgd_k); here would be useless on PAE + * and redundant with the set_pmd() on non-PAE. As would + * set_pud. + */ + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + + if (!pmd_present(*pmd)) { + set_pmd(pmd, *pmd_k); + arch_flush_lazy_mmu_mode(); + } else { + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + } + + return pmd_k; +} + +void vmalloc_sync_all(void) +{ + unsigned long address; + + if (SHARED_KERNEL_PMD) + return; + + for (address = VMALLOC_START & PMD_MASK; + address >= TASK_SIZE && address < FIXADDR_TOP; + address += PMD_SIZE) { + + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!vmalloc_sync_one(page_address(page), address)) + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); + } +} + +/* + * 32-bit: + * + * Handle a fault on the vmalloc or module mapping area + */ +static noinline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} + +/* + * Did it hit the DOS screen memory VA from vm86 mode? + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ + unsigned long bit; + + if (!v8086_mode(regs)) + return; + + bit = (address - 0xA0000) >> PAGE_SHIFT; + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; } -#endif static void dump_pagetable(unsigned long address) { -#ifdef CONFIG_X86_32 __typeof__(pte_val(__pte(0))) page; page = read_cr3(); page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; + #ifdef CONFIG_X86_PAE printk("*pdpt = %016Lx ", page); if ((page >> PAGE_SHIFT) < max_low_pfn && page & _PAGE_PRESENT) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; + & (PTRS_PER_PMD - 1)]; printk(KERN_CONT "*pde = %016Lx ", page); page &= ~_PAGE_NX; } @@ -218,19 +334,145 @@ static void dump_pagetable(unsigned long address) * We must not directly access the pte in the highpte * case if the page table is located in highmem. * And let's rather not kmap-atomic the pte, just in case - * it's allocated already. + * it's allocated already: */ if ((page >> PAGE_SHIFT) < max_low_pfn && (page & _PAGE_PRESENT) && !(page & _PAGE_PSE)) { + page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; + & (PTRS_PER_PTE - 1)]; printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); } printk("\n"); -#else /* CONFIG_X86_64 */ +} + +#else /* CONFIG_X86_64: */ + +void vmalloc_sync_all(void) +{ + unsigned long address; + + for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; + address += PGDIR_SIZE) { + + const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } + spin_unlock_irqrestore(&pgd_lock, flags); + } +} + +/* + * 64-bit: + * + * Handle a fault on the vmalloc area + * + * This assumes no large pages in there. + */ +static noinline int vmalloc_fault(unsigned long address) +{ + pgd_t *pgd, *pgd_ref; + pud_t *pud, *pud_ref; + pmd_t *pmd, *pmd_ref; + pte_t *pte, *pte_ref; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Copy kernel mappings over when needed. This can also + * happen within a race in page table update. In the later + * case just flush: + */ + pgd = pgd_offset(current->active_mm, address); + pgd_ref = pgd_offset_k(address); + if (pgd_none(*pgd_ref)) + return -1; + + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + + /* + * Below here mismatches are bugs because these lower tables + * are shared: + */ + + pud = pud_offset(pgd, address); + pud_ref = pud_offset(pgd_ref, address); + if (pud_none(*pud_ref)) + return -1; + + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + BUG(); + + pmd = pmd_offset(pud, address); + pmd_ref = pmd_offset(pud_ref, address); + if (pmd_none(*pmd_ref)) + return -1; + + if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) + BUG(); + + pte_ref = pte_offset_kernel(pmd_ref, address); + if (!pte_present(*pte_ref)) + return -1; + + pte = pte_offset_kernel(pmd, address); + + /* + * Don't use pte_page here, because the mappings can point + * outside mem_map, and the NUMA hash lookup cannot handle + * that: + */ + if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) + BUG(); + + return 0; +} + +static const char errata93_warning[] = +KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" +KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" +KERN_ERR "******* Please consider a BIOS update.\n" +KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; + +/* + * No vm86 mode in 64-bit mode: + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ +} + +static int bad_address(void *p) +{ + unsigned long dummy; + + return probe_kernel_address((unsigned long *)p, dummy); +} + +static void dump_pagetable(unsigned long address) +{ pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -239,102 +481,77 @@ static void dump_pagetable(unsigned long address) pgd = (pgd_t *)read_cr3(); pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); + pgd += pgd_index(address); - if (bad_address(pgd)) goto bad; + if (bad_address(pgd)) + goto bad; + printk("PGD %lx ", pgd_val(*pgd)); - if (!pgd_present(*pgd)) goto ret; + + if (!pgd_present(*pgd)) + goto out; pud = pud_offset(pgd, address); - if (bad_address(pud)) goto bad; + if (bad_address(pud)) + goto bad; + printk("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud) || pud_large(*pud)) - goto ret; + goto out; pmd = pmd_offset(pud, address); - if (bad_address(pmd)) goto bad; + if (bad_address(pmd)) + goto bad; + printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) + goto out; pte = pte_offset_kernel(pmd, address); - if (bad_address(pte)) goto bad; + if (bad_address(pte)) + goto bad; + printk("PTE %lx", pte_val(*pte)); -ret: +out: printk("\n"); return; bad: printk("BAD\n"); -#endif } -#ifdef CONFIG_X86_32 -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; +#endif /* CONFIG_X86_64 */ - /* - * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. As would - * set_pud. - */ - - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - if (!pmd_present(*pmd)) { - set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - return pmd_k; -} -#endif - -#ifdef CONFIG_X86_64 -static const char errata93_warning[] = -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -KERN_ERR "******* Please consider a BIOS update.\n" -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; -#endif - -/* Workaround for K8 erratum #93 & buggy BIOS. - BIOS SMM functions are required to use a specific workaround - to avoid corruption of the 64bit RIP register on C stepping K8. - A lot of BIOS that didn't get tested properly miss this. - The OS sees this as a page fault with the upper 32bits of RIP cleared. - Try to work around it here. - Note we only handle faults in kernel here. - Does nothing for X86_32 +/* + * Workaround for K8 erratum #93 & buggy BIOS. + * + * BIOS SMM functions are required to use a specific workaround + * to avoid corruption of the 64bit RIP register on C stepping K8. + * + * A lot of BIOS that didn't get tested properly miss this. + * + * The OS sees this as a page fault with the upper 32bits of RIP cleared. + * Try to work around it here. + * + * Note we only handle faults in kernel here. + * Does nothing on 32-bit. */ static int is_errata93(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - static int warned; + static int once; + if (address != regs->ip) return 0; + if ((address >> 32) != 0) return 0; + address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { - if (!warned) { + if (!once) { printk(errata93_warning); - warned = 1; + once = 1; } regs->ip = address; return 1; @@ -344,16 +561,17 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) } /* - * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal - * addresses >4GB. We catch this in the page fault handler because these - * addresses are not reachable. Just detect this case and return. Any code + * Work around K8 erratum #100 K8 in compat mode occasionally jumps + * to illegal addresses >4GB. + * + * We catch this in the page fault handler because these addresses + * are not reachable. Just detect this case and return. Any code * segment in LDT is compatibility mode. */ static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && - (address >> 32)) + if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) return 1; #endif return 0; @@ -363,8 +581,9 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG unsigned long nr; + /* - * Pentium F0 0F C7 C8 bug workaround. + * Pentium F0 0F C7 C8 bug workaround: */ if (boot_cpu_data.f00f_bug) { nr = (address - idt_descr.address) >> 3; @@ -378,80 +597,87 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) return 0; } -static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static const char nx_warning[] = KERN_CRIT +"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; + +static void +show_fault_oops(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { -#ifdef CONFIG_X86_32 if (!oops_may_print()) return; -#endif -#ifdef CONFIG_X86_PAE if (error_code & PF_INSTR) { unsigned int level; + pte_t *pte = lookup_address(address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(KERN_CRIT "kernel tried to execute " - "NX-protected page - exploit attempt? " - "(uid: %d)\n", current_uid()); + printk(nx_warning, current_uid()); } -#endif printk(KERN_ALERT "BUG: unable to handle kernel "); if (address < PAGE_SIZE) printk(KERN_CONT "NULL pointer dereference"); else printk(KERN_CONT "paging request"); + printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_ALERT "IP:"); printk_address(regs->ip, 1); + dump_pagetable(address); } -#ifdef CONFIG_X86_64 -static noinline void pgtable_bad(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +pgtable_bad(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { - unsigned long flags = oops_begin(); - int sig = SIGKILL; - struct task_struct *tsk = current; + struct task_struct *tsk; + unsigned long flags; + int sig; + + flags = oops_begin(); + tsk = current; + sig = SIGKILL; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + if (__die("Bad pagetable", regs, error_code)) sig = 0; + oops_end(flags, regs, sig); } -#endif -static noinline void no_context(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +no_context(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { struct task_struct *tsk = current; unsigned long *stackend; - -#ifdef CONFIG_X86_64 unsigned long flags; int sig; -#endif - /* Are we prepared to handle this kernel fault? */ + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. + * 32-bit: + * + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * 64-bit: * - * X86_64 - * Hall of shame of CPU/BIOS bugs. + * Hall of shame of CPU/BIOS bugs. */ if (is_prefetch(regs, error_code, address)) return; @@ -461,54 +687,70 @@ static noinline void no_context(struct pt_regs *regs, /* * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. + * terminate things with extreme prejudice: */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else flags = oops_begin(); -#endif show_fault_oops(regs, error_code, address); - stackend = end_of_stack(tsk); + stackend = end_of_stack(tsk); if (*stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else sig = SIGKILL; if (__die("Oops", regs, error_code)) sig = 0; + /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); -#endif } -static void __bad_area_nosemaphore(struct pt_regs *regs, - unsigned long error_code, unsigned long address, - int si_code) +/* + * Print out info about fatal segfaults, if the show_unhandled_signals + * sysctl is set: + */ +static inline void +show_signal_msg(struct pt_regs *regs, unsigned long error_code, + unsigned long address, struct task_struct *tsk) +{ + if (!unhandled_signal(tsk, SIGSEGV)) + return; + + if (!printk_ratelimit()) + return; + + printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *)regs->ip, (void *)regs->sp, error_code); + + print_vma_addr(KERN_CONT " in ", regs->ip); + + printk(KERN_CONT "\n"); +} + +static void +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) { struct task_struct *tsk = current; /* User mode accesses just cause a SIGSEGV */ if (error_code & PF_USER) { /* - * It's possible to have interrupts off here. + * It's possible to have interrupts off here: */ local_irq_enable(); /* * Valid to do another page fault here because this one came - * from user space. + * from user space: */ if (is_prefetch(regs, error_code, address)) return; @@ -516,22 +758,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs, if (is_errata100(regs, address)) return; - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } + if (unlikely(show_unhandled_signals)) + show_signal_msg(regs, error_code, address, tsk); + + /* Kernel addresses are always protection faults: */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; } @@ -541,15 +777,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs, no_context(regs, error_code, address); } -static noinline void bad_area_nosemaphore(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); } -static void __bad_area(struct pt_regs *regs, - unsigned long error_code, unsigned long address, - int si_code) +static void +__bad_area(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) { struct mm_struct *mm = current->mm; @@ -562,67 +799,75 @@ static void __bad_area(struct pt_regs *regs, __bad_area_nosemaphore(regs, error_code, address, si_code); } -static noinline void bad_area(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) { __bad_area(regs, error_code, address, SEGV_MAPERR); } -static noinline void bad_area_access_error(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area_access_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { __bad_area(regs, error_code, address, SEGV_ACCERR); } /* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ -static void out_of_memory(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static void +out_of_memory(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { /* * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). + * (which will retry the fault, or kill us if we got oom-killed): */ up_read(¤t->mm->mmap_sem); + pagefault_out_of_memory(); } -static void do_sigbus(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static void +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; up_read(&mm->mmap_sem); - /* Kernel mode? Handle exceptions or die */ + /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) no_context(regs, error_code, address); -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ + + /* User-space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) return; -#endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; + + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } -static noinline void mm_fault_error(struct pt_regs *regs, - unsigned long error_code, unsigned long address, unsigned int fault) +static noinline void +mm_fault_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address, unsigned int fault) { - if (fault & VM_FAULT_OOM) + if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); - else if (fault & VM_FAULT_SIGBUS) - do_sigbus(regs, error_code, address); - else - BUG(); + } else { + if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); + } } static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) return 0; @@ -630,21 +875,25 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) } /* - * Handle a spurious fault caused by a stale TLB entry. This allows - * us to lazily refresh the TLB when increasing the permissions of a - * kernel page (RO -> RW or NX -> X). Doing it eagerly is very - * expensive since that implies doing a full cross-processor TLB - * flush, even if no stale TLB entries exist on other processors. + * Handle a spurious fault caused by a stale TLB entry. + * + * This allows us to lazily refresh the TLB when increasing the + * permissions of a kernel page (RO -> RW or NX -> X). Doing it + * eagerly is very expensive since that implies doing a full + * cross-processor TLB flush, even if no stale TLB entries exist + * on other processors. + * * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int spurious_fault(unsigned long error_code, - unsigned long address) +static noinline int +spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; + int ret; /* Reserved-bit violation or user access to kernel space? */ if (error_code & (PF_USER | PF_RSVD)) @@ -672,123 +921,46 @@ static noinline int spurious_fault(unsigned long error_code, if (!pte_present(*pte)) return 0; - return spurious_fault_check(error_code, pte); -} - -/* - * X86_32 - * Handle a fault on the vmalloc or module mapping area - * - * X86_64 - * Handle a fault on the vmalloc area - * - * This assumes no large pages in there. - */ -static noinline int vmalloc_fault(unsigned long address) -{ -#ifdef CONFIG_X86_32 - unsigned long pgd_paddr; - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc area */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; + ret = spurious_fault_check(error_code, pte); + if (!ret) + return 0; /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. + * Make sure we have permissions in PMD. + * If not, then there's a bug in the page tables: */ - pgd_paddr = read_cr3(); - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); - if (!pmd_k) - return -1; - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - return 0; -#else - pgd_t *pgd, *pgd_ref; - pud_t *pud, *pud_ref; - pmd_t *pmd, *pmd_ref; - pte_t *pte, *pte_ref; + ret = spurious_fault_check(error_code, (pte_t *) pmd); + WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); - /* Make sure we are in vmalloc area */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* Copy kernel mappings over when needed. This can also - happen within a race in page table update. In the later - case just flush. */ - - pgd = pgd_offset(current->active_mm, address); - pgd_ref = pgd_offset_k(address); - if (pgd_none(*pgd_ref)) - return -1; - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - - /* Below here mismatches are bugs because these lower tables - are shared */ - - pud = pud_offset(pgd, address); - pud_ref = pud_offset(pgd_ref, address); - if (pud_none(*pud_ref)) - return -1; - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) - BUG(); - pmd = pmd_offset(pud, address); - pmd_ref = pmd_offset(pud_ref, address); - if (pmd_none(*pmd_ref)) - return -1; - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) - BUG(); - pte_ref = pte_offset_kernel(pmd_ref, address); - if (!pte_present(*pte_ref)) - return -1; - pte = pte_offset_kernel(pmd, address); - /* Don't use pte_page here, because the mappings can point - outside mem_map, and the NUMA hash lookup cannot handle - that. */ - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) - BUG(); - return 0; -#endif + return ret; } int show_unhandled_signals = 1; -static inline int access_error(unsigned long error_code, int write, - struct vm_area_struct *vma) +static inline int +access_error(unsigned long error_code, int write, struct vm_area_struct *vma) { if (write) { - /* write, present and write, not present */ + /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; - } else if (unlikely(error_code & PF_PROT)) { - /* read, present */ - return 1; - } else { - /* read, not present */ - if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) - return 1; + return 0; } + /* read, present: */ + if (unlikely(error_code & PF_PROT)) + return 1; + + /* read, not present: */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + return 0; } static int fault_in_kernel_space(unsigned long address) { -#ifdef CONFIG_X86_32 - return address >= TASK_SIZE; -#else /* !CONFIG_X86_32 */ - return address >= TASK_SIZE64; -#endif /* CONFIG_X86_32 */ + return address >= TASK_SIZE_MAX; } /* @@ -796,23 +968,22 @@ static int fault_in_kernel_space(unsigned long address) * and the problem, and then passes it off to one of the appropriate * routines. */ -#ifdef CONFIG_X86_64 -asmlinkage -#endif -void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) { - unsigned long address; + struct vm_area_struct *vma; struct task_struct *tsk; + unsigned long address; struct mm_struct *mm; - struct vm_area_struct *vma; int write; int fault; tsk = current; mm = tsk->mm; + prefetchw(&mm->mmap_sem); - /* get the address */ + /* Get the faulting address: */ address = read_cr2(); if (unlikely(kmmio_fault(regs, address))) @@ -836,22 +1007,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) vmalloc_fault(address) >= 0) return; - /* Can handle a stale RO->RW TLB */ + /* Can handle a stale RO->RW TLB: */ if (spurious_fault(error_code, address)) return; - /* kprobes don't want to hook the spurious faults. */ + /* kprobes don't want to hook the spurious faults: */ if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. + * fault we could otherwise deadlock: */ bad_area_nosemaphore(regs, error_code, address); + return; } - /* kprobes don't want to hook the spurious faults. */ + /* kprobes don't want to hook the spurious faults: */ if (unlikely(notify_page_fault(regs))) return; /* @@ -859,22 +1031,22 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * vmalloc fault has been handled. * * User-mode registers count as a user access even for any - * potential system fault or CPU buglet. + * potential system fault or CPU buglet: */ if (user_mode_vm(regs)) { local_irq_enable(); error_code |= PF_USER; - } else if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + } else { + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); + } -#ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); -#endif /* - * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault. + * If we're in an interrupt, have no user context or are running + * in an atomic region then we must not take the fault: */ if (unlikely(in_atomic() || !mm)) { bad_area_nosemaphore(regs, error_code, address); @@ -883,19 +1055,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* * When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunately, in the case of an - * erroneous fault occurring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. + * addresses in user space. All other faults represent errors in + * the kernel and should generate an OOPS. Unfortunately, in the + * case of an erroneous fault occurring in a code path which already + * holds mmap_sem we will deadlock attempting to validate the fault + * against the address space. Luckily the kernel only validly + * references user space from well defined areas of code, which are + * listed in the exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibility of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. + * the source reference check when there is a possibility of a + * deadlock. Attempt to lock the address space, if we cannot we then + * validate the source. If this is invalid we can skip the address + * space check, thus avoiding the deadlock: */ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && @@ -906,8 +1078,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) down_read(&mm->mmap_sem); } else { /* - * The above down_read_trylock() might have succeeded in which - * case we'll have missed the might_sleep() from down_read(). + * The above down_read_trylock() might have succeeded in + * which case we'll have missed the might_sleep() from + * down_read(): */ might_sleep(); } @@ -927,7 +1100,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* * Accessing the stack below %sp is always a bug. * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535,$31" pushes + * and pusha to work. ("enter $65535, $31" pushes * 32 pointers and then decrements %sp by 65535.) */ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { @@ -946,6 +1119,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) */ good_area: write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { bad_area_access_error(regs, error_code, address); return; @@ -954,75 +1128,21 @@ good_area: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo - * the fault. + * the fault: */ fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } + if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else tsk->min_flt++; -#ifdef CONFIG_X86_32 - /* - * Did it hit the DOS screen memory VA from vm86 mode? - */ - if (v8086_mode(regs)) { - unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; - } -#endif - up_read(&mm->mmap_sem); -} - -DEFINE_SPINLOCK(pgd_lock); -LIST_HEAD(pgd_list); + check_v8086_mode(regs, address, tsk); -void vmalloc_sync_all(void) -{ - unsigned long address; - -#ifdef CONFIG_X86_32 - if (SHARED_KERNEL_PMD) - return; - - for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE && address < FIXADDR_TOP; - address += PMD_SIZE) { - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - if (!vmalloc_sync_one(page_address(page), - address)) - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); - } -#else /* CONFIG_X86_64 */ - for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; - address += PGDIR_SIZE) { - const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; - struct page *page; - - if (pgd_none(*pgd_ref)) - continue; - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - } - spin_unlock_irqrestore(&pgd_lock, flags); - } -#endif + up_read(&mm->mmap_sem); } diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index ca53224fc56c..d5e28424622c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -20,6 +20,64 @@ #include <asm/pat.h> #include <linux/module.h> +#ifdef CONFIG_X86_PAE +static int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + return 1; +} +#else +static int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + /* There is no way to map greater than 1 << 32 address without PAE */ + if (base + size > 0x100000000ULL) + return 0; + + return 1; +} +#endif + +int +reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot) +{ + unsigned long ret_flag; + + if (!is_io_mapping_possible(base, size)) + goto out_err; + + if (!pat_enabled) { + *prot = pgprot_noncached(PAGE_KERNEL); + return 0; + } + + if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag)) + goto out_err; + + if (ret_flag == _PAGE_CACHE_WB) + goto out_free; + + if (kernel_map_sync_memtype(base, size, ret_flag)) + goto out_free; + + *prot = __pgprot(__PAGE_KERNEL | ret_flag); + return 0; + +out_free: + free_memtype(base, base + size); +out_err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(reserve_io_memtype_wc); + +void +free_io_memtype(u64 base, unsigned long size) +{ + if (pat_enabled) + free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(free_io_memtype); + /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 9cab18b0b857..0bcd7883d036 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -9,44 +9,44 @@ #include <asm/e820.h> -static void __init memtest(unsigned long start_phys, unsigned long size, - unsigned pattern) +static u64 patterns[] __initdata = { + 0, + 0xffffffffffffffffULL, + 0x5555555555555555ULL, + 0xaaaaaaaaaaaaaaaaULL, + 0x1111111111111111ULL, + 0x2222222222222222ULL, + 0x4444444444444444ULL, + 0x8888888888888888ULL, + 0x3333333333333333ULL, + 0x6666666666666666ULL, + 0x9999999999999999ULL, + 0xccccccccccccccccULL, + 0x7777777777777777ULL, + 0xbbbbbbbbbbbbbbbbULL, + 0xddddddddddddddddULL, + 0xeeeeeeeeeeeeeeeeULL, + 0x7a6c7258554e494cULL, /* yeah ;-) */ +}; + +static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) { - unsigned long i; - unsigned long *start; - unsigned long start_bad; - unsigned long last_bad; - unsigned long val; - unsigned long start_phys_aligned; - unsigned long count; - unsigned long incr; - - switch (pattern) { - case 0: - val = 0UL; - break; - case 1: - val = -1UL; - break; - case 2: -#ifdef CONFIG_X86_64 - val = 0x5555555555555555UL; -#else - val = 0x55555555UL; -#endif - break; - case 3: -#ifdef CONFIG_X86_64 - val = 0xaaaaaaaaaaaaaaaaUL; -#else - val = 0xaaaaaaaaUL; -#endif - break; - default: - return; - } + printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", + (unsigned long long) pattern, + (unsigned long long) start_bad, + (unsigned long long) end_bad); + reserve_early(start_bad, end_bad, "BAD RAM"); +} - incr = sizeof(unsigned long); +static void __init memtest(u64 pattern, u64 start_phys, u64 size) +{ + u64 i, count; + u64 *start; + u64 start_bad, last_bad; + u64 start_phys_aligned; + size_t incr; + + incr = sizeof(pattern); start_phys_aligned = ALIGN(start_phys, incr); count = (size - (start_phys_aligned - start_phys))/incr; start = __va(start_phys_aligned); @@ -54,25 +54,42 @@ static void __init memtest(unsigned long start_phys, unsigned long size, last_bad = 0; for (i = 0; i < count; i++) - start[i] = val; + start[i] = pattern; for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start != val) { - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - } else { - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad + incr, "BAD RAM"); - } - start_bad = last_bad = start_phys_aligned; - } + if (*start == pattern) + continue; + if (start_phys_aligned == last_bad + incr) { + last_bad += incr; + continue; } + if (start_bad) + reserve_bad_mem(pattern, start_bad, last_bad + incr); + start_bad = last_bad = start_phys_aligned; } - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad + incr, "BAD RAM"); + if (start_bad) + reserve_bad_mem(pattern, start_bad, last_bad + incr); +} + +static void __init do_one_pass(u64 pattern, u64 start, u64 end) +{ + u64 size = 0; + + while (start < end) { + start = find_e820_area_size(start, &size, 1); + + /* done ? */ + if (start >= end) + break; + if (start + size > end) + size = end - start; + + printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", + (unsigned long long) start, + (unsigned long long) start + size, + (unsigned long long) cpu_to_be64(pattern)); + memtest(pattern, start, size); + + start += size; } } @@ -90,33 +107,22 @@ early_param("memtest", parse_memtest); void __init early_memtest(unsigned long start, unsigned long end) { - u64 t_start, t_size; - unsigned pattern; + unsigned int i; + unsigned int idx = 0; if (!memtest_pattern) return; - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); - for (pattern = 0; pattern < memtest_pattern; pattern++) { - t_start = start; - t_size = 0; - while (t_start < end) { - t_start = find_e820_area_size(t_start, &t_size, 1); - - /* done ? */ - if (t_start >= end) - break; - if (t_start + t_size > end) - t_size = end - t_start; - - printk(KERN_CONT "\n %010llx - %010llx pattern %d", - (unsigned long long)t_start, - (unsigned long long)t_start + t_size, pattern); - - memtest(t_start, t_size, pattern); + printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); + for (i = 0; i < memtest_pattern; i++) { + idx = i % ARRAY_SIZE(patterns); + do_one_pass(patterns[idx], start, end); + } - t_start += t_size; - } + if (idx > 0) { + printk(KERN_INFO "early_memtest: wipe out " + "test pattern from memory\n"); + /* additional test with pattern 0 will do this */ + do_one_pass(0, start, end); } - printk(KERN_CONT "\n"); } diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d1f7439d173c..3957cd6d6454 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -194,7 +194,7 @@ void *alloc_remap(int nid, unsigned long size) size = ALIGN(size, L1_CACHE_BYTES); if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) - return 0; + return NULL; node_remap_alloc_vaddr[nid] += size; memset(allocation, 0, size); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index deb1c1ab7868..64c9cf043cdd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -166,7 +166,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, return shift; } -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { return phys_to_nid(pfn << PAGE_SHIFT); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8ca0d8566fc8..8253bc97587e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -482,6 +482,13 @@ static int split_large_page(pte_t *kpte, unsigned long address) pbase = (pte_t *)page_address(base); paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); + /* + * If we ever want to utilize the PAT bit, we need to + * update this function to make sure it's converted from + * bit 12 to bit 7 when we cross from the 2MB level to + * the 4K level: + */ + WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE); #ifdef CONFIG_X86_64 if (level == PG_LEVEL_1G) { @@ -508,18 +515,13 @@ static int split_large_page(pte_t *kpte, unsigned long address) #endif /* - * Install the new, split up pagetable. Important details here: - * - * On Intel the NX bit of all levels must be cleared to make a - * page executable. See section 4.13.2 of Intel 64 and IA-32 - * Architectures Software Developer's Manual). + * Install the new, split up pagetable. * - * Mark the entry present. The current mapping might be - * set to not present, which we preserved above. + * We use the standard kernel pagetable protections for the new + * pagetable protections, the actual ptes set above control the + * primary protection behavior: */ - ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); - pgprot_val(ref_prot) |= _PAGE_PRESENT; - __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); + __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE))); base = NULL; out_unlock: diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 05f9aef6818a..fdfedb65d45a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -634,6 +634,33 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) } /* + * Change the memory type for the physial address range in kernel identity + * mapping space if that range is a part of identity map. + */ +int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) +{ + unsigned long id_sz; + + if (!pat_enabled || base >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < base + size) ? + __pa(high_memory) - base : + size; + + if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { + printk(KERN_INFO + "%s:%d ioremap_change_attr failed %s " + "for %Lx-%Lx\n", + current->comm, current->pid, + cattr_name(flags), + base, (unsigned long long)(base + size)); + return -EINVAL; + } + return 0; +} + +/* * Internal interface to reserve a range of physical memory with prot. * Reserved non RAM regions only and after successful reserve_memtype, * this func also keeps identity mapping (if any) in sync with this new prot. @@ -642,7 +669,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { int is_ram = 0; - int id_sz, ret; + int ret; unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); @@ -679,23 +706,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags); } - /* Need to keep identity mapping in sync */ - if (paddr >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < paddr + size) ? - __pa(high_memory) - paddr : - size; - - if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { + if (kernel_map_sync_memtype(paddr, size, flags) < 0) { free_memtype(paddr, paddr + size); - printk(KERN_ERR - "%s:%d reserve_pfn_range ioremap_change_attr failed %s " - "for %Lx-%Lx\n", - current->comm, current->pid, - cattr_name(flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size)); return -EINVAL; } return 0; diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index d1e9b53f9d33..b641388d8286 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -8,7 +8,7 @@ #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/asm-offsets.h> #include <asm/processor-flags.h> diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 000415947d93..9356547d8c01 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -18,7 +18,7 @@ .text #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <asm/asm-offsets.h> #include <asm/processor-flags.h> diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 9c98cc6ba978..7133cdf9098b 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) unsigned long addr, end; unsigned offset; end = (start + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE64) - end = TASK_SIZE64; + if (end >= TASK_SIZE_MAX) + end = TASK_SIZE_MAX; end -= len; /* This loses some more bits than a modulo, but is cheaper */ offset = get_random_int() & (PTRS_PER_PTE - 1); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 87b9ab166423..b83e119fbeb0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK - depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) + depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) depends on X86_CMPXCHG && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 86497d5f44cd..c52f4034c7fd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -940,6 +940,9 @@ asmlinkage void __init xen_start_kernel(void) possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + local_irq_disable(); + early_boot_irqs_off(); + xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 63d49a523ed3..1a5ff24e29c0 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -8,7 +8,7 @@ #include <asm/boot.h> #include <asm/asm.h> -#include <asm/page.h> +#include <asm/page_types.h> #include <xen/interface/elfnote.h> #include <asm/xen/interface.h> |