From c8f730b1ab825f06733e1c074264f0078721f365 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Tue, 26 Oct 2010 16:27:28 -0500 Subject: x86, uv: Enable Westmere support on SGI UV Enable Westmere support on SGI UV. The UV initialization code is dependent on the APICID bits. Westmere-EX uses different APIC bit mapping than Nehalem-EX. This code reads the apic shift value from a UV MMR to do the proper bit decoding to determint the pnode. Signed-off-by: Russ Anderson LKML-Reference: <20101026212728.GB15071@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_hub.h | 21 ++++++++++++++++++--- arch/x86/kernel/apic/x2apic_uv_x.c | 25 +++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bf6b88ef8eeb..e969f691cbfd 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -5,7 +5,7 @@ * * SGI UV architectural definitions * - * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_HUB_H @@ -77,7 +77,8 @@ * * 1111110000000000 * 5432109876543210 - * pppppppppplc0cch + * pppppppppplc0cch Nehalem-EX + * ppppppppplcc0cch Westmere-EX * sssssssssss * * p = pnode bits @@ -148,12 +149,25 @@ struct uv_hub_info_s { unsigned char m_val; unsigned char n_val; struct uv_scir_s scir; + unsigned char apic_pnode_shift; }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) +union uvh_apicid { + unsigned long v; + struct uvh_apicid_s { + unsigned long local_apic_mask : 24; + unsigned long local_apic_shift : 5; + unsigned long unused1 : 3; + unsigned long pnode_mask : 24; + unsigned long pnode_shift : 5; + unsigned long unused2 : 3; + } s; +}; + /* * Local & Global MMR space macros. * Note: macros are intended to be used ONLY by inline functions @@ -182,6 +196,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ (((unsigned long)(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) +#define UVH_APICID 0x002D0E00L #define UV_APIC_PNODE_SHIFT 6 /* Local Bus from cpu's perspective */ @@ -280,7 +295,7 @@ static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) */ static inline int uv_apicid_to_pnode(int apicid) { - return (apicid >> UV_APIC_PNODE_SHIFT); + return (apicid >> uv_hub_info->apic_pnode_shift); } /* diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f744f54cb248..0a2918eaab34 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -5,7 +5,7 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. */ #include #include @@ -41,6 +41,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); static DEFINE_SPINLOCK(uv_nmi_lock); @@ -70,6 +71,22 @@ static int early_get_nodeid(void) return node_id.s.node_id; } +static int __init early_get_apic_pnode_shift(void) +{ + unsigned long *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); + uvh_apicid.v = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + if (!uvh_apicid.v) + /* + * Old bios, use default value + */ + uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; + + return uvh_apicid.s.pnode_shift; +} + static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int nodeid; @@ -84,7 +101,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - nodeid << (UV_APIC_PNODE_SHIFT - 1); + nodeid << (early_get_apic_pnode_shift() - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } @@ -716,6 +733,10 @@ void __init uv_system_init(void) int apicid = per_cpu(x86_cpu_to_apicid, cpu); nid = cpu_to_node(cpu); + /* + * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); + */ + uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; -- cgit v1.2.3 From 419db274bed4269f475a8e78cbe9c917192cfe8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Oct 2010 09:50:17 -0700 Subject: x86, memblock: Fix early_node_mem with big reserved region. Xen can reserve huge amounts of memory for pre-ballooning, but that still shows as RAM in the e820 memory map. early_node_mem could not find range because of start/end adjusting, and will go through the fallback path. However, the fallback patch is still using memblock_x86_find_range_node(), and it is partially top-down because it go through active_range entries from low to high. Let's use memblock_find_in_range instead memblock_x86_find_range_node. So get real top down in fallback path. We may still need to make memblock_x86_find_range_node to do overall top_down work. Reported-by: Jeremy Fitzhardinge Tested-by: Jeremy Fitzhardinge Tested-by: Konrad Rzeszutek Wilk Signed-off-by: Yinghai Lu LKML-Reference: <4CC9A9C9.8020700@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/mm/numa_64.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 60f498511dd6..7ffc9b727efd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -178,11 +178,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, /* extend the search scope */ end = max_pfn_mapped << PAGE_SHIFT; - if (end > (MAX_DMA32_PFN< Date: Thu, 28 Oct 2010 17:41:32 -0500 Subject: x86, uv: More Westmere support on SGI UV Enable Westmere support for all APIC modes on SGI UV. Signed-off-by: Russ Anderson LKML-Reference: <20101028224132.GB15804@sgi.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0a2918eaab34..ed4118de249e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -71,7 +71,7 @@ static int early_get_nodeid(void) return node_id.s.node_id; } -static int __init early_get_apic_pnode_shift(void) +static void __init early_get_apic_pnode_shift(void) { unsigned long *mmr; @@ -83,8 +83,6 @@ static int __init early_get_apic_pnode_shift(void) * Old bios, use default value */ uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; - - return uvh_apicid.s.pnode_shift; } static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) @@ -93,6 +91,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (!strcmp(oem_id, "SGI")) { nodeid = early_get_nodeid(); + early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; if (!strcmp(oem_table_id, "UVL")) @@ -101,7 +100,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - nodeid << (early_get_apic_pnode_shift() - 1); + nodeid << (uvh_apicid.s.pnode_shift - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3 From 5c1eb08936693cd78c71164c8bea0b086ae72c67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Oct 2010 16:40:54 +0200 Subject: x86-32: Restore irq stacks NUMA-aware allocations Commit 22d4cd4c4d ("Allocate irq stacks seperate from percpu area") removed NUMA affinity of IRQ stacks as side-effect of the fix. Using alloc_pages_node() instead of __get_free_pages() is safe, even if the target node has no available LOWMEM pages : alloc_pages_node() fallbacks to another node. Signed-off-by: Eric Dumazet Acked-by: Brian Gerst Cc: tj@kernel.org Cc: torvalds@linux-foundation.org Cc: Peter Zijlstra LKML-Reference: <1288276854.2649.607.camel@edumazet-laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 64668dbf00a4..96656f207751 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -125,7 +126,9 @@ void __cpuinit irq_ctx_init(int cpu) if (per_cpu(hardirq_ctx, cpu)) return; - irqctx = (union irq_ctx *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER); + irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + THREAD_FLAGS, + THREAD_ORDER)); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; @@ -134,7 +137,9 @@ void __cpuinit irq_ctx_init(int cpu) per_cpu(hardirq_ctx, cpu) = irqctx; - irqctx = (union irq_ctx *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER); + irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + THREAD_FLAGS, + THREAD_ORDER)); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; -- cgit v1.2.3 From 2d1d7126bbde53989f1d7de174816c123bb7ecb0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Oct 2010 21:09:15 -0700 Subject: x86, ftrace: Use safe noops, drop trap test Always use a safe 5-byte noop sequence. Drop the trap test, since it is known to return false negatives on some virtualization platforms on 32 bits. The resulting code is both simpler and safer. Cc: Daniel Drake Cc: Jason Baron Cc: Mathieu Desnoyers Signed-off-by: H. Peter Anvin Signed-off-by: Steven Rostedt --- arch/x86/kernel/alternative.c | 69 ++++++++++--------------------------------- 1 file changed, 15 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a36bb90aef53..0b30214282a8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -644,65 +644,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) -unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; +#ifdef CONFIG_X86_64 +unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; +#else +unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; +#endif void __init arch_init_ideal_nop5(void) { - extern const unsigned char ftrace_test_p6nop[]; - extern const unsigned char ftrace_test_nop5[]; - extern const unsigned char ftrace_test_jmp[]; - int faulted = 0; - /* - * There is no good nop for all x86 archs. - * We will default to using the P6_NOP5, but first we - * will test to make sure that the nop will actually - * work on this CPU. If it faults, we will then - * go to a lesser efficient 5 byte nop. If that fails - * we then just use a jmp as our nop. This isn't the most - * efficient nop, but we can not use a multi part nop - * since we would then risk being preempted in the middle - * of that nop, and if we enabled tracing then, it might - * cause a system crash. + * There is no good nop for all x86 archs. This selection + * algorithm should be unified with the one in find_nop_table(), + * but this should be good enough for now. * - * TODO: check the cpuid to determine the best nop. + * For cases other than the ones below, use the safe (as in + * always functional) defaults above. */ - asm volatile ( - "ftrace_test_jmp:" - "jmp ftrace_test_p6nop\n" - "nop\n" - "nop\n" - "nop\n" /* 2 byte jmp + 3 bytes */ - "ftrace_test_p6nop:" - P6_NOP5 - "jmp 1f\n" - "ftrace_test_nop5:" - ".byte 0x66,0x66,0x66,0x66,0x90\n" - "1:" - ".section .fixup, \"ax\"\n" - "2: movl $1, %0\n" - " jmp ftrace_test_nop5\n" - "3: movl $2, %0\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(ftrace_test_p6nop, 2b) - _ASM_EXTABLE(ftrace_test_nop5, 3b) - : "=r"(faulted) : "0" (faulted)); - - switch (faulted) { - case 0: - pr_info("converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); - break; - case 1: - pr_info("converting mcount calls to 66 66 66 66 90\n"); - memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); - break; - case 2: - pr_info("converting mcount calls to jmp . + 5\n"); - memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); - break; - } - +#ifdef CONFIG_X86_64 + /* Don't use these on 32 bits due to broken virtualizers */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + memcpy(ideal_nop5, p6_nops[5], 5); +#endif } #endif -- cgit v1.2.3 From d7ba979d45272385ce0fdf141d922e61ff48e07b Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Wed, 18 Aug 2010 06:02:00 -0500 Subject: debug_core,x86,blackfin: Clean up hw debug disable API The kgdb_disable_hw_debug() was an architecture specific function for disabling all hardware breakpoints on a per cpu basis when entering the debug core. This patch will remove the weak function kdbg_disable_hw_debug() and change it into a call back which lives with the rest of hw breakpoint call backs in struct kgdb_arch. Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel --- arch/blackfin/kernel/kgdb.c | 3 ++- arch/x86/kernel/kgdb.c | 3 ++- include/linux/kgdb.h | 13 +++---------- kernel/debug/debug_core.c | 16 +++------------- 4 files changed, 10 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index 08bc44ea6883..edae461b1c54 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -320,7 +320,7 @@ static void bfin_correct_hw_break(void) } } -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void bfin_disable_hw_debug(struct pt_regs *regs) { /* Disable hardware debugging while we are in kgdb */ bfin_write_WPIACTL(0); @@ -406,6 +406,7 @@ struct kgdb_arch arch_kgdb_ops = { #endif .set_hw_breakpoint = bfin_set_hw_break, .remove_hw_breakpoint = bfin_remove_hw_break, + .disable_hw_break = bfin_disable_hw_debug, .remove_all_hw_break = bfin_remove_all_hw_break, .correct_hw_break = bfin_correct_hw_break, }; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d81cfebb848f..ec592caac4b4 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -387,7 +387,7 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) * disable hardware debugging while it is processing gdb packets or * handling exception. */ -void kgdb_disable_hw_debug(struct pt_regs *regs) +static void kgdb_disable_hw_debug(struct pt_regs *regs) { int i; int cpu = raw_smp_processor_id(); @@ -724,6 +724,7 @@ struct kgdb_arch arch_kgdb_ops = { .flags = KGDB_HW_BREAKPOINT, .set_hw_breakpoint = kgdb_set_hw_break, .remove_hw_breakpoint = kgdb_remove_hw_break, + .disable_hw_break = kgdb_disable_hw_debug, .remove_all_hw_break = kgdb_remove_all_hw_break, .correct_hw_break = kgdb_correct_hw_break, }; diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index cc96f0f23e04..092e4250a458 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -35,16 +35,6 @@ struct pt_regs; */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); -/** - * kgdb_disable_hw_debug - (optional) Disable hardware debugging hook - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -extern void kgdb_disable_hw_debug(struct pt_regs *regs); - struct tasklet_struct; struct task_struct; struct uart_port; @@ -243,6 +233,8 @@ extern void kgdb_arch_late(void); * breakpoint. * @remove_hw_breakpoint: Allow an architecture to specify how to remove a * hardware breakpoint. + * @disable_hw_break: Allow an architecture to specify how to disable + * hardware breakpoints for a single cpu. * @remove_all_hw_break: Allow an architecture to specify how to remove all * hardware breakpoints. * @correct_hw_break: Allow an architecture to specify how to correct the @@ -256,6 +248,7 @@ struct kgdb_arch { int (*remove_breakpoint)(unsigned long, char *); int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); + void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); }; diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index fec596da9bd0..cefd4a11f6d9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) return 0; } -/** - * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. - * @regs: Current &struct pt_regs. - * - * This function will be called if the particular architecture must - * disable hardware debugging while it is processing gdb packets or - * handling exception. - */ -void __weak kgdb_disable_hw_debug(struct pt_regs *regs) -{ -} - /* * Some architectures need cache flushes when we set/clear a * breakpoint: @@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, atomic_inc(&masters_in_kgdb); else atomic_inc(&slaves_in_kgdb); - kgdb_disable_hw_debug(ks->linux_regs); + + if (arch_kgdb_ops.disable_hw_break) + arch_kgdb_ops.disable_hw_break(regs); acquirelock: /* -- cgit v1.2.3 From 45f81b1c96d9793e47ce925d257ea693ce0b193e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Oct 2010 12:33:43 -0400 Subject: jump label: Add work around to i386 gcc asm goto bug On i386 (not x86_64) early implementations of gcc would have a bug with asm goto causing it to produce code like the following: (This was noticed by Peter Zijlstra) 56 pushl 0 67 nopl jmp 0x6f popl jmp 0x8c 6f mov test je 0x8c 8c mov call *(%esp) The jump added in the asm goto skipped over the popl that matched the pushl 0, which lead up to a quick crash of the system when the jump was enabled. The nopl is defined in the asm goto () statement and when tracepoints are enabled, the nop changes to a jump to the label that was specified by the asm goto. asm goto is suppose to tell gcc that the code in the asm might jump to an external label. Here gcc obviously fails to make that work. The bug report for gcc is here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226 The bug only appears on x86 when not compiled with -maccumulate-outgoing-args. This option is always set on x86_64 and it is also the work around for a function graph tracer i386 bug. (See commit: 746357d6a526d6da9d89a2ec645b28406e959c2e) This explains why the bug only showed up on i386 when function graph tracer was not enabled. This patch now adds a CONFIG_JUMP_LABEL option that is default off instead of using jump labels by default. When jump labels are enabled, the -maccumulate-outgoing-args will be used (causing a slightly larger kernel image on i386). This option will exist until we have a way to detect if the gcc compiler in use is safe to use on all configurations without the work around. Note, there exists such a test, but for now we will keep the enabling of jump label as a manual option. Archs that know the compiler is safe with asm goto, may choose to select JUMP_LABEL and enable it by default. Reported-by: Ingo Molnar Cause-discovered-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Jason Baron Cc: H. Peter Anvin Cc: David Daney Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Cc: David Miller Cc: Richard Henderson LKML-Reference: <1288028746.3673.11.camel@laptop> Signed-off-by: Steven Rostedt --- arch/Kconfig | 14 ++++++++++++++ arch/x86/Makefile_32.cpu | 13 ++++++++++++- include/linux/jump_label.h | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index 53d7f619a1b9..8bf0fa652eb6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -42,6 +42,20 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config JUMP_LABEL + bool "Optimize trace point call sites" + depends on HAVE_ARCH_JUMP_LABEL + help + If it is detected that the compiler has support for "asm goto", + the kernel will compile trace point locations with just a + nop instruction. When trace points are enabled, the nop will + be converted to a jump to the trace function. This technique + lowers overhead and stress on the branch prediction of the + processor. + + On i386, options added to the compiler flags may increase + the size of the kernel slightly. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 1255d953c65d..f2ee1abb1df9 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -51,7 +51,18 @@ cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) # prologue (push %ebp, mov %esp, %ebp) which breaks the function graph # tracer assumptions. For i686, generic, core2 this is set by the # compiler anyway -cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args) +ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +# Work around to a bug with asm goto with first implementations of it +# in gcc causing gcc to mess up the push and pop of the stack in some +# uses of asm goto. +ifeq ($(CONFIG_JUMP_LABEL), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 1947a1212678..7880f18e4b86 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,7 +1,7 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) # include # define HAVE_JUMP_LABEL #endif -- cgit v1.2.3 From 404ba5d7bb958d3d788bdaa0debc0bdf60f13ffe Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 28 Oct 2010 11:20:27 -0400 Subject: x86, alternative: Call stop_machine_text_poke() on all cpus Currently, text_poke_smp() passes a NULL as the third argument to __stop_machine(), which will only run stop_machine_text_poke() on 1 cpu. Change NULL -> cpu_online_mask, as stop_machine_text_poke() is intended to be run on all cpus. I actually didn't notice any problems with stop_machine_text_poke() only being called on 1 cpu, but found this via code inspection. Signed-off-by: Jason Baron LKML-Reference: <20101028152026.GB2875@redhat.com> Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a36bb90aef53..5ceeca382820 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -638,7 +638,7 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) atomic_set(&stop_machine_first, 1); wrote_text = 0; /* Use __stop_machine() because the caller already got online_cpus. */ - __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); return addr; } -- cgit v1.2.3 From 7b79462a20826a7269322113c68ca78d5f67c0bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 30 Oct 2010 01:19:29 -0700 Subject: x86: Check irq_remapped instead of remapping_enabled in destroy_irq() Russ Anderson reported: | There is a regression that is causing a NULL pointer dereference | in free_irte when shutting down xpc. git bisect narrowed it down | to git commit d585d06(intr_remap: Simplify the code further), which | changed free_irte(). Reverse applying the patch fixes the problem. We need to use irq_remapped() for each irq instead of checking only intr_remapping_enabled as there might be non remapped irqs even when remapping is enabled. [ tglx: use cfg instead of retrieving it again. Massaged changelog ] Reported-bisected-and-tested-by: Russ Anderson Signed-off-by: Yinghai Lu Cc: Suresh Siddha LKML-Reference: <4CCBD511.40607@kernel.org> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0929191d83cf..7cc0a721f628 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3109,7 +3109,7 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - if (intr_remapping_enabled) + if (irq_remapped(cfg)) free_irte(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); -- cgit v1.2.3 From cf38d0ba7efdc476815768b2b999b27cfae69747 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 1 Nov 2010 12:53:50 +0600 Subject: x86, mm: Fix section mismatch in tlb.c Mark tlb_cpuhp_notify as __cpuinit. It's basically a callback function, which is called from __cpuinit init_smp_flash(). So - it's safe. We were warned by the following warning: WARNING: arch/x86/mm/built-in.o(.text+0x356d): Section mismatch in reference from the function tlb_cpuhp_notify() to the function .cpuinit.text:calculate_tlb_offset() The function tlb_cpuhp_notify() references the function __cpuinit calculate_tlb_offset(). This is often because tlb_cpuhp_notify lacks a __cpuinit annotation or the annotation of calculate_tlb_offset is wrong. Signed-off-by: Rakib Mullick Cc: Borislav Petkov Cc: Shaohua Li LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 49358481c733..12cdbb17ad18 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -251,7 +251,7 @@ static void __cpuinit calculate_tlb_offset(void) } } -static int tlb_cpuhp_notify(struct notifier_block *n, +static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) { switch (action & 0xf) { -- cgit v1.2.3 From edde99ce05290e50ce0b3495d209e54e6349ab47 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 25 Oct 2010 03:21:24 +0200 Subject: KVM: Write protect memory after slot swap I have observed the following bug trigger: 1. userspace calls GET_DIRTY_LOG 2. kvm_mmu_slot_remove_write_access is called and makes a page ro 3. page fault happens and makes the page writeable fault is logged in the bitmap appropriately 4. kvm_vm_ioctl_get_dirty_log swaps slot pointers a lot of time passes 5. guest writes into the page 6. userspace calls GET_DIRTY_LOG At point (5), bitmap is clean and page is writeable, thus, guest modification of memory is not logged and GET_DIRTY_LOG returns an empty bitmap. The rule is that all pages are either dirty in the current bitmap, or write-protected, which is violated here. It seems that just moving kvm_mmu_slot_remove_write_access down to after the slot pointer swap should fix this bug. KVM-Stable-Tag. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2288ad829b32..b0818f672064 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3169,10 +3169,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memslots *slots, *old_slots; unsigned long *dirty_bitmap; - spin_lock(&kvm->mmu_lock); - kvm_mmu_slot_remove_write_access(kvm, log->slot); - spin_unlock(&kvm->mmu_lock); - r = -ENOMEM; dirty_bitmap = vmalloc(n); if (!dirty_bitmap) @@ -3194,6 +3190,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; kfree(old_slots); + spin_lock(&kvm->mmu_lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + spin_unlock(&kvm->mmu_lock); + r = -EFAULT; if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { vfree(dirty_bitmap); -- cgit v1.2.3 From eb45fda45f915c7ca3e81e005e853cb770da2642 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 25 Oct 2010 11:58:22 -0200 Subject: KVM: MMU: fix rmap_remove on non present sptes drop_spte should not attempt to rmap_remove a non present shadow pte. This fixes a BUG_ON seen on kvm-autotest. Signed-off-by: Marcelo Tosatti Reported-by: Lucas Meneghel Rodrigues Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 908ea5464a51..fb8b376bf28c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -720,7 +720,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) } } -static void set_spte_track_bits(u64 *sptep, u64 new_spte) +static int set_spte_track_bits(u64 *sptep, u64 new_spte) { pfn_t pfn; u64 old_spte = *sptep; @@ -731,19 +731,20 @@ static void set_spte_track_bits(u64 *sptep, u64 new_spte) old_spte = __xchg_spte(sptep, new_spte); if (!is_rmap_spte(old_spte)) - return; + return 0; pfn = spte_to_pfn(old_spte); if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) kvm_set_pfn_dirty(pfn); + return 1; } static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) { - set_spte_track_bits(sptep, new_spte); - rmap_remove(kvm, sptep); + if (set_spte_track_bits(sptep, new_spte)) + rmap_remove(kvm, sptep); } static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) -- cgit v1.2.3 From 97e69aa62f8b5d338d6cff49be09e37cc1262838 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sat, 30 Oct 2010 22:54:47 +0400 Subject: KVM: x86: fix information leak to userland Structures kvm_vcpu_events, kvm_debugregs, kvm_pit_state2 and kvm_clock_data are copied to userland with some padding and reserved fields unitialized. It leads to leaking of contents of kernel stack memory. We have to initialize them to zero. In patch v1 Jan Kiszka suggested to fill reserved fields with zeros instead of memset'ting the whole struct. It makes sense as these fields are explicitly marked as padding. No more fields need zeroing. KVM-Stable-Tag. Signed-off-by: Vasiliy Kulikov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0818f672064..463c65b8f93f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2560,6 +2560,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; + events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; events->interrupt.injected = @@ -2573,12 +2574,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending; events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); + events->nmi.pad = 0; events->sipi_vector = vcpu->arch.sipi_vector; events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW); + memset(&events->reserved, 0, sizeof(events->reserved)); } static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, @@ -2623,6 +2626,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, dbgregs->dr6 = vcpu->arch.dr6; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; + memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -3106,6 +3110,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) sizeof(ps->channels)); ps->flags = kvm->arch.vpit->pit_state.flags; mutex_unlock(&kvm->arch.vpit->pit_state.lock); + memset(&ps->reserved, 0, sizeof(ps->reserved)); return r; } @@ -3486,6 +3491,7 @@ long kvm_arch_vm_ioctl(struct file *filp, user_ns.clock = kvm->arch.kvmclock_offset + now_ns; local_irq_enable(); user_ns.flags = 0; + memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; if (copy_to_user(argp, &user_ns, sizeof(user_ns))) -- cgit v1.2.3 From 453d9c57e27b4401bc3e98906bcac31ae8be0165 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 1 Nov 2010 14:01:13 +0100 Subject: KVM: x86: Issue smp_call_function_many with preemption disabled smp_call_function_many is specified to be called only with preemption disabled. Fulfill this requirement. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 463c65b8f93f..cdac9e592aa5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3978,8 +3978,10 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) return X86EMUL_CONTINUE; if (kvm_x86_ops->has_wbinvd_exit()) { + preempt_disable(); smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, wbinvd_ipi, NULL, 1); + preempt_enable(); cpumask_clear(vcpu->arch.wbinvd_dirty_mask); } wbinvd(); -- cgit v1.2.3 From 07cf2a64c2ad3408a0e12aa4cd6040b30c09381d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Sat, 6 Nov 2010 10:06:49 +0100 Subject: xen: fix memory leak in Xen PCI MSI/MSI-X allocator. Stanse found that xen_setup_msi_irqs leaks memory when xen_allocate_pirq fails. Free the memory in that fail path. Signed-off-by: Jiri Slaby Signed-off-by: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xensource.com Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org --- arch/x86/pci/xen.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 117f5b8daf75..d7b5109f7a9c 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -147,8 +147,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) irq = xen_allocate_pirq(v[i], 0, /* not sharable */ (type == PCI_CAP_ID_MSIX) ? "pcifront-msi-x" : "pcifront-msi"); - if (irq < 0) - return -1; + if (irq < 0) { + ret = -1; + goto free; + } ret = set_irq_msi(irq, msidesc); if (ret) @@ -164,7 +166,7 @@ error: if (ret == -ENODEV) dev_err(&dev->dev, "Xen PCI frontend has not registered" \ " MSI/MSI-X support!\n"); - +free: kfree(v); return ret; } -- cgit v1.2.3 From 0059b2436a86fedb2747f654f8e10a67e97d8614 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 Nov 2010 22:20:29 +0100 Subject: x86: Address gcc4.6 "set but not used" warnings in apic.h native_apic_msr_read() and x2apic_enabled() use rdmsr(msr, low, high), but only use the low part. gcc4.6 complains about this: .../apic.h:144:11: warning: variable 'high' set but not used [-Wunused-but-set-variable] rdmsr() is just a wrapper around rdmsrl() which splits the 64bit value into low and high, so using rdmsrl() directly solves this. [tglx: Changed the variables to u64 as suggested by Cyrill. It's less confusing and has no code impact as this is 64bit only anyway. Massaged changelog as well. ] Signed-off-by: Andi Kleen Cc: x86@kernel.org Cc: Cyrill Gorcunov LKML-Reference: <1289251229-19589-1-git-send-email-andi@firstfloor.org> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 286de34b0ed6..f6ce0bda3b98 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -141,13 +141,13 @@ static inline void native_apic_msr_write(u32 reg, u32 v) static inline u32 native_apic_msr_read(u32 reg) { - u32 low, high; + u64 msr; if (reg == APIC_DFR) return -1; - rdmsr(APIC_BASE_MSR + (reg >> 4), low, high); - return low; + rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + return (u32)msr; } static inline void native_x2apic_wait_icr_idle(void) @@ -181,12 +181,12 @@ extern void enable_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); static inline int x2apic_enabled(void) { - int msr, msr2; + u64 msr; if (!cpu_has_x2apic) return 0; - rdmsr(MSR_IA32_APICBASE, msr, msr2); + rdmsrl(MSR_IA32_APICBASE, msr); if (msr & X2APIC_ENABLE) return 1; return 0; -- cgit v1.2.3 From 8e5e9521c13ff8cf6727999999c8d88cc64b5ff7 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 9 Nov 2010 00:08:11 +0100 Subject: x86: Remove unnecessary casts of void ptr returning alloc function return values The [vk][cmz]alloc(_node) family of functions return void pointers which it's completely unnecessary/pointless to cast to other pointer types since that happens implicitly. This patch removes such casts from arch/x86. Signed-off-by: Jesper Juhl Cc: trivial@kernel.org Cc: amd64-microcode@amd64.org Cc: Andreas Herrmann LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_amd.c | 2 +- arch/x86/platform/uv/tlb_uv.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index e1af7c055c7d..ce0cb4721c9a 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -212,7 +212,7 @@ static int install_equiv_cpu_table(const u8 *buf) return 0; } - equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); + equiv_cpu_table = vmalloc(size); if (!equiv_cpu_table) { pr_err("failed to allocate equivalent CPU table\n"); return 0; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 20ea20a39e2a..a318194002b5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1343,8 +1343,8 @@ uv_activation_descriptor_init(int node, int pnode) * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub */ - bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* - UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); + bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE + * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); BUG_ON(!bau_desc); pa = uv_gpa(bau_desc); /* need the real nasid*/ @@ -1402,9 +1402,9 @@ uv_payload_queue_init(int node, int pnode) struct bau_payload_queue_entry *pqp_malloc; struct bau_control *bcp; - pqp = (struct bau_payload_queue_entry *) kmalloc_node( - (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), - GFP_KERNEL, node); + pqp = kmalloc_node((DEST_Q_SIZE + 1) + * sizeof(struct bau_payload_queue_entry), + GFP_KERNEL, node); BUG_ON(!pqp); pqp_malloc = pqp; @@ -1520,8 +1520,7 @@ static void __init uv_init_per_cpu(int nuvhubs) timeout_us = calculate_destination_timeout(); - uvhub_descs = (struct uvhub_desc *) - kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); for_each_present_cpu(cpu) { -- cgit v1.2.3 From 62b0cfc240b1d4601333912ef8760e0ca9ec2cec Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Sat, 6 Nov 2010 15:41:04 -0500 Subject: x86, UV: Update node controller MMRs A new version of the SGI UV hub node controller is being developed. A few of the MMRs (control registers) that exist on the current hub no longer exist on the new hub. Fortunately, there are alternate MMRs that are are functionally equivalent and that exist on both hubs. This patch changes the UV code to use MMRs that exist in BOTH versions of the hub node controller. Signed-off-by: Jack Steiner LKML-Reference: <20101106204056.GA27584@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_mmrs.h | 189 +++++++++++++++++++------------------ arch/x86/kernel/apic/x2apic_uv_x.c | 12 +-- 2 files changed, 102 insertions(+), 99 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index b2f2d2e05cec..6d90adf4428a 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -805,6 +805,78 @@ union uvh_node_present_table_u { } s; }; +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL + +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_alias210_overlay_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL + +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_alias210_overlay_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL + +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_alias210_overlay_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ /* ========================================================================= */ @@ -856,6 +928,29 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { } s; }; +/* ========================================================================= */ +/* UVH_RH_GAM_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL + +#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL +#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 +#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL + +union uvh_rh_gam_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_config_mmr_s { + unsigned long m_skt : 6; /* RW */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_10_11: 2; /* */ + unsigned long mmiol_cfg : 1; /* RW */ + unsigned long rsvd_13_63: 51; /* */ + } s; +}; + /* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ @@ -987,97 +1082,5 @@ union uvh_rtc1_int_config_u { } s; }; -/* ========================================================================= */ -/* UVH_SI_ADDR_MAP_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL - -#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0 -#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL -#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8 -#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL - -union uvh_si_addr_map_config_u { - unsigned long v; - struct uvh_si_addr_map_config_s { - unsigned long m_skt : 6; /* RW */ - unsigned long rsvd_6_7: 2; /* */ - unsigned long n_skt : 4; /* RW */ - unsigned long rsvd_12_63: 52; /* */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL - -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias0_overlay_config_u { - unsigned long v; - struct uvh_si_alias0_overlay_config_s { - unsigned long rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL - -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias1_overlay_config_u { - unsigned long v; - struct uvh_si_alias1_overlay_config_s { - unsigned long rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -/* ========================================================================= */ -/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ -/* ========================================================================= */ -#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL - -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 -#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL - -union uvh_si_alias2_overlay_config_u { - unsigned long v; - struct uvh_si_alias2_overlay_config_s { - unsigned long rsvd_0_23: 24; /* */ - unsigned long base : 8; /* RW */ - unsigned long rsvd_32_47: 16; /* */ - unsigned long m_alias : 5; /* RW */ - unsigned long rsvd_53_62: 10; /* */ - unsigned long enable : 1; /* RW */ - } s; -}; - -#endif /* _ASM_X86_UV_UV_MMRS_H */ +#endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ed4118de249e..194539aea175 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -379,14 +379,14 @@ struct redir_addr { #define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT static __initdata struct redir_addr redir_addrs[] = { - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, - {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR}, }; static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) { - union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias; union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; int i; @@ -660,7 +660,7 @@ void uv_nmi_init(void) void __init uv_system_init(void) { - union uvh_si_addr_map_config_u m_n_config; + union uvh_rh_gam_config_mmr_u m_n_config; union uvh_node_id_u node_id; unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; @@ -670,7 +670,7 @@ void __init uv_system_init(void) map_low_mmrs(); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; mmr_base = -- cgit v1.2.3 From 2f62bf7d238f6dfa39faf24c746d0b8dd60f85c5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 4 Nov 2010 15:23:58 +0000 Subject: x86: Adjust section annotations in AMD Fam10 MMCONF enabling code check_enable_amd_mmconf_dmi() gets called only for the BSP, hence everything hanging off of it can be __init*. Signed-off-by: Jan Beulich Acked-by: Yinghai Lu LKML-Reference: <4CD2DE1E0200007800020990@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/mmconf-fam10h_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 71825806cd44..6da143c2a6b8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -217,13 +217,13 @@ void __cpuinit fam10h_check_enable_mmcfg(void) wrmsrl(address, val); } -static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) +static int __init set_check_enable_amd_mmconf(const struct dmi_system_id *d) { pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; return 0; } -static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { +static const struct dmi_system_id __initconst mmconf_dmi_table[] = { { .callback = set_check_enable_amd_mmconf, .ident = "Sun Microsystems Machine", @@ -234,7 +234,8 @@ static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { {} }; -void __cpuinit check_enable_amd_mmconf_dmi(void) +/* Called from a __cpuinit function, but only on the BSP. */ +void __ref check_enable_amd_mmconf_dmi(void) { dmi_check_system(mmconf_dmi_table); } -- cgit v1.2.3 From 2a8dcbd6cd2270f912ca141547d9296ce08abe4a Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 7 Nov 2010 22:57:18 +0100 Subject: x86, apic: Remove double #include Remove the second inclusion. Signed-off-by: Jesper Juhl LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 850657d1b0ed..3f838d537392 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -52,7 +52,6 @@ #include #include #include -#include unsigned int num_processors; -- cgit v1.2.3 From 1f523bf36734375dd6e986c9f47f010d00a8caca Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Fri, 5 Nov 2010 20:04:42 +0900 Subject: x86, pvclock: Remove leftover scale_delta() function Commit 92580d64e16402762e2acc3022f065397c780425 ("x86: pvclock: Move scale_delta into common header") forgot to remove scale_delta. Signed-off-by: Kusanagi Kouichi Cc: Zachary Amsden Cc: Marcelo Tosatti Cc: Glauber Costa LKML-Reference: <20101105110444.BAF6D6FC03B@msa105.auone-net.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pvclock.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index bab3b9e6f66d..008b91eefa18 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -41,44 +41,6 @@ void pvclock_set_flags(u8 flags) valid_flags = flags; } -/* - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, - * yielding a 64-bit result. - */ -static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) -{ - u64 product; -#ifdef __i386__ - u32 tmp1, tmp2; -#endif - - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - -#ifdef __i386__ - __asm__ ( - "mul %5 ; " - "mov %4,%%eax ; " - "mov %%edx,%4 ; " - "mul %5 ; " - "xor %5,%5 ; " - "add %4,%%eax ; " - "adc %5,%%edx ; " - : "=A" (product), "=r" (tmp1), "=r" (tmp2) - : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); -#elif defined(__x86_64__) - __asm__ ( - "mul %%rdx ; shrd $32,%%rdx,%%rax" - : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); -#else -#error implement me! -#endif - - return product; -} - static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) { u64 delta = native_read_tsc() - shadow->tsc_timestamp; -- cgit v1.2.3 From 034c6efa4616e5ff6253549e973e7fef12899324 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Nov 2010 18:52:05 +0100 Subject: perf, amd: Use kmalloc_node(,__GFP_ZERO) for northbridge structure allocation Jasper suggested we use the zeroing capability of the allocators instead of calling memset ourselves. Add node affinity while we're at it. Reported-by: Jesper Juhl Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 46d58448c3af..e421b8cd6944 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -280,11 +280,11 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) struct amd_nb *nb; int i; - nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); + nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO, + cpu_to_node(cpu)); if (!nb) return NULL; - memset(nb, 0, sizeof(*nb)); nb->nb_id = nb_id; /* -- cgit v1.2.3 From 4723d0f2f96e6c910f951d595067eb31e0dd2d01 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 22 Sep 2010 11:09:19 -0600 Subject: x86/PCI: coalesce overlapping host bridge windows Some BIOSes provide PCI host bridge windows that overlap, e.g., pci_root PNP0A03:00: host bridge window [mem 0xb0000000-0xffffffff] pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xdfffffff] pci_root PNP0A03:00: host bridge window [mem 0xf0000000-0xffffffff] If we simply insert these as children of iomem_resource, the second window fails because it conflicts with the first, and the third is inserted as a child of the first, i.e., b0000000-ffffffff PCI Bus 0000:00 f0000000-ffffffff PCI Bus 0000:00 When we claim PCI device resources, this can cause collisions like this if we put them in the first window: pci 0000:00:01.0: address space collision: [mem 0xff300000-0xff4fffff] conflicts with PCI Bus 0000:00 [mem 0xf0000000-0xffffffff] Host bridge windows are top-level resources by definition, so it doesn't make sense to make the third window a child of the first. This patch coalesces any host bridge windows that overlap. For the example above, the result is this single window: pci_root PNP0A03:00: host bridge window [mem 0xafffffff-0xffffffff] This fixes a 2.6.34 regression. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=17011 Reported-and-tested-by: Anisse Astier Reported-and-tested-by: Pramod Dematagoda Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 103 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba5..0972315c3860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) struct acpi_resource_address64 addr; acpi_status status; unsigned long flags; - struct resource *root, *conflict; u64 start, end; status = resource_to_addr(acpi_res, &addr); @@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; if (addr.resource_type == ACPI_MEMORY_RANGE) { - root = &iomem_resource; flags = IORESOURCE_MEM; if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) flags |= IORESOURCE_PREFETCH; } else if (addr.resource_type == ACPI_IO_RANGE) { - root = &ioport_resource; flags = IORESOURCE_IO; } else return AE_OK; @@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data) return AE_OK; } - conflict = insert_resource_conflict(root, res); - if (conflict) { - dev_err(&info->bridge->dev, - "address space collision: host bridge window %pR " - "conflicts with %s %pR\n", - res, conflict->name, conflict); - } else { - pci_bus_add_resource(info->bus, res, 0); - info->res_num++; - if (addr.translation_offset) - dev_info(&info->bridge->dev, "host bridge window %pR " - "(PCI address [%#llx-%#llx])\n", - res, res->start - addr.translation_offset, - res->end - addr.translation_offset); + info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, "host bridge window %pR\n", res); + + return AE_OK; +} + +static bool resource_contains(struct resource *res, resource_size_t point) +{ + if (res->start <= point && point <= res->end) + return true; + return false; +} + +static void coalesce_windows(struct pci_root_info *info, int type) +{ + int i, j; + struct resource *res1, *res2; + + for (i = 0; i < info->res_num; i++) { + res1 = &info->res[i]; + if (!(res1->flags & type)) + continue; + + for (j = i + 1; j < info->res_num; j++) { + res2 = &info->res[j]; + if (!(res2->flags & type)) + continue; + + /* + * I don't like throwing away windows because then + * our resources no longer match the ACPI _CRS, but + * the kernel resource tree doesn't allow overlaps. + */ + if (resource_contains(res1, res2->start) || + resource_contains(res1, res2->end) || + resource_contains(res2, res1->start) || + resource_contains(res2, res1->end)) { + res1->start = min(res1->start, res2->start); + res1->end = max(res1->end, res2->end); + dev_info(&info->bridge->dev, + "host bridge window expanded to %pR; %pR ignored\n", + res1, res2); + res2->flags = 0; + } + } + } +} + +static void add_resources(struct pci_root_info *info) +{ + int i; + struct resource *res, *root, *conflict; + + if (!pci_use_crs) + return; + + coalesce_windows(info, IORESOURCE_MEM); + coalesce_windows(info, IORESOURCE_IO); + + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; else - dev_info(&info->bridge->dev, - "host bridge window %pR\n", res); + continue; + + conflict = insert_resource_conflict(root, res); + if (conflict) + dev_err(&info->bridge->dev, + "address space collision: host bridge window %pR " + "conflicts with %s %pR\n", + res, conflict->name, conflict); + else + pci_bus_add_resource(info->bus, res, 0); } - return AE_OK; } static void @@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum, acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, &info); + add_resources(&info); return; name_alloc_fail: -- cgit v1.2.3 From e060e7af98182494b764d002eba7fa022fe91bdf Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 11 Nov 2010 12:37:43 -0800 Subject: xen: set vma flag VM_PFNMAP in the privcmd mmap file_op Set VM_PFNMAP in the privcmd mmap file_op, rather than later in xen_remap_domain_mfn_range when it is too late because vma_wants_writenotify has already been called and vm_page_prot has already been modified. Signed-off-by: Stefano Stabellini Signed-off-by: Jeremy Fitzhardinge --- arch/x86/xen/mmu.c | 3 ++- drivers/xen/xenfs/privcmd.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f08ea045620f..792de4349c79 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2299,7 +2299,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == + (VM_PFNMAP | VM_RESERVED | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c index 2eb04c842511..88474d460d82 100644 --- a/drivers/xen/xenfs/privcmd.c +++ b/drivers/xen/xenfs/privcmd.c @@ -380,8 +380,9 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) if (xen_feature(XENFEAT_auto_translated_physmap)) return -ENOSYS; - /* DONTCOPY is essential for Xen as copy_page_range is broken. */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; + /* DONTCOPY is essential for Xen because copy_page_range doesn't know + * how to recreate these mappings */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; vma->vm_ops = &privcmd_vm_ops; vma->vm_private_data = NULL; -- cgit v1.2.3