From 0e48026ae7abf871e51eaa9183c81ab5bef4c267 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 19 Mar 2013 16:10:38 +0100 Subject: perf/x86: Fix uninitialized pt_regs in intel_pmu_drain_bts_buffer() This patch fixes an uninitialized pt_regs struct in drain BTS function. The pt_regs struct is propagated all the way to the code_get_segment() function from perf_instruction_pointer() and may get garbage. We cannot simply inherit the actual pt_regs from the interrupt because BTS must be flushed on context-switch or when the associated event is disabled. And there we do not have a pt_regs handy. Setting pt_regs to all zeroes may not be the best option but it is not clear what else to do given where the drain_bts_buffer() is called from. In V2, we move the memset() later in the code to avoid doing it when we end up returning early without doing the actual BTS processing. Also dropped the reg.val initialization because it is redundant with the memset() as suggested by PeterZ. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: peterz@infradead.org Cc: sqazi@google.com Cc: ak@linux.intel.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/20130319151038.GA25439@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a4f2ee..f71c9f09fd4b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -314,10 +314,11 @@ int intel_pmu_drain_bts_buffer(void) if (top <= at) return 0; + memset(®s, 0, sizeof(regs)); + ds->bts_index = ds->bts_buffer_base; perf_sample_data_init(&data, 0, event->hw.last_period); - regs.ip = 0; /* * Prepare a generic sample, i.e. fill in the invariant fields. -- cgit v1.2.3 From 4cc3daaf3976bde5345718e86b67cace2f935a09 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Jan 2013 20:48:55 +0000 Subject: ARM: tlbflush: remove ARMv3 support We no longer support any ARMv3 platforms, so remove the old tlbflushing code. Signed-off-by: Will Deacon --- arch/arm/include/asm/tlbflush.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 4db8c8820f0d..a223003b0f17 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -14,7 +14,6 @@ #include -#define TLB_V3_PAGE (1 << 0) #define TLB_V4_U_PAGE (1 << 1) #define TLB_V4_D_PAGE (1 << 2) #define TLB_V4_I_PAGE (1 << 3) @@ -22,7 +21,6 @@ #define TLB_V6_D_PAGE (1 << 5) #define TLB_V6_I_PAGE (1 << 6) -#define TLB_V3_FULL (1 << 8) #define TLB_V4_U_FULL (1 << 9) #define TLB_V4_D_FULL (1 << 10) #define TLB_V4_I_FULL (1 << 11) @@ -52,7 +50,6 @@ * ============= * * We have the following to choose from: - * v3 - ARMv3 * v4 - ARMv4 without write buffer * v4wb - ARMv4 with write buffer without I TLB flush entry instruction * v4wbi - ARMv4 with write buffer with I TLB flush entry instruction @@ -330,7 +327,6 @@ static inline void local_flush_tlb_all(void) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL | TLB_V6_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL | TLB_V6_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL | TLB_V6_I_FULL, "c8, c5, 0", zero); @@ -351,9 +347,8 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_FULL|TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { + if (possible_tlb_flags & (TLB_V4_U_FULL|TLB_V4_D_FULL|TLB_V4_I_FULL)) { if (cpumask_test_cpu(get_cpu(), mm_cpumask(mm))) { - tlb_op(TLB_V3_FULL, "c6, c0, 0", zero); tlb_op(TLB_V4_U_FULL, "c8, c7, 0", zero); tlb_op(TLB_V4_D_FULL, "c8, c6, 0", zero); tlb_op(TLB_V4_I_FULL, "c8, c5, 0", zero); @@ -385,9 +380,8 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) if (tlb_flag(TLB_WB)) dsb(); - if (possible_tlb_flags & (TLB_V3_PAGE|TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && + if (possible_tlb_flags & (TLB_V4_U_PAGE|TLB_V4_D_PAGE|TLB_V4_I_PAGE|TLB_V4_I_FULL) && cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { - tlb_op(TLB_V3_PAGE, "c6, c0, 0", uaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", uaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", uaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", uaddr); @@ -418,7 +412,6 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) if (tlb_flag(TLB_WB)) dsb(); - tlb_op(TLB_V3_PAGE, "c6, c0, 0", kaddr); tlb_op(TLB_V4_U_PAGE, "c8, c7, 1", kaddr); tlb_op(TLB_V4_D_PAGE, "c8, c6, 1", kaddr); tlb_op(TLB_V4_I_PAGE, "c8, c5, 1", kaddr); -- cgit v1.2.3 From 82d9b0d0c6c4cbd5d0022d7df5e6bf071a9eb6c7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Jan 2013 12:07:40 +0000 Subject: ARM: cache: remove ARMv3 support code This is only used by 740t, which is a v4 core and (by my reading of the datasheet for the CPU) ignores CRm for the cp15 cache flush operation, making the v4 cache implementation in cache-v4.S sufficient for this CPU. Tested with 740T core-tile on Integrator/AP baseboard. Acked-by: Hyok S. Choi Acked-by: Greg Ungerer Signed-off-by: Will Deacon --- arch/arm/include/asm/glue-cache.h | 8 --- arch/arm/mm/Kconfig | 5 +- arch/arm/mm/Makefile | 1 - arch/arm/mm/cache-v3.S | 137 -------------------------------------- arch/arm/mm/proc-arm740.S | 2 +- 5 files changed, 2 insertions(+), 151 deletions(-) delete mode 100644 arch/arm/mm/cache-v3.S (limited to 'arch') diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index cca9f15704ed..ea289e1435e7 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -19,14 +19,6 @@ #undef _CACHE #undef MULTI_CACHE -#if defined(CONFIG_CPU_CACHE_V3) -# ifdef _CACHE -# define MULTI_CACHE 1 -# else -# define _CACHE v3 -# endif -#endif - #if defined(CONFIG_CPU_CACHE_V4) # ifdef _CACHE # define MULTI_CACHE 1 diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 025d17328730..4045c4931a30 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -43,7 +43,7 @@ config CPU_ARM740T depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T - select CPU_CACHE_V3 # although the core is v4t + select CPU_CACHE_V4 select CPU_CP15_MPU select CPU_PABRT_LEGACY help @@ -469,9 +469,6 @@ config CPU_PABRT_V7 bool # The cache model -config CPU_CACHE_V3 - bool - config CPU_CACHE_V4 bool diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 4e333fa2756f..9e51be96f635 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o -obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S deleted file mode 100644 index 8a3fadece8d3..000000000000 --- a/arch/arm/mm/cache-v3.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * linux/arch/arm/mm/cache-v3.S - * - * Copyright (C) 1997-2002 Russell king - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include "proc-macros.S" - -/* - * flush_icache_all() - * - * Unconditionally clean and invalidate the entire icache. - */ -ENTRY(v3_flush_icache_all) - mov pc, lr -ENDPROC(v3_flush_icache_all) - -/* - * flush_user_cache_all() - * - * Invalidate all cache entries in a particular address - * space. - * - * - mm - mm_struct describing address space - */ -ENTRY(v3_flush_user_cache_all) - /* FALLTHROUGH */ -/* - * flush_kern_cache_all() - * - * Clean and invalidate the entire cache. - */ -ENTRY(v3_flush_kern_cache_all) - /* FALLTHROUGH */ - -/* - * flush_user_cache_range(start, end, flags) - * - * Invalidate a range of cache entries in the specified - * address space. - * - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) - * - flags - vma_area_struct flags describing address space - */ -ENTRY(v3_flush_user_cache_range) - mov ip, #0 - mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * coherent_kern_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_kern_range) - /* FALLTHROUGH */ - -/* - * coherent_user_range(start, end) - * - * Ensure coherency between the Icache and the Dcache in the - * region described by start. If you have non-snooping - * Harvard caches, you need to implement this function. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_coherent_user_range) - mov r0, #0 - mov pc, lr - -/* - * flush_kern_dcache_area(void *page, size_t size) - * - * Ensure no D cache aliasing occurs, either with itself or - * the I cache - * - * - addr - kernel address - * - size - region size - */ -ENTRY(v3_flush_kern_dcache_area) - /* FALLTHROUGH */ - -/* - * dma_flush_range(start, end) - * - * Clean and invalidate the specified virtual address range. - * - * - start - virtual start address - * - end - virtual end address - */ -ENTRY(v3_dma_flush_range) - mov r0, #0 - mcr p15, 0, r0, c7, c0, 0 @ flush ID cache - mov pc, lr - -/* - * dma_unmap_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_unmap_area) - teq r2, #DMA_TO_DEVICE - bne v3_dma_flush_range - /* FALLTHROUGH */ - -/* - * dma_map_area(start, size, dir) - * - start - kernel virtual start address - * - size - size of region - * - dir - DMA direction - */ -ENTRY(v3_dma_map_area) - mov pc, lr -ENDPROC(v3_dma_unmap_area) -ENDPROC(v3_dma_map_area) - - .globl v3_flush_kern_cache_louis - .equ v3_flush_kern_cache_louis, v3_flush_kern_cache_all - - __INITDATA - - @ define struct cpu_cache_fns (see and proc-macros.S) - define_cache_functions v3 diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index dc5de5d53f20..2088234978c4 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -145,5 +145,5 @@ __arm740_proc_info: .long arm740_processor_functions .long 0 .long 0 - .long v3_cache_fns @ cache model + .long v4_cache_fns @ cache model .size __arm740_proc_info, . - __arm740_proc_info -- cgit v1.2.3 From 3ef52f2a00efc5f83ae6d40e55cae96ce275893f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 10:37:51 +0000 Subject: ARM: mm: fix numerous hideous errors in proc-arm740.S The setup code in proc-arm740.S is completely broken and, as far as I can tell, always has been. I was >this< close to ripping it out, when a 740t core-tile materialised in the office, so I've had a crack at fixing things up: - Fix the ram/flash area calculations so that we actually set the condition flags before testing them... - Fix the proc_info structure so that __cpu_io_mmu_flags are defined as 0, placing the __cpu_flush pointer at the correct offset - Re-number the registers used during __arm740_setup so that we don't clobber the machine ID et al - Advertise Thumb support via the hwcaps, since 740T is the only 740 implementation. Acked-by: Hyok S. Choi Signed-off-by: Will Deacon --- arch/arm/mm/proc-arm740.S | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 2088234978c4..fde2d2a794cf 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -77,24 +77,27 @@ __arm740_setup: mcr p15, 0, r0, c6, c0 @ set area 0, default ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM - ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit mcr p15, 0, r0, c6, c1 @ set area 1, RAM ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH - ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) - mov r2, #10 @ 11 is the minimum (4KB) -1: add r2, r2, #1 @ area size *= 2 - mov r1, r1, lsr #1 + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 + beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 bne 1b @ count not zero r-shift - orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, r4, lsl #1 @ the area register value orr r0, r0, #1 @ set enable bit - mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH mov r0, #0x06 mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable @@ -137,10 +140,11 @@ __arm740_proc_info: .long 0x41807400 .long 0xfffffff0 .long 0 + .long 0 b __arm740_setup .long cpu_arch_name .long cpu_elf_name - .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT .long cpu_arm740_name .long arm740_processor_functions .long 0 -- cgit v1.2.3 From 794fe85da484353dedcb5dc6da14d923d0645fc3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 19:11:38 +0000 Subject: ARM: mm: remove broken condition check for v4 flushing There's no point having a conditional cache flush if we don't know the state of the condition beforehand. This patch makes the cacheflush in v4_flush_user_cache_range unconditional. signed-off-by: will deacon --- arch/arm/mm/cache-v4.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 43e5d77be677..a7ba68f59f0c 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -58,7 +58,7 @@ ENTRY(v4_flush_kern_cache_all) ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 - mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache mov pc, lr #else /* FALLTHROUGH */ -- cgit v1.2.3 From d455bac223d6f9858b29a29272756243ec3d3904 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 22 Jan 2013 11:00:54 +0000 Subject: ARM: modules: don't export cpu_set_pte_ext when !MMU cpu_set_pte_ext is only guaranteed to be defined when CONFIG_MMU, so don't export it to modules otherwise. Signed-off-by: Will Deacon --- arch/arm/mm/proc-syms.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c index 3e6210b4d6d4..054b491ff764 100644 --- a/arch/arm/mm/proc-syms.c +++ b/arch/arm/mm/proc-syms.c @@ -17,7 +17,9 @@ #ifndef MULTI_CPU EXPORT_SYMBOL(cpu_dcache_clean_area); +#ifdef CONFIG_MMU EXPORT_SYMBOL(cpu_set_pte_ext); +#endif #else EXPORT_SYMBOL(processor); #endif -- cgit v1.2.3 From 71196a26d85be2aae2e587dfd3c3190a651133c6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 27 Mar 2013 02:20:45 +0000 Subject: sparc:remove unused declaration smp_boot_cpus() smp_boot_cpus() was replaced smp_prepare_cpus() long ago, and it no longer needed, so delete it. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index b73da3c5f10a..68503d3d1b9e 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -46,7 +46,6 @@ void sun4m_init_smp(void); void sun4d_init_smp(void); void smp_callin(void); -void smp_boot_cpus(void); void smp_store_cpu_info(int); void smp_resched_interrupt(void); -- cgit v1.2.3 From 54df2db36c93bbb8c757f172d97c577040de6abb Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:43 +0000 Subject: sparc/srmmu: clear trailing edge of bitmap properly srmmu_nocache_bitmap is cleared by bit_map_init(). But bit_map_init() attempts to clear by memset(), so it can't clear the trailing edge of bitmap properly on big-endian architecture if the number of bits is not a multiple of BITS_PER_LONG. Actually, the number of bits in srmmu_nocache_bitmap is not always a multiple of BITS_PER_LONG. It is calculated as below: bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT; srmmu_nocache_size is decided proportionally by the amount of system RAM and it is rounded to a multiple of PAGE_SIZE. SRMMU_NOCACHE_BITMAP_SHIFT is defined as (PAGE_SHIFT - 4). So it can only be said that bitmap_bits is a multiple of 16. This fixes the problem by using bitmap_clear() instead of memset() in bit_map_init() and this also uses BITS_TO_LONGS() to calculate correct size at bitmap allocation time. Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/lib/bitext.c | 6 +----- arch/sparc/mm/srmmu.c | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c index 48d00e72ce15..8ec4e9c0251a 100644 --- a/arch/sparc/lib/bitext.c +++ b/arch/sparc/lib/bitext.c @@ -119,11 +119,7 @@ void bit_map_clear(struct bit_map *t, int offset, int len) void bit_map_init(struct bit_map *t, unsigned long *map, int size) { - - if ((size & 07) != 0) - BUG(); - memset(map, 0, size>>3); - + bitmap_zero(map, size); memset(t, 0, sizeof *t); spin_lock_init(&t->lock); t->map = map; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index c38bb72e3e80..036c2797dece 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -280,7 +280,9 @@ static void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); -- cgit v1.2.3 From 9a0ac1b6af111f2fa0f2a39e9bbbebb74da7d26a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 29 Mar 2013 03:44:44 +0000 Subject: sparc/iommu: fix typo s/265KB/256KB/ IOMMU_NPTES is 64K PTEs, so the size is 256KB (= 64K * sizeof(iopte_t)) Signed-off-by: Akinobu Mita Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/mm/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0f4f7191fbba..28f96f27c768 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -34,7 +34,7 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ /* srmmu.c */ -- cgit v1.2.3 From bf3aece8af91eabea2cfb2cbe528abc394695163 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 30 Mar 2013 00:28:26 +0000 Subject: sparc:cleanup unused code in smp_32.h After genirq and generic clockevent support at sparc32, smp4m_irq_rotate(), prof_multiplier() and prof_counter() are no longer used and should be removed. Find more info from commit 6baa9b20 & 62f08283. Signed-off-by: Kefeng Wang Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/smp_32.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 68503d3d1b9e..3c8917f054de 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -36,7 +36,6 @@ typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); void cpu_panic(void); -extern void smp4m_irq_rotate(int cpu); /* * General functions that each host system must provide. @@ -106,9 +105,6 @@ extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) -#define prof_multiplier(__cpu) cpu_data(__cpu).multiplier -#define prof_counter(__cpu) cpu_data(__cpu).counter - void smp_setup_cpu_possible_map(void); #endif /* !(__ASSEMBLY__) */ -- cgit v1.2.3 From a2d34dd41212032c03e77bc30c2023725def841a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 30 Mar 2013 11:44:22 +0000 Subject: sparc: use generic headers Use "generic-y" to add generic headers where possible Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 4 ++++ arch/sparc/include/asm/cputime.h | 6 ------ arch/sparc/include/asm/emergency-restart.h | 6 ------ arch/sparc/include/asm/mutex.h | 9 --------- arch/sparc/include/asm/serial.h | 6 ------ 5 files changed, 4 insertions(+), 27 deletions(-) delete mode 100644 arch/sparc/include/asm/cputime.h delete mode 100644 arch/sparc/include/asm/emergency-restart.h delete mode 100644 arch/sparc/include/asm/mutex.h delete mode 100644 arch/sparc/include/asm/serial.h (limited to 'arch') diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index e26d430ce2fd..f73884bb286a 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,11 +2,15 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += div64.h +generic-y += emergency-restart.h generic-y += exec.h generic-y += local64.h +generic-y += mutex.h generic-y += irq_regs.h generic-y += local.h generic-y += module.h +generic-y += serial.h generic-y += trace_clock.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/asm/cputime.h b/arch/sparc/include/asm/cputime.h deleted file mode 100644 index 1a642b81e019..000000000000 --- a/arch/sparc/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_CPUTIME_H -#define __SPARC_CPUTIME_H - -#include - -#endif /* __SPARC_CPUTIME_H */ diff --git a/arch/sparc/include/asm/emergency-restart.h b/arch/sparc/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c48e42e..000000000000 --- a/arch/sparc/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sparc/include/asm/mutex.h b/arch/sparc/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/sparc/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/sparc/include/asm/serial.h b/arch/sparc/include/asm/serial.h deleted file mode 100644 index f90d61c28059..000000000000 --- a/arch/sparc/include/asm/serial.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_SERIAL_H -#define __SPARC_SERIAL_H - -#define BASE_BAUD ( 1843200 / 16 ) - -#endif /* __SPARC_SERIAL_H */ -- cgit v1.2.3 From cbf1ef6b3345d2cc7e62407eec6a6f72a8b1346f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 31 Mar 2013 07:01:47 +0000 Subject: sparc: use asm-generic version of types.h In sparc headers we use the following pattern: #if defined(__sparc__) && defined(__arch64__) sparc64 specific stuff #else sparc32 specific stuff #endif In types.h this pattern was not followed and here we only checked for __sparc__ for no good reason. It was a left-over from long time ago. I checked other architectures - and most of them do not have any such checks. And all the recently merged versions uses the asm-generic version. Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/Kbuild | 1 - arch/sparc/include/uapi/asm/types.h | 17 ----------------- 3 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/sparc/include/uapi/asm/types.h (limited to 'arch') diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index f73884bb286a..ff18e3cfb6b1 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -13,4 +13,5 @@ generic-y += local.h generic-y += module.h generic-y += serial.h generic-y += trace_clock.h +generic-y += types.h generic-y += word-at-a-time.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index ce175aff71b7..b5843ee09fb5 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -44,7 +44,6 @@ header-y += swab.h header-y += termbits.h header-y += termios.h header-y += traps.h -header-y += types.h header-y += uctx.h header-y += unistd.h header-y += utrap.h diff --git a/arch/sparc/include/uapi/asm/types.h b/arch/sparc/include/uapi/asm/types.h deleted file mode 100644 index 383d156cde9c..000000000000 --- a/arch/sparc/include/uapi/asm/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _SPARC_TYPES_H -#define _SPARC_TYPES_H -/* - * This file is never included by application software unless - * explicitly requested (e.g., via linux/types.h) in which case the - * application is Linux specific so (user-) name space pollution is - * not a major issue. However, for interoperability, libraries still - * need to be careful to avoid a name clashes. - */ - -#if defined(__sparc__) - -#include - -#endif /* defined(__sparc__) */ - -#endif /* defined(_SPARC_TYPES_H) */ -- cgit v1.2.3 From 8f08d6667287241f6818d35e02b223fb5df97cf1 Mon Sep 17 00:00:00 2001 From: Nigel Roberts Date: Mon, 1 Apr 2013 23:03:22 +1100 Subject: ARM: Kirkwood: Fix typo in the definition of ix2-200 rebuild LED In the conversion to pinctrl, an error in the pins for the rebuild LED was introduced. This patch assigns the correct pins and includes the correct name for the LED in kirkwood-iomega_ix2_200.dts. Signed-off-by: Nigel Roberts Cc: # v3.8.x Signed-off-by: Jason Cooper --- arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts index 93c3afbef9ee..3694e94f6e99 100644 --- a/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts +++ b/arch/arm/boot/dts/kirkwood-iomega_ix2_200.dts @@ -96,11 +96,11 @@ marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_1: pmx-led-rebuild-brt-ctrl-1 { - marvell,pins = "mpp44"; + marvell,pins = "mpp46"; marvell,function = "gpio"; }; pmx_led_rebuild_brt_ctrl_2: pmx-led-rebuild-brt-ctrl-2 { - marvell,pins = "mpp45"; + marvell,pins = "mpp47"; marvell,function = "gpio"; }; @@ -157,14 +157,14 @@ gpios = <&gpio0 16 0>; linux,default-trigger = "default-on"; }; - health_led1 { + rebuild_led { + label = "status:white:rebuild_led"; + gpios = <&gpio1 4 0>; + }; + health_led { label = "status:red:health_led"; gpios = <&gpio1 5 0>; }; - health_led2 { - label = "status:white:health_led"; - gpios = <&gpio1 4 0>; - }; backup_led { label = "status:blue:backup_led"; gpios = <&gpio0 15 0>; -- cgit v1.2.3 From b22227944b8fe92b19150b4c36421e37979d9a16 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 29 Mar 2013 10:20:56 -0400 Subject: xen/mmu: On early bootup, flush the TLB when changing RO->RW bits Xen provided pagetables. Occassionaly on a DL380 G4 the guest would crash quite early with this: (XEN) d244:v0: unhandled page fault (ec=0003) (XEN) Pagetable walk from ffffffff84dc7000: (XEN) L4[0x1ff] = 00000000c3f18067 0000000000001789 (XEN) L3[0x1fe] = 00000000c3f14067 000000000000178d (XEN) L2[0x026] = 00000000dc8b2067 0000000000004def (XEN) L1[0x1c7] = 00100000dc8da067 0000000000004dc7 (XEN) domain_crash_sync called from entry.S (XEN) Domain 244 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.1.3OVM x86_64 debug=n Not tainted ]---- (XEN) CPU: 3 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000216 EM: 1 CONTEXT: pv guest (XEN) rax: 0000000000000000 rbx: ffffffff81785f88 rcx: 000000000000003f (XEN) rdx: 0000000000000000 rsi: 00000000dc8da063 rdi: ffffffff84dc7000 The offending code shows it to be a loop writting the value zero (%rax) in the %rdi (the L4 provided by Xen) register: 0: 44 00 00 add %r8b,(%rax) 3: 31 c0 xor %eax,%eax 5: b9 40 00 00 00 mov $0x40,%ecx a: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 11: 00 00 13: ff c9 dec %ecx 15:* 48 89 07 mov %rax,(%rdi) <-- trapping instruction 18: 48 89 47 08 mov %rax,0x8(%rdi) 1c: 48 89 47 10 mov %rax,0x10(%rdi) which fails. xen_setup_kernel_pagetable recycles some of the Xen's page-table entries when it has switched over to its Linux page-tables. Right before try to clear the page, we make a hypercall to change it from _RO to _RW and that works (otherwise we would hit an BUG()). And the _RW flag is set for that page: (XEN) L1[0x1c7] = 001000004885f067 0000000000004dc7 The error code is 3, so PFEC_page_present and PFEC_write_access, so page is present (correct), and we tried to write to the page, but a violation occurred. The one theory is that the the page entries in hardware (which are cached) are not up to date with what we just set. Especially as we have just done an CR3 write and flushed the multicalls. This patch does solve the problem by flusing out the TLB page entry after changing it from _RO to _RW and we don't hit this issue anymore. Fixed-Oracle-Bug: 16243091 [ON OCCASIONS VM START GOES INTO 'CRASH' STATE: CLEAR_PAGE+0X12 ON HP DL380 G4] Reported-and-Tested-by: Saar Maoz Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6afbb2ca9a0a..a4ea92477e01 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1748,14 +1748,18 @@ static void *m2v(phys_addr_t maddr) } /* Set the page permissions on an identity-mapped pages */ -static void set_page_prot(void *addr, pgprot_t prot) +static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } +static void set_page_prot(void *addr, pgprot_t prot) +{ + return set_page_prot_flags(addr, prot, UVMF_NONE); +} #ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { @@ -1839,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, unsigned long addr) { if (*pt_base == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_base)++; } if (*pt_end == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_end)--; } -- cgit v1.2.3 From 8b4b9f27e57584f3d90e0bb84cf800ad81cfe3a1 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 15 Feb 2013 12:21:43 -0500 Subject: x86: remove the x32 syscall bitmask from syscall_get_nr() Commit fca460f95e928bae373daa8295877b6905bc62b8 simplified the x32 implementation by creating a syscall bitmask, equal to 0x40000000, that could be applied to x32 syscalls such that the masked syscall number would be the same as a x86_64 syscall. While that patch was a nice way to simplify the code, it went a bit too far by adding the mask to syscall_get_nr(); returning the masked syscall numbers can cause confusion with callers that expect syscall numbers matching the x32 ABI, e.g. unmasked syscall numbers. This patch fixes this by simply removing the mask from syscall_get_nr() while preserving the other changes from the original commit. While there are several syscall_get_nr() callers in the kernel, most simply check that the syscall number is greater than zero, in this case this patch will have no effect. Of those remaining callers, they appear to be few, seccomp and ftrace, and from my testing of seccomp without this patch the original commit definitely breaks things; the seccomp filter does not correctly filter the syscalls due to the difference in syscall numbers in the BPF filter and the value from syscall_get_nr(). Applying this patch restores the seccomp BPF filter functionality on x32. I've tested this patch with the seccomp BPF filters as well as ftrace and everything looks reasonable to me; needless to say general usage seemed fine as well. Signed-off-by: Paul Moore Link: http://lkml.kernel.org/r/20130215172143.12549.10292.stgit@localhost Cc: Cc: Will Drewry Cc: H. Peter Anvin Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 1ace47b62592..2e188d68397c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -29,13 +29,13 @@ extern const unsigned long sys_call_table[]; */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->orig_ax & __SYSCALL_MASK; + return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - regs->ax = regs->orig_ax & __SYSCALL_MASK; + regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, -- cgit v1.2.3 From 55985e15b80beb339d3e332f00a26003deff14e6 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 2 Apr 2013 01:29:29 +0200 Subject: ARM: mxs: Slow down the I2C clock speed Slow down the I2C clock speed on M28 and SPS1 as it turns out the I2C block in i.MX28 can not operate stable enough with the bus running at 400kHz. Note that the driver used by Freescale runs the bus at 250kHz when 400kHz speed is selected, but the mainline Linux kernel runs the bus at actual 400kHz and that's where it is occasionally unstable. Play safe and run the bus at 100kHz. Signed-off-by: Marek Vasut Cc: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx28-m28evk.dts | 1 - arch/arm/boot/dts/imx28-sps1.dts | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts index 6ce3d17c3a29..fd36e1cca104 100644 --- a/arch/arm/boot/dts/imx28-m28evk.dts +++ b/arch/arm/boot/dts/imx28-m28evk.dts @@ -152,7 +152,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; sgtl5000: codec@0a { diff --git a/arch/arm/boot/dts/imx28-sps1.dts b/arch/arm/boot/dts/imx28-sps1.dts index e6cde8aa7fff..6c6a5442800a 100644 --- a/arch/arm/boot/dts/imx28-sps1.dts +++ b/arch/arm/boot/dts/imx28-sps1.dts @@ -70,7 +70,6 @@ i2c0: i2c@80058000 { pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins_a>; - clock-frequency = <400000>; status = "okay"; rtc: rtc@51 { -- cgit v1.2.3 From 918708245e92941df16a634dc201b407d12bcd91 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 3 Apr 2013 15:47:33 +0100 Subject: x86: Fix rebuild with EFI_STUB enabled eboot.o and efi_stub_$(BITS).o didn't get added to "targets", and hence their .cmd files don't get included by the build machinery, leading to the files always getting rebuilt. Rather than adding the two files individually, take the opportunity and add $(VMLINUX_OBJS) to "targets" instead, thus allowing the assignment at the top of the file to be shrunk quite a bit. At the same time, remove a pointless flags override line - the variable assigned to was misspelled anyway, and the options added are meaningless for assembly sources. [ hpa: the patch is not minimal, but I am taking it for -urgent anyway since the excess impact of the patch seems to be small enough. ] Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/515C5D2502000078000CA6AD@nat28.tlf.novell.com Cc: Matthew Garrett Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8a84501acb1b..5ef205c5f37b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -29,7 +29,6 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/piggy.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone -$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o @@ -43,7 +42,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -targets += vmlinux.bin.all vmlinux.relocs +targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs CMD_RELOCS = arch/x86/tools/relocs quiet_cmd_relocs = RELOCS $@ -- cgit v1.2.3 From 8f964525a121f2ff2df948dac908dcc65be21b5b Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Fri, 29 Mar 2013 09:35:21 -0700 Subject: KVM: Allow cross page reads and writes from cached translations. This patch adds support for kvm_gfn_to_hva_cache_init functions for reads and writes that will cross a page. If the range falls within the same memslot, then this will be a fast operation. If the range is split between two memslots, then the slower kvm_read_guest and kvm_write_guest are used. Tested: Test against kvm_clock unit tests. Signed-off-by: Andrew Honig Signed-off-by: Gleb Natapov --- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/x86.c | 13 ++++++------- include/linux/kvm_host.h | 2 +- include/linux/kvm_types.h | 1 + virt/kvm/kvm_main.c | 47 +++++++++++++++++++++++++++++++++++++---------- 5 files changed, 46 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd4e4ad..f77df1c5de6e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) if (!pv_eoi_enabled(vcpu)) return 0; return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, - addr); + addr, sizeof(u8)); } void kvm_lapic_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f19ac0aca60d..e1721324c271 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1823,7 +1823,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + sizeof(u32))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); @@ -1952,12 +1953,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) gpa_offset = data & ~(PAGE_MASK | 1); - /* Check that the address is 32-byte aligned. */ - if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1)) - break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, - &vcpu->arch.pv_time, data & ~1ULL)) + &vcpu->arch.pv_time, data & ~1ULL, + sizeof(struct pvclock_vcpu_time_info))) vcpu->arch.pv_time_enabled = false; else vcpu->arch.pv_time_enabled = true; @@ -1977,7 +1975,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS)) + data & KVM_STEAL_VALID_BITS, + sizeof(struct kvm_steal_time))) return 1; vcpu->arch.st.msr_val = data; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..c13958251927 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -518,7 +518,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa); + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index fa7cc7244cbd..b0bcce0ddc95 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -71,6 +71,7 @@ struct gfn_to_hva_cache { u64 generation; gpa_t gpa; unsigned long hva; + unsigned long len; struct kvm_memory_slot *memslot; }; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68feb5c5a..f18013f09e68 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1541,21 +1541,38 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, } int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa) + gpa_t gpa, unsigned long len) { struct kvm_memslots *slots = kvm_memslots(kvm); int offset = offset_in_page(gpa); - gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t start_gfn = gpa >> PAGE_SHIFT; + gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; + gfn_t nr_pages_needed = end_gfn - start_gfn + 1; + gfn_t nr_pages_avail; ghc->gpa = gpa; ghc->generation = slots->generation; - ghc->memslot = gfn_to_memslot(kvm, gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); - if (!kvm_is_error_hva(ghc->hva)) + ghc->len = len; + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { ghc->hva += offset; - else - return -EFAULT; - + } else { + /* + * If the requested region crosses two memslots, we still + * verify that the entire region is valid here. + */ + while (start_gfn <= end_gfn) { + ghc->memslot = gfn_to_memslot(kvm, start_gfn); + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, + &nr_pages_avail); + if (kvm_is_error_hva(ghc->hva)) + return -EFAULT; + start_gfn += nr_pages_avail; + } + /* Use the slow path for cross page reads and writes. */ + ghc->memslot = NULL; + } return 0; } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); @@ -1566,8 +1583,13 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -1587,8 +1609,13 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, struct kvm_memslots *slots = kvm_memslots(kvm); int r; + BUG_ON(len > ghc->len); + if (slots->generation != ghc->generation) - kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa); + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; -- cgit v1.2.3 From aa8b4be3ac049c8b1df2a87e4d1d902ccfc1f7a9 Mon Sep 17 00:00:00 2001 From: Jay Estabrook Date: Sun, 7 Apr 2013 21:36:09 +1200 Subject: alpha: Add irongate_io to PCI bus resources Fixes a NULL pointer dereference at boot on UP1500. Cc: stable@vger.kernel.org Reviewed-and-Tested-by: Matt Turner Signed-off-by: Jay Estabrook Signed-off-by: Matt Turner Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/kernel/sys_nautilus.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 4d4c046f708d..1383f8601a93 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -188,6 +188,10 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr) extern void free_reserved_mem(void *, void *); extern void pcibios_claim_one_bus(struct pci_bus *); +static struct resource irongate_io = { + .name = "Irongate PCI IO", + .flags = IORESOURCE_IO, +}; static struct resource irongate_mem = { .name = "Irongate PCI MEM", .flags = IORESOURCE_MEM, @@ -209,6 +213,7 @@ nautilus_init_pci(void) irongate = pci_get_bus_and_slot(0, 0); bus->self = irongate; + bus->resource[0] = &irongate_io; bus->resource[1] = &irongate_mem; pci_bus_size_bridges(bus); -- cgit v1.2.3 From cd8d2331756751b6aeb855a3c9cb0a92fbd9c725 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:10 +1200 Subject: alpha: makefile: don't enforce small data model for kernel builds Due to all of the goodness being packed into today's kernels, the resulting image isn't as slim as it once was. In light of this, don't pass -msmall-data to gcc, which otherwise results in link failures due to impossible relocations when compiling anything but the most trivial configurations. Cc: Richard Henderson Cc: Ivan Kokshaysky Reviewed-by: Matt Turner Tested-by: Thorsten Kranzkowski Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 4759fe751aa1..2cc3cc519c54 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -12,7 +12,7 @@ NM := $(NM) -B LDFLAGS_vmlinux := -static -N #-relax CHECKFLAGS += -D__alpha__ -m64 -cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data +cflags-y := -pipe -mno-fp-regs -ffixed-8 cflags-y += $(call cc-option, -fno-jump-tables) cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 -- cgit v1.2.3 From e20800fd5cec2a75639a32e956b1cdc023cb87ce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:11 +1200 Subject: alpha: irq: run all handlers with interrupts disabled Linux has expected that interrupt handlers are executed with local interrupts disabled for a while now, so ensure that this is the case on Alpha even for non-device interrupts such as IPIs. Without this patch, secondary boot results in the following backtrace: warning: at kernel/softirq.c:139 __local_bh_enable+0xb8/0xd0() trace: __local_bh_enable+0xb8/0xd0 irq_enter+0x74/0xa0 scheduler_ipi+0x50/0x100 handle_ipi+0x84/0x260 do_entint+0x1ac/0x2e0 irq_exit+0x60/0xa0 handle_irq+0x98/0x100 do_entint+0x2c8/0x2e0 ret_from_sys_call+0x0/0x10 load_balance+0x3e4/0x870 cpu_idle+0x24/0x80 rcu_eqs_enter_common.isra.38+0x0/0x120 cpu_idle+0x40/0x80 rest_init+0xc0/0xe0 _stext+0x1c/0x20 A similar dump occurs if you try to reboot using magic-sysrq. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Reviewed-by: Matt Turner Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/kernel/irq.c | 7 ------- arch/alpha/kernel/irq_alpha.c | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2872accd2215..7b2be251c30f 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -117,13 +117,6 @@ handle_irq(int irq) return; } - /* - * From here we must proceed with IPL_MAX. Note that we do not - * explicitly enable interrupts afterwards - some MILO PALcode - * (namely LX164 one) seems to have severe problems with RTI - * at IPL 0. - */ - local_irq_disable(); irq_enter(); generic_handle_irq_desc(irq, desc); irq_exit(); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 772ddfdb71a8..1216dfb4fcc5 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -45,6 +45,14 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr, struct pt_regs *regs) { struct pt_regs *old_regs; + + /* + * Disable interrupts during IRQ handling. + * Note that there is no matching local_irq_enable() due to + * severe problems with RTI at IPL0 and some MILO PALcode + * (namely LX164). + */ + local_irq_disable(); switch (type) { case 0: #ifdef CONFIG_SMP @@ -62,7 +70,6 @@ do_entInt(unsigned long type, unsigned long vector, { long cpu; - local_irq_disable(); smp_percpu_timer_interrupt(regs); cpu = smp_processor_id(); if (cpu != boot_cpuid) { -- cgit v1.2.3 From e74e25929ce4d4f5f5f4bae585b214bacef6960b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sun, 7 Apr 2013 21:36:12 +1200 Subject: alpha: irq: remove deprecated use of IRQF_DISABLED Interrupt handlers are always invoked with interrupts disabled, so remove all uses of the deprecated IRQF_DISABLED flag. Cc: Richard Henderson Cc: Ivan Kokshaysky Reviewed-by: Matt Turner Signed-off-by: Will Deacon Signed-off-by: Michael Cree Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/floppy.h | 2 +- arch/alpha/kernel/irq_alpha.c | 1 - arch/alpha/kernel/sys_titan.c | 14 +++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 46cefbd50e73..bae97eb19d26 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -26,7 +26,7 @@ #define fd_disable_irq() disable_irq(FLOPPY_IRQ) #define fd_cacheflush(addr,size) /* nothing */ #define fd_request_irq() request_irq(FLOPPY_IRQ, floppy_interrupt,\ - IRQF_DISABLED, "floppy", NULL) + 0, "floppy", NULL) #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) #ifdef CONFIG_PCI diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 1216dfb4fcc5..f433fc11877a 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -229,7 +229,6 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr, struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, .name = "timer", }; diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 5cf4a481b8c5..a53cf03f49d5 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -280,15 +280,15 @@ titan_late_init(void) * all reported to the kernel as machine checks, so the handler * is a nop so it can be called to count the individual events. */ - titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(63+16, titan_intr_nop, 0, "CChip Error", NULL); - titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(62+16, titan_intr_nop, 0, "PChip 0 H_Error", NULL); - titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(61+16, titan_intr_nop, 0, "PChip 1 H_Error", NULL); - titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(60+16, titan_intr_nop, 0, "PChip 0 C_Error", NULL); - titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(59+16, titan_intr_nop, 0, "PChip 1 C_Error", NULL); /* @@ -348,9 +348,9 @@ privateer_init_pci(void) * Hook a couple of extra err interrupts that the * common titan code won't. */ - titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(53+16, titan_intr_nop, 0, "NMI", NULL); - titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED, + titan_request_irq(50+16, titan_intr_nop, 0, "Temperature Warning", NULL); /* -- cgit v1.2.3 From 9fb2640159f9d4f5a2a9d60e490482d4cbecafdb Mon Sep 17 00:00:00 2001 From: Michael Wolf Date: Fri, 5 Apr 2013 10:41:40 +0000 Subject: powerpc: pSeries_lpar_hpte_remove fails from Adjunct partition being performed before the ANDCOND test Some versions of pHyp will perform the adjunct partition test before the ANDCOND test. The result of this is that H_RESOURCE can be returned and cause the BUG_ON condition to occur. The HPTE is not removed. So add a check for H_RESOURCE, it is ok if this HPTE is not removed as pSeries_lpar_hpte_remove is looking for an HPTE to remove and not a specific HPTE to remove. So it is ok to just move on to the next slot and try again. Cc: stable@vger.kernel.org Signed-off-by: Michael Wolf Signed-off-by: Stephen Rothwell --- arch/powerpc/platforms/pseries/lpar.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0da39fed355a..299731e9036b 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -186,7 +186,13 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) (0x1UL << 4), &dummy1, &dummy2); if (lpar_rc == H_SUCCESS) return i; - BUG_ON(lpar_rc != H_NOT_FOUND); + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. + */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); slot_offset++; slot_offset &= 0x7; -- cgit v1.2.3 From b6c7aabd923a17af993c5a5d5d7995f0b27c000a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 8 Apr 2013 11:44:57 +0100 Subject: ARM: Do 15e0d9e37c (ARM: pm: let platforms select cpu_suspend support) properly Let's do the changes properly and fix the same problem everywhere, not just for one case. Cc: # kernels containing 15e0d9e37c or equivalent Signed-off-by: Russell King --- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 2c3b9421ab5e..2556cf1c2da1 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -387,7 +387,7 @@ ENTRY(cpu_arm920_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size .equ cpu_arm920_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm920_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f1803f7e2972..344c8a548cc0 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -402,7 +402,7 @@ ENTRY(cpu_arm926_set_pte_ext) /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size .equ cpu_arm926_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_arm926_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c13, c0, 0 @ PID diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 82f9cdc751d6..0b60dd3d742a 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -350,7 +350,7 @@ ENTRY(cpu_mohawk_set_pte_ext) .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_mohawk_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 3aa0da11fd84..d92dfd081429 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -172,7 +172,7 @@ ENTRY(cpu_sa1100_set_pte_ext) .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_sa1100_do_suspend) stmfd sp!, {r4 - r6, lr} mrc p15, 0, r4, c3, c0, 0 @ domain ID diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index bcaaa8de9325..5c07ee4fe3eb 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -138,7 +138,7 @@ ENTRY(cpu_v6_set_pte_ext) /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size .equ cpu_v6_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v6_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index eb93d6487f35..e8efd83b6f25 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -413,7 +413,7 @@ ENTRY(cpu_xsc3_set_pte_ext) .globl cpu_xsc3_suspend_size .equ cpu_xsc3_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xsc3_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 25510361aa18..e766f889bfd6 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -528,7 +528,7 @@ ENTRY(cpu_xscale_set_pte_ext) .globl cpu_xscale_suspend_size .equ cpu_xscale_suspend_size, 4 * 6 -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_xscale_do_suspend) stmfd sp!, {r4 - r9, lr} mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode -- cgit v1.2.3 From 79e5f05edcbf85825d19eb8a425cd6c36c6c66f1 Mon Sep 17 00:00:00 2001 From: Christian Ruppert Date: Mon, 8 Apr 2013 13:05:30 +0530 Subject: ARC: Add implicit compiler barrier to raw_local_irq* functions ARC irqsave/restore macros were missing the compiler barrier, causing a stale load in irq-enabled region be used in irq-safe region, despite being changed, because the register holding the value was still live. The problem manifested as random crashes in timer code when stress testing ARCLinux (3.9-rc3) on a !SMP && !PREEMPT_COUNT Here's the exact sequence which caused this: (0). tv1[x] <----> t1 <---> t2 (1). mod_timer(t1) interrupted after it calls timer_pending() (2). mod_timer(t2) completes (3). mod_timer(t1) resumes but messes up the list (4). __runt_timers( ) uses bogus timer_list entry / crashes in timer->function Essentially mod_timer() was racing against itself and while the spinlock serialized the tv1[] timer link list, timer_pending() called outside the spinlock, cached timer link list element in a register. With low register pressure (and a deep register file), lack of barrier in raw_local_irqsave() as well as preempt_disable (!PREEMPT_COUNT version), there was nothing to force gcc to reload across the spinlock, causing a stale value in reg be used for link list manipulation - ensuing a corruption. ARcompact disassembly which shows the culprit generated code: mod_timer: push_s blink mov_s r13,r0 # timer, timer .. ###### timer_pending( ) ld_s r3,[r13] # <------ .entry.next LOADED brne r3, 0, @.L163 .L163: .. ###### spin_lock_irq( ) lr r5, [status32] # flags bic r4, r5, 6 # temp, flags, and.f 0, r5, 6 # flags, flag.nz r4 ###### detach_if_pending( ) begins tst_s r3,r3 <-------------- # timer_pending( ) checks timer->entry.next # r3 is NOT reloaded by gcc, using stale value beq.d @.L169 mov.eq r0,0 ##### detach_timer( ): __list_del( ) ld r4,[r13,4] # .entry.prev, D.31439 st r4,[r3,4] # .prev, D.31439 st r3,[r4] # .next, D.30246 We initially tried to fix this by adding barrier() to preempt_* macros for !PREEMPT_COUNT but Linus clarified that it was anything but wrong. http://www.spinics.net/lists/kernel/msg1512709.html [vgupta: updated commitlog] Reported-by/Signed-off-by: Christian Ruppert Cc: Christian Ruppert Cc: Pierrick Hascoet Debugged-by/Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds --- arch/arc/include/asm/irqflags.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h index ccd84806b62f..eac071668201 100644 --- a/arch/arc/include/asm/irqflags.h +++ b/arch/arc/include/asm/irqflags.h @@ -39,7 +39,7 @@ static inline long arch_local_irq_save(void) " flag.nz %0 \n" : "=r"(temp), "=r"(flags) : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) - : "cc"); + : "memory", "cc"); return flags; } @@ -53,7 +53,8 @@ static inline void arch_local_irq_restore(unsigned long flags) __asm__ __volatile__( " flag %0 \n" : - : "r"(flags)); + : "r"(flags) + : "memory"); } /* @@ -73,7 +74,8 @@ static inline void arch_local_irq_disable(void) " and %0, %0, %1 \n" " flag %0 \n" : "=&r"(temp) - : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK))); + : "n"(~(STATUS_E1_MASK | STATUS_E2_MASK)) + : "memory"); } /* @@ -85,7 +87,9 @@ static inline long arch_local_save_flags(void) __asm__ __volatile__( " lr %0, [status32] \n" - : "=&r"(temp)); + : "=&r"(temp) + : + : "memory"); return temp; } -- cgit v1.2.3 From b2dfaa8d33cee9dd4ed78979f5d70063df546101 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 8 Apr 2013 14:21:31 +1000 Subject: m68k: define a local gpio_request_one() function Compiling for linux-3.9-rc1 and later fails with: drivers/gpio/devres.c: In function 'devm_gpio_request_one': drivers/gpio/devres.c:90:2: error: implicit declaration of function 'gpio_request_one' [-Werror=implicit-function-declaration] So provide a local gpio_request_one() function. Code largely borrowed from blackfin's local gpio_request_one() function. Signed-off-by: Greg Ungerer Acked-by: Linus Walleij --- arch/m68k/include/asm/gpio.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/m68k/include/asm/gpio.h b/arch/m68k/include/asm/gpio.h index 4395ffc51fdb..8cc83431805b 100644 --- a/arch/m68k/include/asm/gpio.h +++ b/arch/m68k/include/asm/gpio.h @@ -86,4 +86,24 @@ static inline int gpio_cansleep(unsigned gpio) return gpio < MCFGPIO_PIN_MAX ? 0 : __gpio_cansleep(gpio); } +static inline int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) +{ + int err; + + err = gpio_request(gpio, label); + if (err) + return err; + + if (flags & GPIOF_DIR_IN) + err = gpio_direction_input(gpio); + else + err = gpio_direction_output(gpio, + (flags & GPIOF_INIT_HIGH) ? 1 : 0); + + if (err) + gpio_free(gpio); + + return err; +} + #endif -- cgit v1.2.3 From a6e4d5a03e9e3587e88aba687d8f225f4f04c792 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 25 Mar 2013 09:14:30 +0000 Subject: x86, efivars: firmware bug workarounds should be in platform code Let's not burden ia64 with checks in the common efivars code that we're not writing too much data to the variable store. That kind of thing is an x86 firmware bug, plain and simple. efi_query_variable_store() provides platforms with a wrapper in which they can perform checks and workarounds for EFI variable storage bugs. Cc: H. Peter Anvin Cc: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 25 +++++++++++++++++++++++++ drivers/firmware/efivars.c | 18 +++--------------- include/linux/efi.h | 9 ++++++++- 3 files changed, 36 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 5f2ecaf3f9d8..c89c245eff40 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -999,3 +999,28 @@ u64 efi_mem_attributes(unsigned long phys_addr) } return 0; } + +/* + * Some firmware has serious problems when using more than 50% of the EFI + * variable store, i.e. it triggers bugs that can brick machines. Ensure that + * we never use more than this safe limit. + * + * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable + * store. + */ +efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + efi_status_t status; + u64 storage_size, remaining_size, max_size; + + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); + if (status != EFI_SUCCESS) + return status; + + if (!storage_size || size > remaining_size || size > max_size || + (remaining_size - size) < (storage_size / 2)) + return EFI_OUT_OF_RESOURCES; + + return EFI_SUCCESS; +} diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 7acafb80fd4c..bf15d81d74e1 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -436,24 +436,12 @@ static efi_status_t check_var_size_locked(struct efivars *efivars, u32 attributes, unsigned long size) { - u64 storage_size, remaining_size, max_size; - efi_status_t status; const struct efivar_operations *fops = efivars->ops; - if (!efivars->ops->query_variable_info) + if (!efivars->ops->query_variable_store) return EFI_UNSUPPORTED; - status = fops->query_variable_info(attributes, &storage_size, - &remaining_size, &max_size); - - if (status != EFI_SUCCESS) - return status; - - if (!storage_size || size > remaining_size || size > max_size || - (remaining_size - size) < (storage_size / 2)) - return EFI_OUT_OF_RESOURCES; - - return status; + return fops->query_variable_store(attributes, size); } @@ -2131,7 +2119,7 @@ efivars_init(void) ops.get_variable = efi.get_variable; ops.set_variable = efi.set_variable; ops.get_next_variable = efi.get_next_variable; - ops.query_variable_info = efi.query_variable_info; + ops.query_variable_store = efi_query_variable_store; error = register_efivars(&__efivars, &ops, efi_kobj); if (error) diff --git a/include/linux/efi.h b/include/linux/efi.h index 9bf2f1fcae27..3d7df3d32c66 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -333,6 +333,7 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules, unsigned long count, u64 *max_size, int *reset_type); +typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size); /* * EFI Configuration Table and GUID definitions @@ -575,9 +576,15 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos #ifdef CONFIG_X86 extern void efi_late_init(void); extern void efi_free_boot_services(void); +extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size); #else static inline void efi_late_init(void) {} static inline void efi_free_boot_services(void) {} + +static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) +{ + return EFI_SUCCESS; +} #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern u64 efi_get_iobase (void); @@ -731,7 +738,7 @@ struct efivar_operations { efi_get_variable_t *get_variable; efi_get_next_variable_t *get_next_variable; efi_set_variable_t *set_variable; - efi_query_variable_info_t *query_variable_info; + efi_query_variable_store_t *query_variable_store; }; struct efivars { -- cgit v1.2.3 From cab1e0a36c9dd0b0671fb84197ed294513f5adc1 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 29 Mar 2013 16:20:09 +0100 Subject: ARM: clk-imx35: Bugfix iomux clock This patch enables iomuxc_gate clock. It is necessary to be able to reconfigure iomux pads. Without this clock enabled, the clk_disable_unused function will disable this clock and the iomux pads are not configurable anymore. This happens at every boot. After a reboot (watchdog system reset) the clock is not enabled again, so all iomux pad reconfigurations in boot code are without effect. The iomux pads should be always configurable, so this patch always enables it. Signed-off-by: Markus Pargmann Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx35.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index e13a8fa5e62c..b95898a3912c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -265,6 +265,7 @@ int __init mx35_clocks_init(void) clk_prepare_enable(clk[iim_gate]); clk_prepare_enable(clk[emi_gate]); clk_prepare_enable(clk[max_gate]); + clk_prepare_enable(clk[iomuxc_gate]); /* * SCC is needed to boot via mmc after a watchdog reset. The clock code -- cgit v1.2.3 From 75498083e25e96932ad998ffdeadb17234c68d3a Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 29 Mar 2013 16:20:10 +0100 Subject: ARM: imx35 Bugfix admux clock The admux clock seems to be the audmux clock as tests show. audmux does not work without this clock enabled. Currently imx35 does not register a clock device for audmux. This patch adds this registration. imx-audmux driver already handles a clock device, so no changes are necessary there. Signed-off-by: Markus Pargmann Cc: stable@vger.kernel.org Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx35.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index b95898a3912c..2193c834f55c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -257,6 +257,7 @@ int __init mx35_clocks_init(void) clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[nfc_div], NULL, "imx25-nand.0"); clk_register_clkdev(clk[csi_gate], NULL, "mx3-camera.0"); + clk_register_clkdev(clk[admux_gate], "audmux", NULL); clk_prepare_enable(clk[spba_gate]); clk_prepare_enable(clk[gpio1_gate]); -- cgit v1.2.3 From 2bb4b70b1dbb45f0c1a3ba98066e6635d8aa3fe0 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 3 Apr 2013 23:50:09 +0800 Subject: ARM: imx: provide twd clock lookup from device tree While booting from device tree, imx6q used to provide twd clock lookup by calling clk_register_clkdev() in clock driver. However, the commit bd60345 (ARM: use device tree to get smp_twd clock) forces DT boot to look up the clock from device tree. It causes the failure below when twd driver tries to get the clock, and hence kernel has to calibrate the local timer frequency. smp_twd: clock not found -2 ... Calibrating local timer... 396.13MHz. Fix the regression by providing twd clock lookup from device tree, and remove the unused twd clk_register_clkdev() call from clock driver. Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl.dtsi | 1 + arch/arm/mach-imx/clk-imx6q.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index 06ec460b4581..281a223591ff 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -91,6 +91,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x00a00600 0x20>; interrupts = <1 13 0xf01>; + clocks = <&clks 15>; }; L2: l2-cache@00a02000 { diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 2f9ff93a4e61..22a3021a455b 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -443,7 +443,6 @@ int __init mx6q_clocks_init(void) clk_register_clkdev(clk[gpt_ipg], "ipg", "imx-gpt.0"); clk_register_clkdev(clk[gpt_ipg_per], "per", "imx-gpt.0"); - clk_register_clkdev(clk[twd], NULL, "smp_twd"); clk_register_clkdev(clk[cko1_sel], "cko1_sel", NULL); clk_register_clkdev(clk[ahb], "ahb", NULL); clk_register_clkdev(clk[cko1], "cko1", NULL); -- cgit v1.2.3 From e8094b2c17126c7dfdeafa296f206a4a3b122d23 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Thu, 4 Apr 2013 16:03:29 +0200 Subject: ARM i.MX6: Fix ldb_di clock selection According to the recent i.MX6 Quad technical reference manual, mode 0x4 (100b) of the CCM_CS2DCR register (address 0x020C402C) bits [11-9] and [14-12] select the PLL3 clock, and not the PLL3 PFD1 540M clock. In our code, the PLL3 root clock is named 'pll3_usb_otg', select this instead of the 540M clock. Signed-off-by: Dirk Behme Signed-off-by: Shawn Guo --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 22a3021a455b..d38e54f5b6d7 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -115,7 +115,7 @@ static const char *gpu2d_core_sels[] = { "axi", "pll3_usb_otg", "pll2_pfd0_352m" static const char *gpu3d_core_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd2_396m", }; static const char *gpu3d_shader_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd9_720m", }; static const char *ipu_sels[] = { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", }; -static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_pfd1_540m", }; +static const char *ldb_di_sels[] = { "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "mmdc_ch1_axi", "pll3_usb_otg", }; static const char *ipu_di_pre_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll5_video", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll3_pfd1_540m", }; static const char *ipu1_di0_sels[] = { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; static const char *ipu1_di1_sels[] = { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", }; -- cgit v1.2.3 From b530f742ac27460d41d35b638ad6aad92044a982 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Mon, 8 Apr 2013 21:39:45 +0900 Subject: ARM: S3C24XX: Correct NR_IRQS definition for s3c2440 Due to NR_IRQS being incorrectly defined not all IRQ domains can be registered for S3C2440. It causes following errors on a s3c2440 SoC based board: NR_IRQS:89 S3C2440: IRQ Support irq: clearing pending status 00000002 ------------[ cut here ]------------ WARNING: at kernel/irq/irqdomain.c:234 0xc0056ed0() ... irq: could not create irq-domain ... s3c2410-wdt s3c2410-wdt: failed to install irq (-22) s3c2410-wdt: probe of s3c2410-wdt failed with error -22 ... samsung-uart s3c2440-uart.0: cannot get irq 74 Fix this by increasing NR_IRQS to at least (IRQ_S3C2443_AC97 + 1) if CPU_S3C2440 is selected, so the subintc IRQ domain gets properly registered. Signed-off-by: Tomasz Figa Signed-off-by: Sylwester Nawrocki Acked-by: Heiko Stuebner Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c24xx/include/mach/irqs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c24xx/include/mach/irqs.h b/arch/arm/mach-s3c24xx/include/mach/irqs.h index b7a9f4d469e8..1e73f5fa8659 100644 --- a/arch/arm/mach-s3c24xx/include/mach/irqs.h +++ b/arch/arm/mach-s3c24xx/include/mach/irqs.h @@ -188,10 +188,8 @@ #if defined(CONFIG_CPU_S3C2416) #define NR_IRQS (IRQ_S3C2416_I2S1 + 1) -#elif defined(CONFIG_CPU_S3C2443) -#define NR_IRQS (IRQ_S3C2443_AC97+1) #else -#define NR_IRQS (IRQ_S3C2440_AC97+1) +#define NR_IRQS (IRQ_S3C2443_AC97 + 1) #endif /* compatibility define. */ -- cgit v1.2.3 From 646dd2f0a980949b05042792fbadd72b735c3eda Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 9 Apr 2013 23:52:21 +0900 Subject: ARM: S3C24XX: Fix interrupt pending register offset of the EINT controller The external pending interrupt register address (EINTPEND) offset is 0xa8, not 0x08. Without this patch the external interrupts are not properly acknowledged, which may lead to an interrupt storm and the system hang as soon as any external interrupt is requested. Signed-off-by: Sylwester Nawrocki Reviewed-by: Heiko Stuebner Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c24xx/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-s3c24xx/irq.c b/arch/arm/mach-s3c24xx/irq.c index cb9f5e011e73..d8ba9bee4c7e 100644 --- a/arch/arm/mach-s3c24xx/irq.c +++ b/arch/arm/mach-s3c24xx/irq.c @@ -500,7 +500,7 @@ struct s3c_irq_intc *s3c24xx_init_intc(struct device_node *np, base = (void *)0xfd000000; intc->reg_mask = base + 0xa4; - intc->reg_pending = base + 0x08; + intc->reg_pending = base + 0xa8; irq_num = 20; irq_start = S3C2410_IRQ(32); irq_offset = 4; -- cgit v1.2.3 From 3e2e0d2c222bdf5bafd722dec1618fa6073ef372 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 9 Apr 2013 12:33:07 -0400 Subject: tile: comment assumption about __insn_mtspr for The arch_local_irq_save(), etc., routines are required to function as compiler barriers. They do, but it's subtle and requires knowing that the gcc builtin __insn_mtspr() is marked as a memory clobber. Provide a comment explaining the assumption. Signed-off-by: Chris Metcalf [ This came about from me wondering about the synchronization rules of __insn_mtspr() - Linus ] Signed-off-by: Linus Torvalds --- arch/tile/include/asm/irqflags.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 241c0bb60b12..c96f9bbb760d 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -40,7 +40,15 @@ #include #include -/* Set and clear kernel interrupt masks. */ +/* + * Set and clear kernel interrupt masks. + * + * NOTE: __insn_mtspr() is a compiler builtin marked as a memory + * clobber. We rely on it being equivalent to a compiler barrier in + * this code since arch_local_irq_save() and friends must act as + * compiler barriers. This compiler semantic is baked into enough + * places that the compiler will maintain it going forward. + */ #if CHIP_HAS_SPLIT_INTR_MASK() #if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32 # error Fix assumptions about which word various interrupts are in -- cgit v1.2.3 From ccf932042fa7785832d8989ba1369cd7c7f5d7a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 31 Mar 2013 22:34:37 -0400 Subject: palinfo fixes * check for proc_mkdir() failures * fix buffer overrun - sizeof(format string) is *not* enough to hold sprintf() result. * use proc_remove_subtree(); life's much easier with it Signed-off-by: Al Viro --- arch/ia64/kernel/palinfo.c | 77 ++++++++-------------------------------------- 1 file changed, 13 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 77597e5ea60a..79521d5499f9 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -849,17 +849,6 @@ static palinfo_entry_t palinfo_entries[]={ #define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries) -/* - * this array is used to keep track of the proc entries we create. This is - * required in the module mode when we need to remove all entries. The procfs code - * does not do recursion of deletion - * - * Notes: - * - +1 accounts for the cpuN directory entry in /proc/pal - */ -#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) - -static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; static struct proc_dir_entry *palinfo_dir; /* @@ -971,60 +960,32 @@ palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, voi static void __cpuinit create_palinfo_proc_entries(unsigned int cpu) { -# define CPUSTR "cpu%d" - pal_func_cpu_u_t f; - struct proc_dir_entry **pdir; struct proc_dir_entry *cpu_dir; int j; - char cpustr[sizeof(CPUSTR)]; - - - /* - * we keep track of created entries in a depth-first order for - * cleanup purposes. Each entry is stored into palinfo_proc_entries - */ - sprintf(cpustr,CPUSTR, cpu); + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", cpu); cpu_dir = proc_mkdir(cpustr, palinfo_dir); + if (!cpu_dir) + return; f.req_cpu = cpu; - /* - * Compute the location to store per cpu entries - * We dont store the top level entry in this list, but - * remove it finally after removing all cpu entries. - */ - pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; - *pdir++ = cpu_dir; for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - *pdir = create_proc_read_entry( - palinfo_entries[j].name, 0, cpu_dir, - palinfo_read_entry, (void *)f.value); - pdir++; + create_proc_read_entry( + palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); } } static void remove_palinfo_proc_entries(unsigned int hcpu) { - int j; - struct proc_dir_entry *cpu_dir, **pdir; - - pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; - cpu_dir = *pdir; - *pdir++=NULL; - for (j=0; j < (NR_PALINFO_ENTRIES); j++) { - if ((*pdir)) { - remove_proc_entry ((*pdir)->name, cpu_dir); - *pdir ++= NULL; - } - } - - if (cpu_dir) { - remove_proc_entry(cpu_dir->name, palinfo_dir); - } + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", hcpu); + remove_proc_subtree(cpustr, palinfo_dir); } static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, @@ -1058,6 +1019,8 @@ palinfo_init(void) printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); palinfo_dir = proc_mkdir("pal", NULL); + if (!palinfo_dir) + return -ENOMEM; /* Create palinfo dirs in /proc for all online cpus */ for_each_online_cpu(i) { @@ -1073,22 +1036,8 @@ palinfo_init(void) static void __exit palinfo_exit(void) { - int i = 0; - - /* remove all nodes: depth first pass. Could optimize this */ - for_each_online_cpu(i) { - remove_palinfo_proc_entries(i); - } - - /* - * Remove the top level entry finally - */ - remove_proc_entry(palinfo_dir->name, NULL); - - /* - * Unregister from cpu notifier callbacks - */ unregister_hotcpu_notifier(&palinfo_cpu_notifier); + remove_proc_subtree("pal", NULL); } module_init(palinfo_init); -- cgit v1.2.3 From f934af05cb1bf20558542185299394a69060b829 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 9 Apr 2013 15:35:46 -0400 Subject: add memory barrier to arch_local_irq_restore arch_local_irq_save() and friends are required to act as compiler memory barriers. This patch adds a "memory" clobber to the inline asm code in arch_local_irq_restore() which is used as the building block for other functions needing to set/clear the interrupt enable in the CSR register. Signed-off-by: Mark Salter --- arch/c6x/include/asm/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/c6x/include/asm/irqflags.h b/arch/c6x/include/asm/irqflags.h index cf78e09e18c3..2c71d5634ec2 100644 --- a/arch/c6x/include/asm/irqflags.h +++ b/arch/c6x/include/asm/irqflags.h @@ -27,7 +27,7 @@ static inline unsigned long arch_local_save_flags(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags)); + asm volatile (" mvc .s2 %0,CSR\n" : : "b"(flags) : "memory"); } /* unconditionally enable interrupts */ -- cgit v1.2.3 From f110c0c1926028a233830c6166e4d40314420823 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Apr 2013 16:18:55 +1000 Subject: powerpc: fix compiling CONFIG_PPC_TRANSACTIONAL_MEM when CONFIG_ALTIVEC=n We can't compile a kernel with CONFIG_ALTIVEC=n when CONFIG_PPC_TRANSACTIONAL_MEM=y. We currently get: arch/powerpc/kernel/tm.S:320: Error: unsupported relocation against THREAD_VSCR arch/powerpc/kernel/tm.S:323: Error: unsupported relocation against THREAD_VR0 arch/powerpc/kernel/tm.S:323: Error: unsupported relocation against THREAD_VR0 etc. The below fixes this with a sprinkling of #ifdefs. This was found by mpe with kisskb: http://kisskb.ellerman.id.au/kisskb/buildresult/8539442/ Signed-off-by: Michael Neuling Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/process.c | 2 ++ arch/powerpc/kernel/signal_32.c | 2 ++ arch/powerpc/kernel/signal_64.c | 2 ++ arch/powerpc/kernel/tm.S | 2 ++ 4 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 59dd545fdde1..16e77a81ab4f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -555,10 +555,12 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) new->thread.regs->msr |= (MSR_FP | new->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(&new->thread); new->thread.regs->msr |= MSR_VEC; } +#endif /* We may as well turn on VSX too since all the state is restored now */ if (msr & MSR_VSX) new->thread.regs->msr |= MSR_VSX; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3acb28e245b4..95068bf569ad 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -866,10 +866,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, do_load_up_transact_fpu(¤t->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(¤t->thread); regs->msr |= MSR_VEC; } +#endif return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 995f8543cb57..c1794286098c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -522,10 +522,12 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, do_load_up_transact_fpu(¤t->thread); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } +#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { do_load_up_transact_altivec(¤t->thread); regs->msr |= MSR_VEC; } +#endif return err; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 84dbace657ce..2da67e7a16d5 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -309,6 +309,7 @@ _GLOBAL(tm_recheckpoint) or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ mtmsr r5 +#ifdef CONFIG_ALTIVEC /* FP and VEC registers: These are recheckpointed from thread.fpr[] * and thread.vr[] respectively. The thread.transact_fpr[] version * is more modern, and will be loaded subsequently by any FPUnavailable @@ -323,6 +324,7 @@ _GLOBAL(tm_recheckpoint) REST_32VRS(0, r5, r3) /* r5 scratch, r3 THREAD ptr */ ld r5, THREAD_VRSAVE(r3) mtspr SPRN_VRSAVE, r5 +#endif dont_restore_vec: andi. r0, r4, MSR_FP -- cgit v1.2.3 From aaaf165b247a1a8ea5cd2936d9fd1eefe5e580f9 Mon Sep 17 00:00:00 2001 From: Nigel Roberts Date: Sun, 31 Mar 2013 15:13:24 +1100 Subject: Fix GE0/GE1 init on ix2-200 as GE0 has no PHY Signed-off-by: Jason Cooper --- arch/arm/mach-kirkwood/board-iomega_ix2_200.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c index f655b2637b0e..e5f70415905a 100644 --- a/arch/arm/mach-kirkwood/board-iomega_ix2_200.c +++ b/arch/arm/mach-kirkwood/board-iomega_ix2_200.c @@ -20,10 +20,15 @@ static struct mv643xx_eth_platform_data iomega_ix2_200_ge00_data = { .duplex = DUPLEX_FULL, }; +static struct mv643xx_eth_platform_data iomega_ix2_200_ge01_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR(11), +}; + void __init iomega_ix2_200_init(void) { /* * Basic setup. Needs to be called early. */ - kirkwood_ge01_init(&iomega_ix2_200_ge00_data); + kirkwood_ge00_init(&iomega_ix2_200_ge00_data); + kirkwood_ge01_init(&iomega_ix2_200_ge01_data); } -- cgit v1.2.3 From 600468d0686096ddc1000c8a2e69475931084414 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 5 Apr 2013 14:32:52 +0200 Subject: arm: mvebu: Fix the irq map function in SMP mode This patch fix the regression introduced by the commit 3202bf0157ccb "arm: mvebu: Improve the SMP support of the interrupt controller": GPIO IRQ were no longer delivered to the CPUs. To be delivered to a CPU an interrupt must be enabled at CPU level and at interrupt source level. Before the offending patch, all the interrupts were enabled at source level during map() function. Mask() and unmask() was done by handling the per-CPU part. It was fine when running in UP with only one CPU. The offending patch added support for SMP, in this case mask() and unmask() was done by handling the interrupt source level part. The per-CPU level part was handled by the affinity API to select the CPU which will receive the interrupt. (Due to some hardware limitation only one CPU at a time can received a given interrupt). For "normal" interrupt __setup_irq() was called when an irq was registered. irq_set_affinity() is called from this function, which enabled the interrupt on one of the CPUs. Whereas for GPIO IRQ which were chained interrupts, the irq_set_affinity() was never called and none of the CPUs was selected to receive the interrupt. With this patch all the interrupt are enable on the current CPU during map() function. Enabling the interrupts on a CPU doesn't depend anymore on irq_set_affinity() and then the chained irq are not anymore a special case. However the CPU which will receive the irq can still be modify later using irq_set_affinity(). Tested with Mirabox (A370) and Openblocks AX3 (AXP), rootfs mounted over NFS, compiled with CONFIG_SMP=y/N. Signed-off-by: Gregory CLEMENT Reported-by: Ryan Press Investigated-by: Ezequiel Garcia Tested-by: Ezequiel Garcia Tested-by: Ryan Press Signed-off-by: Jason Cooper --- arch/arm/mach-mvebu/irq-armada-370-xp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/irq-armada-370-xp.c b/arch/arm/mach-mvebu/irq-armada-370-xp.c index 6a9195e10579..d5970f5a1e8d 100644 --- a/arch/arm/mach-mvebu/irq-armada-370-xp.c +++ b/arch/arm/mach-mvebu/irq-armada-370-xp.c @@ -61,7 +61,6 @@ static struct irq_domain *armada_370_xp_mpic_domain; */ static void armada_370_xp_irq_mask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) @@ -70,15 +69,10 @@ static void armada_370_xp_irq_mask(struct irq_data *d) else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); -#endif } static void armada_370_xp_irq_unmask(struct irq_data *d) { -#ifdef CONFIG_SMP irq_hw_number_t hwirq = irqd_to_hwirq(d); if (hwirq != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) @@ -87,10 +81,6 @@ static void armada_370_xp_irq_unmask(struct irq_data *d) else writel(hwirq, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#else - writel(irqd_to_hwirq(d), - per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); -#endif } #ifdef CONFIG_SMP @@ -146,7 +136,11 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { armada_370_xp_irq_mask(irq_get_irq_data(virq)); - writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); + if (hw != ARMADA_370_XP_TIMER0_PER_CPU_IRQ) + writel(hw, per_cpu_int_base + + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); + else + writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); irq_set_status_flags(virq, IRQ_LEVEL); if (hw == ARMADA_370_XP_TIMER0_PER_CPU_IRQ) { -- cgit v1.2.3 From 1160c2779b826c6f5c08e5cc542de58fd1f667d5 Mon Sep 17 00:00:00 2001 From: Samu Kallio Date: Sat, 23 Mar 2013 09:36:35 -0400 Subject: x86, mm, paravirt: Fix vmalloc_fault oops during lazy MMU updates In paravirtualized x86_64 kernels, vmalloc_fault may cause an oops when lazy MMU updates are enabled, because set_pgd effects are being deferred. One instance of this problem is during process mm cleanup with memory cgroups enabled. The chain of events is as follows: - zap_pte_range enables lazy MMU updates - zap_pte_range eventually calls mem_cgroup_charge_statistics, which accesses the vmalloc'd mem_cgroup per-cpu stat area - vmalloc_fault is triggered which tries to sync the corresponding PGD entry with set_pgd, but the update is deferred - vmalloc_fault oopses due to a mismatch in the PUD entries The OOPs usually looks as so: ------------[ cut here ]------------ kernel BUG at arch/x86/mm/fault.c:396! invalid opcode: 0000 [#1] SMP .. snip .. CPU 1 Pid: 10866, comm: httpd Not tainted 3.6.10-4.fc18.x86_64 #1 RIP: e030:[] [] vmalloc_fault+0x11f/0x208 .. snip .. Call Trace: [] do_page_fault+0x399/0x4b0 [] ? xen_mc_extend_args+0xec/0x110 [] page_fault+0x25/0x30 [] ? mem_cgroup_charge_statistics.isra.13+0x13/0x50 [] __mem_cgroup_uncharge_common+0xd8/0x350 [] mem_cgroup_uncharge_page+0x57/0x60 [] page_remove_rmap+0xe0/0x150 [] ? vm_normal_page+0x1a/0x80 [] unmap_single_vma+0x531/0x870 [] unmap_vmas+0x52/0xa0 [] ? pte_mfn_to_pfn+0x72/0x100 [] exit_mmap+0x98/0x170 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e [] mmput+0x83/0xf0 [] exit_mm+0x104/0x130 [] do_exit+0x15a/0x8c0 [] do_group_exit+0x3f/0xa0 [] sys_exit_group+0x17/0x20 [] system_call_fastpath+0x16/0x1b Calling arch_flush_lazy_mmu_mode immediately after set_pgd makes the changes visible to the consistency checks. Cc: RedHat-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=914737 Tested-by: Josh Boyer Reported-and-Tested-by: Krishna Raman Signed-off-by: Samu Kallio Link: http://lkml.kernel.org/r/1364045796-10720-1-git-send-email-konrad.wilk@oracle.com Tested-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin --- arch/x86/mm/fault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b97525246d4..0e883364abb5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -378,10 +378,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_ref)) return -1; - if (pgd_none(*pgd)) + if (pgd_none(*pgd)) { set_pgd(pgd, *pgd_ref); - else + arch_flush_lazy_mmu_mode(); + } else { BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } /* * Below here mismatches are bugs because these lower tables -- cgit v1.2.3 From 511ba86e1d386f671084b5d0e6f110bb30b8eeb2 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sat, 23 Mar 2013 09:36:36 -0400 Subject: x86, mm: Patch out arch_flush_lazy_mmu_mode() when running on bare metal Invoking arch_flush_lazy_mmu_mode() results in calls to preempt_enable()/disable() which may have performance impact. Since lazy MMU is not used on bare metal we can patch away arch_flush_lazy_mmu_mode() so that it is never called in such environment. [ hpa: the previous patch "Fix vmalloc_fault oops during lazy MMU updates" may cause a minor performance regression on bare metal. This patch resolves that performance regression. It is somewhat unclear to me if this is a good -stable candidate. ] Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1364045796-10720-2-git-send-email-konrad.wilk@oracle.com Tested-by: Josh Boyer Tested-by: Konrad Rzeszutek Wilk Acked-by: Borislav Petkov Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin Cc: SEE NOTE ABOVE --- arch/x86/include/asm/paravirt.h | 5 ++++- arch/x86/include/asm/paravirt_types.h | 2 ++ arch/x86/kernel/paravirt.c | 25 +++++++++++++------------ arch/x86/lguest/boot.c | 1 + arch/x86/xen/mmu.c | 1 + 5 files changed, 21 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 5edd1742cfd0..7361e47db79f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void) PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); } -void arch_flush_lazy_mmu_mode(void); +static inline void arch_flush_lazy_mmu_mode(void) +{ + PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush); +} static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 142236ed83af..b3b0ec1dac86 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -91,6 +91,7 @@ struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); + void (*flush)(void); }; struct pv_time_ops { @@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); +void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); u32 _paravirt_ident_32(u32); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 17fff18a1031..8bfb335f74bb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } +void paravirt_flush_lazy_mmu(void) +{ + preempt_disable(); + + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + arch_enter_lazy_mmu_mode(); + } + + preempt_enable(); +} + void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); @@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return this_cpu_read(paravirt_lazy_mode); } -void arch_flush_lazy_mmu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - arch_leave_lazy_mmu_mode(); - arch_enter_lazy_mmu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -475,6 +475,7 @@ struct pv_mmu_ops pv_mmu_ops = { .lazy_mode = { .enter = paravirt_nop, .leave = paravirt_nop, + .flush = paravirt_nop, }, .set_fixmap = native_set_fixmap, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 1cbd89ca5569..7114c63f047d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1334,6 +1334,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; + pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu; pv_mmu_ops.pte_update = lguest_pte_update; pv_mmu_ops.pte_update_defer = lguest_pte_update; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6afbb2ca9a0a..2f5d6875555e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2196,6 +2196,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, .leave = xen_leave_lazy_mmu, + .flush = paravirt_flush_lazy_mmu, }, .set_fixmap = xen_set_fixmap, -- cgit v1.2.3 From c5e6cb051c5f7d56f05bd6a4af22cb300a4ced79 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 18 Feb 2013 18:13:09 +0000 Subject: kvm/powerpc/e500mc: fix tlb invalidation on cpu migration The existing check handles the case where we've migrated to a different core than we last ran on, but it doesn't handle the case where we're still on the same cpu we last ran on, but some other vcpu has run on this cpu in the meantime. Without this, guest segfaults (and other misbehavior) have been seen in smp guests. Cc: stable@vger.kernel.org # 3.8.x Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500mc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 1f89d26e65fb..2f4baa074b2e 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -108,6 +108,8 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -136,8 +138,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); mtspr(SPRN_GESR, vcpu->arch.shared->esr); - if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || + __get_cpu_var(last_vcpu_on_cpu) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); + __get_cpu_var(last_vcpu_on_cpu) = vcpu; + } kvmppc_load_guest_fp(vcpu); } -- cgit v1.2.3 From f76cfa3c2496c462b5bc01bd0c9340c2715b73ca Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 10 Apr 2013 15:28:25 +0200 Subject: x86/mm/cpa: Convert noop to functional fix Commit: a8aed3e0752b ("x86/mm/pageattr: Prevent PSE and GLOABL leftovers to confuse pmd/pte_present and pmd_huge") introduced a valid fix but one location that didn't trigger the bug that lead to finding those (small) problems, wasn't updated using the right variable. The wrong variable was also initialized for no good reason, that may have been the source of the confusion. Remove the noop initialization accordingly. Commit a8aed3e0752b also erroneously removed one canon_pgprot pass meant to clear pmd bitflags not supported in hardware by older CPUs, that automatically gets corrected by this patch too by applying it to the right variable in the new location. Reported-by: Stefan Bader Signed-off-by: Andrea Arcangeli Acked-by: Borislav Petkov Cc: Andy Whitcroft Cc: Mel Gorman Link: http://lkml.kernel.org/r/1365600505-19314-1-git-send-email-aarcange@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 091934e1d0d9..7896f7190fda 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -467,7 +467,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = req_prot = pte_pgprot(old_pte); + old_prot = req_prot = pte_pgprot(old_pte); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); @@ -478,12 +478,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL * for the ancient hardware that doesn't support it. */ - if (pgprot_val(new_prot) & _PAGE_PRESENT) - pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL; + if (pgprot_val(req_prot) & _PAGE_PRESENT) + pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL; else - pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); + pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); - new_prot = canon_pgprot(new_prot); + req_prot = canon_pgprot(req_prot); /* * old_pte points to the large page base address. So we need -- cgit v1.2.3 From 6b2ba1a9129bd98b5268a4efb167c95189b3eabf Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 13 Feb 2013 19:37:48 +0000 Subject: kvm/ppc/e500: h2g_tlb1_rmap: esel 0 is valid Add one to esel values in h2g_tlb1_rmap, so that "no mapping" can be distinguished from "esel 0". Note that we're not saved by the fact that host esel 0 is reserved for non-KVM use, because KVM host esel numbering is not the raw host numbering (see to_htlb1_esel). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index a222edfb9a9b..35fb80ec1f57 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -511,10 +511,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; } -- cgit v1.2.3 From 66a5fecdccd4f32a22d2d110cf4f002755b520d8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 13 Feb 2013 19:37:49 +0000 Subject: kvm/ppc/e500: g2h_tlb1_map: clear old bit before setting new bit It's possible that we're using the same host TLB1 slot to map (a presumably different portion of) the same guest TLB1 entry. Clear the bit in the map before setting it, so that if the esels are the same the bit will remain set. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 35fb80ec1f57..8e72b2124f63 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -507,13 +507,14 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) vcpu_e500->host_tlb1_nv = 0; - vcpu_e500->tlb_refs[1][sesel] = *ref; - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; if (vcpu_e500->h2g_tlb1_rmap[sesel]) { unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel] - 1; vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; return sesel; -- cgit v1.2.3 From 4d2be6f7c75e814ee28b007dbf0c26dfcbbe20a1 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 6 Mar 2013 16:02:49 +0000 Subject: kvm/ppc/e500: eliminate tlb_refs Commit 523f0e5421c12610527c620b983b443f329e3a32 ("KVM: PPC: E500: Explicitly mark shadow maps invalid") began using E500_TLB_VALID for guest TLB1 entries, and skipping invalidations if it's not set. However, when E500_TLB_VALID was set for such entries, it was on a fake local ref, and so the invalidations never happen. gtlb_privs is documented as being only for guest TLB0, though we already violate that with E500_TLB_BITMAP. Now that we have MMU notifiers, and thus don't need to actually retain a reference to the mapped pages, get rid of tlb_refs, and use gtlb_privs for E500_TLB_VALID in TLB1. Since we can have more than one host TLB entry for a given tlbe_ref, be careful not to clear existing flags that are relevant to other host TLB entries when preparing a new host TLB entry. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 24 +++++-------- arch/powerpc/kvm/e500_mmu_host.c | 75 ++++++++++++---------------------------- 2 files changed, 30 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 41cefd43655f..33db48a8ce24 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -26,17 +26,20 @@ #define E500_PID_NUM 3 #define E500_TLB_NUM 2 -#define E500_TLB_VALID 1 -#define E500_TLB_BITMAP 2 +/* entry is mapped somewhere in host TLB */ +#define E500_TLB_VALID (1 << 0) +/* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ +#define E500_TLB_BITMAP (1 << 1) +/* TLB1 entry is mapped by host TLB0 */ #define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ + pfn_t pfn; /* valid only for TLB0, except briefly */ + unsigned int flags; /* E500_TLB_* */ }; struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ + struct tlbe_ref ref; }; #ifdef CONFIG_KVM_E500V2 @@ -63,17 +66,6 @@ struct kvmppc_vcpu_e500 { unsigned int gtlb_nv[E500_TLB_NUM]; - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; unsigned int host_tlb1_nv; u32 svr; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 8e72b2124f63..1c6a9d729df4 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -193,8 +193,11 @@ void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; /* Don't bother with unmapped entries */ - if (!(ref->flags & E500_TLB_VALID)) - return; + if (!(ref->flags & E500_TLB_VALID)) { + WARN(ref->flags & (E500_TLB_BITMAP | E500_TLB_TLB0), + "%s: flags %x\n", __func__, ref->flags); + WARN_ON(tlbsel == 1 && vcpu_e500->g2h_tlb1_map[esel]); + } if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; @@ -248,7 +251,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, pfn_t pfn) { ref->pfn = pfn; - ref->flags = E500_TLB_VALID; + ref->flags |= E500_TLB_VALID; if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); @@ -257,6 +260,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + /* FIXME: don't log bogus pfn for TLB1 */ trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } @@ -274,36 +278,23 @@ static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { - int tlbsel = 0; - int i; - - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } -} - -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; + int tlbsel; int i; - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); + for (tlbsel = 0; tlbsel <= 1; tlbsel++) { + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } } - - clear_tlb_privs(vcpu_e500); } void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); + clear_tlb_privs(vcpu_e500); clear_tlb1_bitmap(vcpu_e500); } @@ -458,8 +449,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); } - /* Drop old ref and setup new one. */ - kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, @@ -512,10 +501,10 @@ static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); } - vcpu_e500->tlb_refs[1][sesel] = *ref; vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; vcpu_e500->h2g_tlb1_rmap[sesel] = esel + 1; + WARN_ON(!(ref->flags & E500_TLB_VALID)); return sesel; } @@ -527,13 +516,12 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, struct kvm_book3e_206_tlb_entry *stlbe, int esel) { - struct tlbe_ref ref; + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[1][esel].ref; int sesel; int r; - ref.flags = 0; r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, - &ref); + ref); if (r) return r; @@ -545,7 +533,7 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, } /* Otherwise map into TLB1 */ - sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, ref, esel); write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); return 0; @@ -566,7 +554,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, case 0: priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - /* Triggers after clear_tlb_refs or on initial mapping */ + /* Triggers after clear_tlb_privs or on initial mapping */ if (!(priv->ref.flags & E500_TLB_VALID)) { kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); } else { @@ -666,35 +654,16 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * host_tlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->h2g_tlb1_rmap) - goto err; + return -EINVAL; return 0; - -err: - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); - return -EINVAL; } void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); } -- cgit v1.2.3 From 7791c8423f1f7f4dad94e753bae67461d5b80be8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 10 Apr 2013 10:59:34 +0200 Subject: x86,efi: Check max_size only if it is non-zero. Some EFI implementations return always a MaximumVariableSize of 0, check against max_size only if it is non-zero. My Intel DQ67SW desktop board has such an implementation. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index c89c245eff40..3f96a487aa2a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1018,7 +1018,12 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (status != EFI_SUCCESS) return status; - if (!storage_size || size > remaining_size || size > max_size || + if (!max_size && remaining_size > size) + printk_once(KERN_ERR FW_BUG "Broken EFI implementation" + " is returning MaxVariableSize=0\n"); + + if (!storage_size || size > remaining_size || + (max_size && size > max_size) || (remaining_size - size) < (storage_size / 2)) return EFI_OUT_OF_RESOURCES; -- cgit v1.2.3 From 18699739b60cb60230153ff5475b2ba92be185f9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 11 Apr 2013 15:36:09 +0200 Subject: x86/mm/cpa/selftest: Fix false positive in CPA self test If the pmd is not present, _PAGE_PSE will not be set anymore. Fix the false positive. Reported-by: Ingo Molnar Signed-off-by: Andrea Arcangeli Cc: Stefan Bader Cc: Andy Whitcroft Cc: Mel Gorman Cc: Borislav Petkov Link: http://lkml.kernel.org/r/1365687369-30802-1-git-send-email-aarcange@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index b0086567271c..0e38951e65eb 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -68,7 +68,7 @@ static int print_split(struct split_state *s) s->gpg++; i += GPS/PAGE_SIZE; } else if (level == PG_LEVEL_2M) { - if (!(pte_val(*pte) & _PAGE_PSE)) { + if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) { printk(KERN_ERR "%lx level %d but not PSE %Lx\n", addr, level, (u64)pte_val(*pte)); -- cgit v1.2.3 From 26564600c9e88c6572a5e6ef5ae9121907edfb7f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 11 Apr 2013 13:59:52 -0400 Subject: x86/mm: Flush lazy MMU when DEBUG_PAGEALLOC is set When CONFIG_DEBUG_PAGEALLOC is set page table updates made by kernel_map_pages() are not made visible (via TLB flush) immediately if lazy MMU is on. In environments that support lazy MMU (e.g. Xen) this may lead to fatal page faults, for example, when zap_pte_range() needs to allocate pages in __tlb_remove_page() -> tlb_next_batch(). Signed-off-by: Boris Ostrovsky Cc: konrad.wilk@oracle.com Link: http://lkml.kernel.org/r/1365703192-2089-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7896f7190fda..fb4e73ec24d8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1413,6 +1413,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 1de14c3c5cbc9bb17e9dcc648cda51c0c85d54b9 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Apr 2013 16:23:54 -0700 Subject: x86-32: Fix possible incomplete TLB invalidate with PAE pagetables This patch attempts to fix: https://bugzilla.kernel.org/show_bug.cgi?id=56461 The symptom is a crash and messages like this: chrome: Corrupted page table at address 34a03000 *pdpt = 0000000000000000 *pde = 0000000000000000 Bad pagetable: 000f [#1] PREEMPT SMP Ingo guesses this got introduced by commit 611ae8e3f520 ("x86/tlb: enable tlb flush range support for x86") since that code started to free unused pagetables. On x86-32 PAE kernels, that new code has the potential to free an entire PMD page and will clear one of the four page-directory-pointer-table (aka pgd_t entries). The hardware aggressively "caches" these top-level entries and invlpg does not actually affect the CPU's copy. If we clear one we *HAVE* to do a full TLB flush, otherwise we might continue using a freed pmd page. (note, we do this properly on the population side in pud_populate()). This patch tracks whenever we clear one of these entries in the 'struct mmu_gather', and ensures that we follow up with a full tlb flush. BTW, I disassembled and checked that: if (tlb->fullmm == 0) and if (!tlb->fullmm && !tlb->need_flush_all) generate essentially the same code, so there should be zero impact there to the !PAE case. Signed-off-by: Dave Hansen Cc: Peter Anvin Cc: Ingo Molnar Cc: Artem S Tashkinov Signed-off-by: Linus Torvalds --- arch/x86/include/asm/tlb.h | 2 +- arch/x86/mm/pgtable.c | 7 +++++++ include/asm-generic/tlb.h | 7 ++++++- mm/memory.c | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 4fef20773b8f..c7797307fc2b 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -7,7 +7,7 @@ #define tlb_flush(tlb) \ { \ - if (tlb->fullmm == 0) \ + if (!tlb->fullmm && !tlb->need_flush_all) \ flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \ else \ flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 193350b51f90..17fda6a8b3c2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); + /* + * NOTE! For PAE, any changes to the top page-directory-pointer-table + * entries need a full cr3 reload to flush. + */ +#ifdef CONFIG_X86_PAE + tlb->need_flush_all = 1; +#endif tlb_remove_page(tlb, virt_to_page(pmd)); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 25f01d0bc149..b1b1fa6ffffe 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -99,7 +99,12 @@ struct mmu_gather { unsigned int need_flush : 1, /* Did free PTEs */ fast_mode : 1; /* No batching */ - unsigned int fullmm; + /* we are in the middle of an operation to clear + * a full mm and can make some optimizations */ + unsigned int fullmm : 1, + /* we have performed an operation which + * requires a complete flush of the tlb */ + need_flush_all : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; diff --git a/mm/memory.c b/mm/memory.c index 494526ae024a..13cbc420fead 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -216,6 +216,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) tlb->mm = mm; tlb->fullmm = fullmm; + tlb->need_flush_all = 0; tlb->start = -1UL; tlb->end = 0; tlb->need_flush = 0; -- cgit v1.2.3 From 05e38e5d5d437c762c79428ae8434632a8ca2c5e Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 15 Apr 2013 11:44:14 +1000 Subject: powerpc: Fix audit crash due to save/restore PPR changes The current mainline crashes when hitting userspace with the following: kernel BUG at kernel/auditsc.c:1769! cpu 0x1: Vector: 700 (Program Check) at [c000000023883a60] pc: c0000000001047a8: .__audit_syscall_entry+0x38/0x130 lr: c00000000000ed64: .do_syscall_trace_enter+0xc4/0x270 sp: c000000023883ce0 msr: 8000000000029032 current = 0xc000000023800000 paca = 0xc00000000f080380 softe: 0 irq_happened: 0x01 pid = 1629, comm = start_udev kernel BUG at kernel/auditsc.c:1769! enter ? for help [c000000023883d80] c00000000000ed64 .do_syscall_trace_enter+0xc4/0x270 [c000000023883e30] c000000000009b08 syscall_dotrace+0xc/0x38 --- Exception: c00 (System Call) at 0000008010ec50dc Bisecting found the following patch caused it: commit 44e9309f1f357794b7ae93d5f3e3e6f11d2b8a7f Author: Haren Myneni powerpc: Implement PPR save/restore It was found this patch corrupted r9 when calling SET_DEFAULT_THREAD_PPR() Using r10 as a scratch register instead of r9 solved the problem. Signed-off-by: Alistair Popple Acked-by: Michael Neuling Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 256c5bf0adb7..3acb1a076d3f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -304,7 +304,7 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ - SET_DEFAULT_THREAD_PPR(r3, r9) /* Set thread.ppr = 3 */ + SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite -- cgit v1.2.3 From d8b92292408831d86ff7b781e66bf79301934b99 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 9 Apr 2013 22:31:24 +0000 Subject: powerpc: add a missing label in resume_kernel A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete kprobe and migrate exception frame). This will cause the kernel branch to an undetermined address if there really has a conflict when updating the thread flags. Signed-off-by: Kevin Hao Cc: stable@vger.kernel.org Acked-By: Tiejun Chen Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3acb1a076d3f..04d69c4a5ac2 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -657,7 +657,7 @@ resume_kernel: /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h addi r5,r9,TI_FLAGS - ldarx r4,0,r5 +0: ldarx r4,0,r5 andc r4,r4,r11 stdcx. r4,0,r5 bne- 0b -- cgit v1.2.3 From cc5a080c5d40c36089bb08a8a16fa3fc7047fe0f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:46 -0700 Subject: efi: Pass boot services variable info to runtime code EFI variables can be flagged as being accessible only within boot services. This makes it awkward for us to figure out how much space they use at runtime. In theory we could figure this out by simply comparing the results from QueryVariableInfo() to the space used by all of our variables, but that fails if the platform doesn't garbage collect on every boot. Thankfully, calling QueryVariableInfo() while still inside boot services gives a more reliable answer. This patch passes that information from the EFI boot stub up to the efi platform code. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 47 +++++++++++++++++++++++++++++++++++ arch/x86/include/asm/efi.h | 7 ++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/platform/efi/efi.c | 21 ++++++++++++++++ 4 files changed, 76 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035a6b96..8615f7581820 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } +static efi_status_t setup_efi_vars(struct boot_params *params) +{ + struct setup_data *data; + struct efi_var_bootdata *efidata; + u64 store_size, remaining_size, var_size; + efi_status_t status; + + if (!sys_table->runtime->query_variable_info) + return EFI_UNSUPPORTED; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + status = efi_call_phys4(sys_table->runtime->query_variable_info, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, &store_size, + &remaining_size, &var_size); + + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*efidata), &efidata); + + if (status != EFI_SUCCESS) + return status; + + efidata->data.type = SETUP_EFI_VARS; + efidata->data.len = sizeof(struct efi_var_bootdata) - + sizeof(struct setup_data); + efidata->data.next = 0; + efidata->store_size = store_size; + efidata->remaining_size = remaining_size; + efidata->max_var_size = var_size; + + if (data) + data->next = (unsigned long)efidata; + else + params->hdr.setup_data = (unsigned long)efidata; + +} + static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; @@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, setup_graphics(boot_params); + setup_efi_vars(boot_params); + setup_efi_pci(boot_params); status = efi_call_phys3(sys_table->boottime->allocate_pool, diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 60c89f30c727..2fb5d5884e23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); +struct efi_var_bootdata { + struct setup_data data; + u64 store_size; + u64 remaining_size; + u64 max_var_size; +}; + #ifdef CONFIG_EFI static inline bool efi_is_native(void) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index c15ddaf90710..08744242b8d2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -6,6 +6,7 @@ #define SETUP_E820_EXT 1 #define SETUP_DTB 2 #define SETUP_PCI 3 +#define SETUP_EFI_VARS 4 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 3f96a487aa2a..977d1ce7e173 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -69,6 +69,10 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_var_store_size; +static u64 efi_var_remaining_size; +static u64 efi_var_max_var_size; + unsigned long x86_efi_facility; /* @@ -682,6 +686,9 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; void *tmp; + struct setup_data *data; + struct efi_var_bootdata *efi_var_data; + u64 pa_data; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -699,6 +706,20 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*efi_var_data)); + if (data->type == SETUP_EFI_VARS) { + efi_var_data = (struct efi_var_bootdata *)data; + + efi_var_store_size = efi_var_data->store_size; + efi_var_remaining_size = efi_var_data->remaining_size; + efi_var_max_var_size = efi_var_data->max_var_size; + } + pa_data = data->next; + early_iounmap(data, sizeof(*efi_var_data)); + } + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* -- cgit v1.2.3 From 31ff2f20d9003e74991d135f56e503fe776c127c Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 15 Apr 2013 13:09:47 -0700 Subject: efi: Distinguish between "remaining space" and actually used space EFI implementations distinguish between space that is actively used by a variable and space that merely hasn't been garbage collected yet. Space that hasn't yet been garbage collected isn't available for use and so isn't counted in the remaining_space field returned by QueryVariableInfo(). Combined with commit 68d9298 this can cause problems. Some implementations don't garbage collect until the remaining space is smaller than the maximum variable size, and as a result check_var_size() will always fail once more than 50% of the variable store has been used even if most of that space is marked as available for garbage collection. The user is unable to create new variables, and deleting variables doesn't increase the remaining space. The problem that 68d9298 was attempting to avoid was one where certain platforms fail if the actively used space is greater than 50% of the available storage space. We should be able to calculate that by simply summing the size of each available variable and subtracting that from the total storage space. With luck this will fix the problem described in https://bugzilla.kernel.org/show_bug.cgi?id=55471 without permitting damage to occur to the machines 68d9298 was attempting to fix. Signed-off-by: Matthew Garrett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 106 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 977d1ce7e173..4959e3f89d74 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,13 @@ #define EFI_DEBUG 1 +/* + * There's some additional metadata associated with each + * variable. Intel's reference implementation is 60 bytes - bump that + * to account for potential alignment constraints + */ +#define VAR_METADATA_SIZE 64 + struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, .acpi = EFI_INVALID_TABLE_ADDR, @@ -72,6 +80,9 @@ static efi_system_table_t efi_systab __initdata; static u64 efi_var_store_size; static u64 efi_var_remaining_size; static u64 efi_var_max_var_size; +static u64 boot_used_size; +static u64 boot_var_size; +static u64 active_size; unsigned long x86_efi_facility; @@ -166,8 +177,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - return efi_call_virt3(get_next_variable, - name_size, name, vendor); + efi_status_t status; + static bool finished = false; + static u64 var_size; + + status = efi_call_virt3(get_next_variable, + name_size, name, vendor); + + if (status == EFI_NOT_FOUND) { + finished = true; + if (var_size < boot_used_size) { + boot_var_size = boot_used_size - var_size; + active_size += boot_var_size; + } else { + printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); + } + } + + if (boot_used_size && !finished) { + unsigned long size; + u32 attr; + efi_status_t s; + void *tmp; + + s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); + + if (s != EFI_BUFFER_TOO_SMALL || !size) + return status; + + tmp = kmalloc(size, GFP_ATOMIC); + + if (!tmp) + return status; + + s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); + + if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { + var_size += size; + var_size += ucs2_strsize(name, 1024); + active_size += size; + active_size += VAR_METADATA_SIZE; + active_size += ucs2_strsize(name, 1024); + } + + kfree(tmp); + } + + return status; } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -176,9 +232,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - return efi_call_virt5(set_variable, - name, vendor, attr, - data_size, data); + efi_status_t status; + u32 orig_attr = 0; + unsigned long orig_size = 0; + + status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, + NULL); + + if (status != EFI_BUFFER_TOO_SMALL) + orig_size = 0; + + status = efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); + + if (status == EFI_SUCCESS) { + if (orig_size) { + active_size -= orig_size; + active_size -= ucs2_strsize(name, 1024); + active_size -= VAR_METADATA_SIZE; + } + if (data_size) { + active_size += data_size; + active_size += ucs2_strsize(name, 1024); + active_size += VAR_METADATA_SIZE; + } + } + + return status; } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -720,6 +801,8 @@ void __init efi_init(void) early_iounmap(data, sizeof(*efi_var_data)); } + boot_used_size = efi_var_store_size - efi_var_remaining_size; + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* @@ -1042,10 +1125,21 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) if (!max_size && remaining_size > size) printk_once(KERN_ERR FW_BUG "Broken EFI implementation" " is returning MaxVariableSize=0\n"); + /* + * Some firmware implementations refuse to boot if there's insufficient + * space in the variable store. We account for that by refusing the + * write if permitting it would reduce the available space to under + * 50%. However, some firmware won't reclaim variable space until + * after the used (not merely the actively used) space drops below + * a threshold. We can approximate that case with the value calculated + * above. If both the firmware and our calculations indicate that the + * available space would drop below 50%, refuse the write. + */ if (!storage_size || size > remaining_size || (max_size && size > max_size) || - (remaining_size - size) < (storage_size / 2)) + ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && + (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; return EFI_SUCCESS; -- cgit v1.2.3 From f1923820c447e986a9da0fc6bf60c1dccdf0408e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 16 Apr 2013 13:51:43 +0200 Subject: perf/x86: Fix offcore_rsp valid mask for SNB/IVB The valid mask for both offcore_response_0 and offcore_response_1 was wrong for SNB/SNB-EP, IVB/IVB-EP. It was possible to write to reserved bit and cause a GP fault crashing the kernel. This patch fixes the problem by correctly marking the reserved bits in the valid mask for all the processors mentioned above. A distinction between desktop and server parts is introduced because bits 24-30 are only available on the server parts. This version of the patch is just a rebase to perf/urgent tree and should apply to older kernels as well. Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: jolsa@redhat.com Cc: gregkh@linuxfoundation.org Cc: security@kernel.org Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dab7580c47ae..cc45deb791b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -153,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = }; static struct extra_reg intel_snb_extra_regs[] __read_mostly = { - INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), - INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), + EVENT_EXTRA_END +}; + +static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), EVENT_EXTRA_END }; @@ -2097,7 +2103,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 45) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; @@ -2123,7 +2132,10 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - x86_pmu.extra_regs = intel_snb_extra_regs; + if (boot_cpu_data.x86_model == 62) + x86_pmu.extra_regs = intel_snbep_extra_regs; + else + x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; x86_pmu.er_flags |= ERF_NO_HT_SHARING; -- cgit v1.2.3 From f6ce5002629e08eaa777ced3872a98f76845143e Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:08 +0400 Subject: x86/Kconfig: Make EFI select UCS2_STRING The commit "efi: Distinguish between "remaining space" and actually used space" added usage of ucs2_*() functions to arch/x86/platform/efi/efi.c, but the only thing which selected UCS2_STRING was EFI_VARS, which is technically optional and can be built as a module. Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5b1218..01af8535f58e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1549,6 +1549,7 @@ config X86_SMAP config EFI bool "EFI runtime service support" depends on ACPI + select UCS2_STRING ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). -- cgit v1.2.3 From 3668011d4ad556224f7c012c1e870a6eaa0e59da Mon Sep 17 00:00:00 2001 From: Sergey Vlasov Date: Tue, 16 Apr 2013 18:31:09 +0400 Subject: efi: Export efi_query_variable_store() for efivars.ko Fixes build with CONFIG_EFI_VARS=m which was broken after the commit "x86, efivars: firmware bug workarounds should be in platform code". Signed-off-by: Sergey Vlasov Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4959e3f89d74..4f364c7c6111 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1144,3 +1144,4 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return EFI_SUCCESS; } +EXPORT_SYMBOL_GPL(efi_query_variable_store); -- cgit v1.2.3 From ca46e10fb239d4646cb19620695473f252a6ffc7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Apr 2013 10:43:13 +0100 Subject: ARM: KVM: fix KVM_CAP_ARM_SET_DEVICE_ADDR reporting Commit 3401d54696f9 (KVM: ARM: Introduce KVM_ARM_SET_DEVICE_ADDR ioctl) added support for the KVM_CAP_ARM_SET_DEVICE_ADDR capability, but failed to add a break in the relevant case statement, returning the number of CPUs instead. Luckilly enough, the CONFIG_NR_CPUS=0 patch hasn't been merged yet (https://lkml.org/lkml/diff/2012/3/31/131/1), so the bug wasn't noticed. Just give it a break! Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5a936988eb24..c1fe498983ac 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -201,6 +201,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; + break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; -- cgit v1.2.3 From 865499ea90d399e0682bcce3ae7af24277633699 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 12 Apr 2013 14:00:16 +0100 Subject: ARM: KVM: fix L_PTE_S2_RDWR to actually be Read/Write Looks like our L_PTE_S2_RDWR definition is slightly wrong, and is actually write only (see ARM ARM Table B3-9, Stage 2 control of access permissions). Didn't make a difference for normal pages, as we OR the flags together, but I'm still wondering how it worked for Stage-2 mapped devices, such as the GIC. Brown paper bag time, again. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 6ef8afd1b64c..86b8fe398b95 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -111,7 +111,7 @@ #define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_MT_WRITEBACK (_AT(pteval_t, 0xf) << 2) /* MemAttr[3:0] */ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ -#define L_PTE_S2_RDWR (_AT(pteval_t, 2) << 6) /* HAP[2:1] */ +#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ /* * Hyp-mode PL2 PTE definitions for LPAE. -- cgit v1.2.3 From 8c58bf3eec3b8fc8162fe557e9361891c20758f2 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 17 Apr 2013 01:00:53 +0200 Subject: x86,efi: Implement efi_no_storage_paranoia parameter Using this parameter one can disable the storage_size/2 check if he is really sure that the UEFI does sane gc and fulfills the spec. This parameter is useful if a devices uses more than 50% of the storage by default. The Intel DQSW67 desktop board is such a sucker for exmaple. Signed-off-by: Richard Weinberger Signed-off-by: Matt Fleming --- Documentation/kernel-parameters.txt | 6 ++++++ arch/x86/platform/efi/efi.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..d1cc3a9fa14f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -788,6 +788,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. edd= [EDD] Format: {"off" | "on" | "skip[mbr]"} + efi_no_storage_paranoia [EFI; X86] + Using this parameter you can use more than 50% of + your efi variable storage. Use this parameter only if + you are really sure that your UEFI does sane gc and + fulfills the spec otherwise your board may brick. + eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 4f364c7c6111..e4a86a677ce1 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -113,6 +113,15 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); +static bool efi_no_storage_paranoia; + +static int __init setup_storage_paranoia(char *arg) +{ + efi_no_storage_paranoia = true; + return 0; +} +early_param("efi_no_storage_paranoia", setup_storage_paranoia); + static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { @@ -1137,7 +1146,10 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) */ if (!storage_size || size > remaining_size || - (max_size && size > max_size) || + (max_size && size > max_size)) + return EFI_OUT_OF_RESOURCES; + + if (!efi_no_storage_paranoia && ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && (remaining_size - size < storage_size / 2))) return EFI_OUT_OF_RESOURCES; -- cgit v1.2.3 From 4f2e29031e6c67802e7370292dd050fd62f337ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 17 Apr 2013 08:46:19 -0700 Subject: s390: move dummy io_remap_pfn_range() to asm/pgtable.h Commit b4cbb197c7e7 ("vm: add vm_iomap_memory() helper function") added a helper function wrapper around io_remap_pfn_range(), and every other architecture defined it in . The s390 choice of may make sense, but is not very convenient for this case, and gratuitous differences like that cause unexpected errors like this: mm/memory.c: In function 'vm_iomap_memory': mm/memory.c:2439:2: error: implicit declaration of function 'io_remap_pfn_range' [-Werror=implicit-function-declaration] Glory be the kbuild test robot who noticed this, bisected it, and reported it to the guilty parties (ie me). Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Linus Torvalds --- arch/s390/include/asm/io.h | 4 ---- arch/s390/include/asm/pgtable.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 27cb32185ce1..379d96e2105e 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -50,10 +50,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr); #define ioremap_nocache(addr, size) ioremap(addr, size) #define ioremap_wc ioremap_nocache -/* TODO: s390 cannot support io_remap_pfn_range... */ -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - static inline void __iomem *ioremap(unsigned long offset, unsigned long size) { return (void __iomem *) offset; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a5443118cfb..3cb47cf02530 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -57,6 +57,10 @@ extern unsigned long zero_page_mask; (((unsigned long)(vaddr)) &zero_page_mask)))) #define __HAVE_COLOR_ZERO_PAGE +/* TODO: s390 cannot support io_remap_pfn_range... */ +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #endif /* !__ASSEMBLY__ */ /* -- cgit v1.2.3 From f5d6a1441a5045824f36ff7c6b6bbae0373472a6 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Wed, 3 Apr 2013 22:28:41 +0100 Subject: ARM: 7692/1: iop3xx: move IOP3XX_PERIPHERAL_VIRT_BASE Currently IOP3XX_PERIPHERAL_VIRT_BASE conflicts with PCI_IO_VIRT_BASE: address size PCI_IO_VIRT_BASE 0xfee00000 0x200000 IOP3XX_PERIPHERAL_VIRT_BASE 0xfeffe000 0x2000 Fix by moving IOP3XX_PERIPHERAL_VIRT_BASE below PCI_IO_VIRT_BASE. The patch fixes the following kernel panic with 3.9-rc1 on iop3xx boards: [ 0.000000] Booting Linux on physical CPU 0x0 [ 0.000000] Initializing cgroup subsys cpu [ 0.000000] Linux version 3.9.0-rc1-iop32x (aaro@blackmetal) (gcc version 4.7.2 (GCC) ) #20 PREEMPT Tue Mar 5 16:44:36 EET 2013 [ 0.000000] bootconsole [earlycon0] enabled [ 0.000000] ------------[ cut here ]------------ [ 0.000000] kernel BUG at mm/vmalloc.c:1145! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT ARM [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 Not tainted (3.9.0-rc1-iop32x #20) [ 0.000000] PC is at vm_area_add_early+0x4c/0x88 [ 0.000000] LR is at add_static_vm_early+0x14/0x68 [ 0.000000] pc : [] lr : [] psr: 800000d3 [ 0.000000] sp : c03ffee4 ip : dfffdf88 fp : c03ffef4 [ 0.000000] r10: 00000002 r9 : 000000cf r8 : 00000653 [ 0.000000] r7 : c040eca8 r6 : c03e2408 r5 : dfffdf60 r4 : 00200000 [ 0.000000] r3 : dfffdfd8 r2 : feffe000 r1 : ff000000 r0 : dfffdf60 [ 0.000000] Flags: Nzcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment kernel [ 0.000000] Control: 0000397f Table: a0004000 DAC: 00000017 [ 0.000000] Process swapper (pid: 0, stack limit = 0xc03fe1b8) [ 0.000000] Stack: (0xc03ffee4 to 0xc0400000) [ 0.000000] fee0: 00200000 c03fff0c c03ffef8 c03e1c40 c03e7468 00200000 fee00000 [ 0.000000] ff00: c03fff2c c03fff10 c03e23e4 c03e1c38 feffe000 c0408ee4 ff000000 c0408f04 [ 0.000000] ff20: c03fff3c c03fff30 c03e2434 c03e23b4 c03fff84 c03fff40 c03e2c94 c03e2414 [ 0.000000] ff40: c03f8878 c03f6410 ffff0000 000bffff 00001000 00000008 c03fff84 c03f6410 [ 0.000000] ff60: c04227e8 c03fffd4 a0008000 c03f8878 69052e30 c02f96eb c03fffbc c03fff88 [ 0.000000] ff80: c03e044c c03e268c 00000000 0000397f c0385130 00000001 ffffffff c03f8874 [ 0.000000] ffa0: dfffffff a0004000 69052e30 a03f61a0 c03ffff4 c03fffc0 c03dd5cc c03e0184 [ 0.000000] ffc0: 00000000 00000000 00000000 00000000 00000000 c03f8878 0000397d c040601c [ 0.000000] ffe0: c03f8874 c0408674 00000000 c03ffff8 a0008040 c03dd558 00000000 00000000 [ 0.000000] Backtrace: [ 0.000000] [] (vm_area_add_early+0x0/0x88) from [] (add_static_vm_early+0x14/0x68) Tested-by: Mikael Pettersson Signed-off-by: Aaro Koskinen Signed-off-by: Russell King --- arch/arm/include/asm/hardware/iop3xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/hardware/iop3xx.h b/arch/arm/include/asm/hardware/iop3xx.h index 02fe2fbe2477..ed94b1a366ae 100644 --- a/arch/arm/include/asm/hardware/iop3xx.h +++ b/arch/arm/include/asm/hardware/iop3xx.h @@ -37,7 +37,7 @@ extern int iop3xx_get_init_atu(void); * IOP3XX processor registers */ #define IOP3XX_PERIPHERAL_PHYS_BASE 0xffffe000 -#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfeffe000 +#define IOP3XX_PERIPHERAL_VIRT_BASE 0xfedfe000 #define IOP3XX_PERIPHERAL_SIZE 0x00002000 #define IOP3XX_PERIPHERAL_UPPER_PA (IOP3XX_PERIPHERAL_PHYS_BASE +\ IOP3XX_PERIPHERAL_SIZE - 1) -- cgit v1.2.3 From de40614e92bf1b0308d953387b0cb9d3a5710186 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 5 Apr 2013 03:16:51 +0100 Subject: ARM: 7694/1: ARM, TCM: initialize TCM in paging_init(), instead of setup_arch() tcm_init() call iotable_init() and it use early_alloc variants which do memblock allocation. Directly using memblock allocation after initializing bootmem should not permitted, because bootmem can't know where are additinally reserved. So move tcm_init() to a safe place before initalizing bootmem. (On the U300) Tested-by: Linus Walleij Signed-off-by: Joonsoo Kim Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 --- arch/arm/kernel/tcm.c | 1 - arch/arm/kernel/tcm.h | 17 ----------------- arch/arm/mm/mmu.c | 2 ++ arch/arm/mm/tcm.h | 17 +++++++++++++++++ 5 files changed, 19 insertions(+), 21 deletions(-) delete mode 100644 arch/arm/kernel/tcm.h create mode 100644 arch/arm/mm/tcm.h (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d343a6c3a6d1..234e339196c0 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -56,7 +56,6 @@ #include #include "atags.h" -#include "tcm.h" #if defined(CONFIG_FPE_NWFPE) || defined(CONFIG_FPE_FASTFPE) @@ -798,8 +797,6 @@ void __init setup_arch(char **cmdline_p) reserve_crashkernel(); - tcm_init(); - #ifdef CONFIG_MULTI_IRQ_HANDLER handle_arch_irq = mdesc->handle_irq; #endif diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 30ae6bb4a310..f50f19e5c138 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -17,7 +17,6 @@ #include #include #include -#include "tcm.h" static struct gen_pool *tcm_pool; static bool dtcm_present; diff --git a/arch/arm/kernel/tcm.h b/arch/arm/kernel/tcm.h deleted file mode 100644 index 8015ad434a40..000000000000 --- a/arch/arm/kernel/tcm.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2008-2009 ST-Ericsson AB - * License terms: GNU General Public License (GPL) version 2 - * TCM memory handling for ARM systems - * - * Author: Linus Walleij - * Author: Rickard Andersson - */ - -#ifdef CONFIG_HAVE_TCM -void __init tcm_init(void); -#else -/* No TCM support, just blank inlines to be optimized out */ -inline void tcm_init(void) -{ -} -#endif diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 78978945492a..a84ff763ac39 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -34,6 +34,7 @@ #include #include "mm.h" +#include "tcm.h" /* * empty_zero_page is a special page that is used for @@ -1277,6 +1278,7 @@ void __init paging_init(struct machine_desc *mdesc) dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); + tcm_init(); top_pmd = pmd_off_k(0xffff0000); diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h new file mode 100644 index 000000000000..8015ad434a40 --- /dev/null +++ b/arch/arm/mm/tcm.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2008-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * TCM memory handling for ARM systems + * + * Author: Linus Walleij + * Author: Rickard Andersson + */ + +#ifdef CONFIG_HAVE_TCM +void __init tcm_init(void); +#else +/* No TCM support, just blank inlines to be optimized out */ +inline void tcm_init(void) +{ +} +#endif -- cgit v1.2.3 From cd272d1ea71583170e95dde02c76166c7f9017e6 Mon Sep 17 00:00:00 2001 From: Illia Ragozin Date: Wed, 10 Apr 2013 19:43:34 +0100 Subject: ARM: 7696/1: Fix kexec by setting outer_cache.inv_all for Feroceon On Feroceon the L2 cache becomes non-coherent with the CPU when the L1 caches are disabled. Thus the L2 needs to be invalidated after both L1 caches are disabled. On kexec before the starting the code for relocation the kernel, the L1 caches are disabled in cpu_froc_fin (cpu_v7_proc_fin for Feroceon), but after L2 cache is never invalidated, because inv_all is not set in cache-feroceon-l2.c. So kernel relocation and decompression may has (and usually has) errors. Setting the function enables L2 invalidation and fixes the issue. Cc: Signed-off-by: Illia Ragozin Acked-by: Jason Cooper Signed-off-by: Russell King --- arch/arm/mm/cache-feroceon-l2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index dd3d59122cc3..48bc3c0a87ce 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -343,6 +343,7 @@ void __init feroceon_l2_init(int __l2_wt_override) outer_cache.inv_range = feroceon_l2_inv_range; outer_cache.clean_range = feroceon_l2_clean_range; outer_cache.flush_range = feroceon_l2_flush_range; + outer_cache.inv_all = l2_inv_all; enable_l2(); -- cgit v1.2.3 From 50acff3c1f9ee9753684e676929b82926f15966c Mon Sep 17 00:00:00 2001 From: Bastian Hecht Date: Fri, 12 Apr 2013 19:03:50 +0100 Subject: ARM: 7697/1: hw_breakpoint: do not use __cpuinitdata for dbg_cpu_pm_nb We must not declare dbg_cpu_pm_nb as __cpuinitdata as we need it after system initialization for Suspend and CPUIdle. This was done in commit 9a6eb310eaa5 ("ARM: hw_breakpoint: Debug powerdown support for self-hosted debug"). Cc: stable@vger.kernel.org Cc: Dietmar Eggemann Signed-off-by: Bastian Hecht Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 5dc1aa6f0f7d..1fd749ee4a1b 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -1043,7 +1043,7 @@ static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata dbg_cpu_pm_nb = { +static struct notifier_block dbg_cpu_pm_nb = { .notifier_call = dbg_cpu_pm_notify, }; -- cgit v1.2.3 From cb2d8b342aa084d1f3ac29966245dec9163677fb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 12 Apr 2013 19:04:19 +0100 Subject: ARM: 7698/1: perf: fix group validation when using enable_on_exec Events may be created with attr->disabled == 1 and attr->enable_on_exec == 1, which confuses the group validation code because events with the PERF_EVENT_STATE_OFF are not considered candidates for scheduling, which may lead to failure at group scheduling time. This patch fixes the validation check for ARM, so that events in the OFF state are still considered when enable_on_exec is true. Cc: stable@vger.kernel.org Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Reported-by: Sudeep KarkadaNagesha Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 146157dfe27c..8c3094d0f7b7 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -253,7 +253,10 @@ validate_event(struct pmu_hw_events *hw_events, struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct pmu *leader_pmu = event->group_leader->pmu; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, event) >= 0; -- cgit v1.2.3 From c729de8fcea37a1c444e81857eace12494c804a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:45 -0700 Subject: x86, kdump: Set crashkernel_low automatically Chao said that kdump does does work well on his system on 3.8 without extra parameter, even iommu does not work with kdump. And now have to append crashkernel_low=Y in first kernel to make kdump work. We have now modified crashkernel=X to allocate memory beyong 4G (if available) and do not allocate low range for crashkernel if the user does not specify that with crashkernel_low=Y. This causes regression if iommu is not enabled. Without iommu, swiotlb needs to be setup in first 4G and there is no low memory available to second kernel. Set crashkernel_low automatically if the user does not specify that. For system that does support IOMMU with kdump properly, user could specify crashkernel_low=0 to save that 72M low ram. -v3: add swiotlb_size() according to Konrad. -v4: add comments what 8M is for according to hpa. also update more crashkernel_low= in kernel-parameters.txt -v5: update changelog according to Vivek. -v6: Change description about swiotlb referring according to HATAYAMA. Reported-by: WANG Chao Tested-by: WANG Chao Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-2-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 14 +++++++++++--- arch/x86/kernel/setup.c | 21 ++++++++++++++++++--- include/linux/swiotlb.h | 1 + lib/swiotlb.c | 19 +++++++++++++++---- 4 files changed, 45 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..cff672da2486 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -596,9 +596,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is selected automatically. Check Documentation/kdump/kdump.txt for further details. - crashkernel_low=size[KMG] - [KNL, x86] parts under 4G. - crashkernel=range1:size1[,range2:size2,...][@offset] [KNL] Same as above, but depends on the memory in the running system. The syntax of range is @@ -606,6 +603,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_low=size[KMG] + [KNL, x86_64] range under 4G. When crashkernel= is + passed, kernel allocate physical memory region + above 4G, that cause second kernel crash on system + that require some amount of low memory, e.g. swiotlb + requires at least 64M+32K low memory. Kernel would + try to allocate 72M below 4G automatically. + This one let user to specify own low range under 4G + for second kernel instead. + 0: to disable low allocation. + cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 90d8cc930f5e..12349202cae7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -521,19 +521,34 @@ static void __init reserve_crashkernel_low(void) unsigned long long low_base = 0, low_size = 0; unsigned long total_low_mem; unsigned long long base; + bool auto_set = false; int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); - if (ret != 0 || low_size <= 0) - return; + if (ret != 0) { + /* + * two parts from lib/swiotlb.c: + * swiotlb size: user specified with swiotlb= or default. + * swiotlb overflow buffer: now is hardcoded to 32k. + * We round it to 8M for other buffers that + * may need to stay low too. + */ + low_size = swiotlb_size_or_default() + (8UL<<20); + auto_set = true; + } else { + /* passed with crashkernel_low=0 ? */ + if (!low_size) + return; + } low_base = memblock_find_in_range(low_size, (1ULL<<32), low_size, alignment); if (!low_base) { - pr_info("crashkernel low reservation failed - No suitable area found.\n"); + if (!auto_set) + pr_info("crashkernel low reservation failed - No suitable area found.\n"); return; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 2de42f9401d2..a5ffd32642fd 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -25,6 +25,7 @@ extern int swiotlb_force; extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); +unsigned long swiotlb_size_or_default(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); /* diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bfe02b8fc55b..d23762e6652c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -105,9 +105,9 @@ setup_io_tlb_npages(char *str) if (!strcmp(str, "force")) swiotlb_force = 1; - return 1; + return 0; } -__setup("swiotlb=", setup_io_tlb_npages); +early_param("swiotlb", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ unsigned long swiotlb_nr_tbl(void) @@ -115,6 +115,18 @@ unsigned long swiotlb_nr_tbl(void) return io_tlb_nslabs; } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); + +/* default to 64MB */ +#define IO_TLB_DEFAULT_SIZE (64UL<<20) +unsigned long swiotlb_size_or_default(void) +{ + unsigned long size; + + size = io_tlb_nslabs << IO_TLB_SHIFT; + + return size ? size : (IO_TLB_DEFAULT_SIZE); +} + /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) @@ -188,8 +200,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) void __init swiotlb_init(int verbose) { - /* default to 64MB */ - size_t default_size = 64UL<<20; + size_t default_size = IO_TLB_DEFAULT_SIZE; unsigned char *vstart; unsigned long bytes; -- cgit v1.2.3 From 55a20ee7804ab64ac90bcdd4e2868a42829e2784 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:47 -0700 Subject: x86, kdump: Retore crashkernel= to allocate under 896M Vivek found old kexec-tools does not work new kernel anymore. So change back crashkernel= back to old behavoir, and add crashkernel_high= to let user decide if buffer could be above 4G, and also new kexec-tools will be needed. -v2: let crashkernel=X override crashkernel_high= update description about _high will be ignored by crashkernel=X -v3: update description about kernel-parameters.txt according to Vivek. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-4-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 13 +++++++++++-- arch/x86/kernel/setup.c | 24 +++++++++++++++++++----- include/linux/kexec.h | 2 ++ kernel/kexec.c | 9 +++++++++ 4 files changed, 41 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cff672da2486..709eb3edc6b2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,9 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. + crashkernel_high=size[KMG] + [KNL, x86_64] range could be above 4G. Allow kernel + to allocate physical memory region from top, so could + be above 4G if system have more than 4G ram installed. + Otherwise memory region will be allocated below 4G, if + available. + It will be ignored if crashkernel=X is specified. crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel= is - passed, kernel allocate physical memory region + [KNL, x86_64] range under 4G. When crashkernel_high= is + passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -613,6 +620,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. + It will be ignored when crashkernel_high=X is not used + or memory reserved is below 4G. cs89x0_dma= [HW,NET] Format: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 12349202cae7..a85a144f2052 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -507,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. + * On 64bit, old kexec-tools need to under 896MiB. */ #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) +# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_MAX MAXMEM +# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) +# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM #endif static void __init reserve_crashkernel_low(void) @@ -525,6 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + /* crashkernel_low=YM */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -569,14 +573,22 @@ static void __init reserve_crashkernel(void) const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; + bool high = false; int ret; total_mem = memblock_phys_mem_size(); + /* crashkernel=XM */ ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret != 0 || crash_size <= 0) - return; + if (ret != 0 || crash_size <= 0) { + /* crashkernel_high=XM */ + ret = parse_crashkernel_high(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret != 0 || crash_size <= 0) + return; + high = true; + } /* 0 means: find the address automatically */ if (crash_base <= 0) { @@ -584,7 +596,9 @@ static void __init reserve_crashkernel(void) * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ crash_base = memblock_find_in_range(alignment, - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); + high ? CRASH_KERNEL_ADDR_HIGH_MAX : + CRASH_KERNEL_ADDR_LOW_MAX, + crash_size, alignment); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d2e6927bbaae..d78d28a733b1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -200,6 +200,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); diff --git a/kernel/kexec.c b/kernel/kexec.c index bddd3d7a74b6..1b2f73f5f9b9 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1422,6 +1422,15 @@ int __init parse_crashkernel(char *cmdline, "crashkernel="); } +int __init parse_crashkernel_high(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel_high="); +} + int __init parse_crashkernel_low(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, -- cgit v1.2.3 From adbc742bf78695bb98c79d18c558b61571748b99 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 15 Apr 2013 22:23:48 -0700 Subject: x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low Per hpa, use crashkernel=X,high crashkernel=Y,low instead of crashkernel_hign=X crashkernel_low=Y. As that could be extensible. -v2: according to Vivek, change delimiter to ; -v3: let hign and low only handle simple form and it conforms to description in kernel-parameters.txt still keep crashkernel=X override any crashkernel=X,high crashkernel=Y,low -v4: update get_last_crashkernel returning and add more strict checking in parse_crashkernel_simple() found by HATAYAMA. -v5: Change delimiter back to , according to HPA. also separate parse_suffix from parse_simper according to vivek. so we can avoid @pos in that path. -v6: Tight the checking about crashkernel=X,highblahblah,high found by HTYAYAMA. Cc: HATAYAMA Daisuke Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1366089828-19692-5-git-send-email-yinghai@kernel.org Acked-by: Vivek Goyal Signed-off-by: H. Peter Anvin --- Documentation/kernel-parameters.txt | 10 ++-- arch/x86/kernel/setup.c | 6 +- kernel/kexec.c | 109 +++++++++++++++++++++++++++++++----- 3 files changed, 104 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 709eb3edc6b2..a1ac1f1d6d34 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -603,16 +603,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a memory unit (amount[KMG]). See also Documentation/kdump/kdump.txt for an example. - crashkernel_high=size[KMG] + crashkernel=size[KMG],high [KNL, x86_64] range could be above 4G. Allow kernel to allocate physical memory region from top, so could be above 4G if system have more than 4G ram installed. Otherwise memory region will be allocated below 4G, if available. It will be ignored if crashkernel=X is specified. - crashkernel_low=size[KMG] - [KNL, x86_64] range under 4G. When crashkernel_high= is - passed, kernel could allocate physical memory region + crashkernel=size[KMG],low + [KNL, x86_64] range under 4G. When crashkernel=X,high + is passed, kernel could allocate physical memory region above 4G, that cause second kernel crash on system that require some amount of low memory, e.g. swiotlb requires at least 64M+32K low memory. Kernel would @@ -620,7 +620,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. This one let user to specify own low range under 4G for second kernel instead. 0: to disable low allocation. - It will be ignored when crashkernel_high=X is not used + It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. cs89x0_dma= [HW,NET] diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a85a144f2052..fae9134a2de9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -528,7 +528,7 @@ static void __init reserve_crashkernel_low(void) int ret; total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); - /* crashkernel_low=YM */ + /* crashkernel=Y,low */ ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); if (ret != 0) { @@ -542,7 +542,7 @@ static void __init reserve_crashkernel_low(void) low_size = swiotlb_size_or_default() + (8UL<<20); auto_set = true; } else { - /* passed with crashkernel_low=0 ? */ + /* passed with crashkernel=0,low ? */ if (!low_size) return; } @@ -582,7 +582,7 @@ static void __init reserve_crashkernel(void) ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { - /* crashkernel_high=XM */ + /* crashkernel=X,high */ ret = parse_crashkernel_high(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) diff --git a/kernel/kexec.c b/kernel/kexec.c index 1b2f73f5f9b9..401fdb041f35 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline, return 0; } +#define SUFFIX_HIGH 0 +#define SUFFIX_LOW 1 +#define SUFFIX_NULL 2 +static __initdata char *suffix_tbl[] = { + [SUFFIX_HIGH] = ",high", + [SUFFIX_LOW] = ",low", + [SUFFIX_NULL] = NULL, +}; + /* - * That function is the entry point for command line parsing and should be - * called from the arch-specific code. + * That function parses "suffix" crashkernel command lines like + * + * crashkernel=size,[high|low] + * + * It returns 0 on success and -EINVAL on failure. */ +static int __init parse_crashkernel_suffix(char *cmdline, + unsigned long long *crash_size, + unsigned long long *crash_base, + const char *suffix) +{ + char *cur = cmdline; + + *crash_size = memparse(cmdline, &cur); + if (cmdline == cur) { + pr_warn("crashkernel: memory value expected\n"); + return -EINVAL; + } + + /* check with suffix */ + if (strncmp(cur, suffix, strlen(suffix))) { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + cur += strlen(suffix); + if (*cur != ' ' && *cur != '\0') { + pr_warn("crashkernel: unrecognized char\n"); + return -EINVAL; + } + + return 0; +} + +static __init char *get_last_crashkernel(char *cmdline, + const char *name, + const char *suffix) +{ + char *p = cmdline, *ck_cmdline = NULL; + + /* find crashkernel and use the last one if there are more */ + p = strstr(p, name); + while (p) { + char *end_p = strchr(p, ' '); + char *q; + + if (!end_p) + end_p = p + strlen(p); + + if (!suffix) { + int i; + + /* skip the one with any known suffix */ + for (i = 0; suffix_tbl[i]; i++) { + q = end_p - strlen(suffix_tbl[i]); + if (!strncmp(q, suffix_tbl[i], + strlen(suffix_tbl[i]))) + goto next; + } + ck_cmdline = p; + } else { + q = end_p - strlen(suffix); + if (!strncmp(q, suffix, strlen(suffix))) + ck_cmdline = p; + } +next: + p = strstr(p+1, name); + } + + if (!ck_cmdline) + return NULL; + + return ck_cmdline; +} + static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base, - const char *name) + const char *name, + const char *suffix) { - char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; + char *ck_cmdline; BUG_ON(!crash_size || !crash_base); *crash_size = 0; *crash_base = 0; - /* find crashkernel and use the last one if there are more */ - p = strstr(p, name); - while (p) { - ck_cmdline = p; - p = strstr(p+1, name); - } + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); if (!ck_cmdline) return -EINVAL; ck_cmdline += strlen(name); + if (suffix) + return parse_crashkernel_suffix(ck_cmdline, crash_size, + crash_base, suffix); /* * if the commandline contains a ':', then that's the extended * syntax -- if not, it must be the classic syntax @@ -1413,13 +1492,17 @@ static int __init __parse_crashkernel(char *cmdline, return 0; } +/* + * That function is the entry point for command line parsing and should be + * called from the arch-specific code. + */ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel="); + "crashkernel=", NULL); } int __init parse_crashkernel_high(char *cmdline, @@ -1428,7 +1511,7 @@ int __init parse_crashkernel_high(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_high="); + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); } int __init parse_crashkernel_low(char *cmdline, @@ -1437,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline, unsigned long long *crash_base) { return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, - "crashkernel_low="); + "crashkernel=", suffix_tbl[SUFFIX_LOW]); } static void update_vmcoreinfo_note(void) -- cgit v1.2.3 From 15bbc1b28ff65767922f78c266821cc138b90a47 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Apr 2013 12:09:09 -0700 Subject: ARM: KVM: fix unbalanced get_cpu() in access_dcsw In the very unlikely event where a guest would be foolish enough to *read* from a write-only cache maintainance register, we end up with preemption disabled, due to a misplaced get_cpu(). Just move the "is_write" test outside of the critical section. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Linus Torvalds --- arch/arm/kvm/coproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 4ea9a982269c..7bed7556077a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -79,11 +79,11 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, u32 val; int cpu; - cpu = get_cpu(); - if (!p->is_write) return read_from_write_only(vcpu, p); + cpu = get_cpu(); + cpumask_setall(&vcpu->arch.require_dcache_flush); cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); -- cgit v1.2.3 From 472d326677db37625903265428582694394d2df7 Mon Sep 17 00:00:00 2001 From: Josh Wu Date: Wed, 17 Apr 2013 15:58:31 -0700 Subject: avr32: fix build error in atstk1006_defconfig fixed the following compile error when use avr32 atstk1006_defconfig: drivers/mtd/nand/atmel_nand.c: In function 'pmecc_err_location': drivers/mtd/nand/atmel_nand.c:639: error: implicit declaration of function 'writel_relaxed' which was introduced by commit 1c7b874d33b4 ("mtd: at91: atmel_nand: add Programmable Multibit ECC controller support"). The PMECC for nand flash code uses writel_relaxed(). But in avr32, there is no macro "writel_relaxed" defined. This patch add writex_relaxed macro definitions. Signed-off-by: Josh Wu Acked-by: Havard Skinnemoen Acked-by: Hans-Christian Egtvedt Cc: David Woodhouse Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/include/asm/io.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/avr32/include/asm/io.h b/arch/avr32/include/asm/io.h index cf60d0a9f176..fc6483f83ccc 100644 --- a/arch/avr32/include/asm/io.h +++ b/arch/avr32/include/asm/io.h @@ -165,6 +165,10 @@ BUILDIO_IOPORT(l, u32) #define readw_be __raw_readw #define readl_be __raw_readl +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel + #define writeb_be __raw_writeb #define writew_be __raw_writew #define writel_be __raw_writel -- cgit v1.2.3 From 7eff7ded02d1b15ba8321664839b353fa6c0c1e4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 18 Apr 2013 08:44:46 -0700 Subject: x86, hyperv: Handle Xen emulation of Hyper-V more gracefully Install the Hyper-V specific interrupt handler only when needed. This would permit us to get rid of the Xen check. Note that when the vmbus drivers invokes the call to register its handler, we are sure to be running on Hyper-V. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1366299886-6399-1-git-send-email-kys@microsoft.com Acked-by: Michael S. Tsirkin Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a7d26d83fb70..8f4be53ea04b 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - /* - * Xen emulates Hyper-V to support enlightened Windows. - * Check to see first if we are on a Xen Hypervisor. - */ - if (xen_cpuid_base()) - return false; - cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); @@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); -#if IS_ENABLED(CONFIG_HYPERV) - /* - * Setup the IDT for hypervisor callback. - */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); -#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr; void hv_register_vmbus_handler(int irq, irq_handler_t handler) { + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + vmbus_irq = irq; vmbus_isr = handler; } -- cgit v1.2.3 From 73053d973dd6f56472309cffa5a5d15a62dd6f96 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 17 Apr 2013 10:46:52 -0500 Subject: ARM: highbank: fix cache flush ordering for cpu hotplug The L1 data cache flush needs to be after highbank_set_cpu_jump call which pollutes the cache with the l2x0_lock. This causes other cores to deadlock waiting for the l2x0_lock. Moving the flush of the entire data cache after highbank_set_cpu_jump fixes the problem. Use flush_cache_louis instead of flush_cache_all are that is sufficient to flush only the L1 data cache. flush_cache_louis did not exist when highbank_cpu_die was originally written. With PL310 errata 769419 enabled, a wmb is inserted into idle which takes the l2x0_lock. This makes the problem much more easily hit and causes reset to hang. Reported-by: Paolo Pisati Signed-off-by: Rob Herring Signed-off-by: Olof Johansson --- arch/arm/mach-highbank/hotplug.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-highbank/hotplug.c b/arch/arm/mach-highbank/hotplug.c index f30c52843396..890cae23c12a 100644 --- a/arch/arm/mach-highbank/hotplug.c +++ b/arch/arm/mach-highbank/hotplug.c @@ -28,13 +28,11 @@ extern void secondary_startup(void); */ void __ref highbank_cpu_die(unsigned int cpu) { - flush_cache_all(); - highbank_set_cpu_jump(cpu, phys_to_virt(0)); - highbank_set_core_pwr(); - cpu_do_idle(); + flush_cache_louis(); + highbank_set_core_pwr(); - /* We should never return from idle */ - panic("highbank: cpu %d unexpectedly exit from shutdown\n", cpu); + while (1) + cpu_do_idle(); } -- cgit v1.2.3 From cea15092f098b7018e89f64a5a14bb71955965d5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 18 Apr 2013 17:33:40 +0100 Subject: ARM: 7699/1: sched_clock: Add more notrace to prevent recursion cyc_to_sched_clock() is called by sched_clock() and cyc_to_ns() is called by cyc_to_sched_clock(). I suspect that some compilers inline both of these functions into sched_clock() and so we've been getting away without having a notrace marking. It seems that my compiler isn't inlining cyc_to_sched_clock() though, so I'm hitting a recursion bug when I enable the function graph tracer, causing my system to crash. Marking these functions notrace fixes it. Technically cyc_to_ns() doesn't need the notrace because it's already marked inline, but let's just add it so that if we ever remove inline from that function it doesn't blow up. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/sched_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c index bd6f56b9ec21..59d2adb764a9 100644 --- a/arch/arm/kernel/sched_clock.c +++ b/arch/arm/kernel/sched_clock.c @@ -45,12 +45,12 @@ static u32 notrace jiffy_sched_clock_read(void) static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; -static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { return (cyc * mult) >> shift; } -static unsigned long long cyc_to_sched_clock(u32 cyc, u32 mask) +static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) { u64 epoch_ns; u32 epoch_cyc; -- cgit v1.2.3 From f36391d2790d04993f48da6a45810033a2cdf847 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 19 Apr 2013 17:26:26 -0400 Subject: sparc64: Fix race in TLB batch processing. As reported by Dave Kleikamp, when we emit cross calls to do batched TLB flush processing we have a race because we do not synchronize on the sibling cpus completing the cross call. So meanwhile the TLB batch can be reset (tb->tlb_nr set to zero, etc.) and either flushes are missed or flushes will flush the wrong addresses. Fix this by using generic infrastructure to synchonize on the completion of the cross call. This first required getting the flush_tlb_pending() call out from switch_to() which operates with locks held and interrupts disabled. The problem is that smp_call_function_many() cannot be invoked with IRQs disabled and this is explicitly checked for with WARN_ON_ONCE(). We get the batch processing outside of locked IRQ disabled sections by using some ideas from the powerpc port. Namely, we only batch inside of arch_{enter,leave}_lazy_mmu_mode() calls. If we're not in such a region, we flush TLBs synchronously. 1) Get rid of xcall_flush_tlb_pending and per-cpu type implementations. 2) Do TLB batch cross calls instead via: smp_call_function_many() tlb_pending_func() __flush_tlb_pending() 3) Batch only in lazy mmu sequences: a) Add 'active' member to struct tlb_batch b) Define __HAVE_ARCH_ENTER_LAZY_MMU_MODE c) Set 'active' in arch_enter_lazy_mmu_mode() d) Run batch and clear 'active' in arch_leave_lazy_mmu_mode() e) Check 'active' in tlb_batch_add_one() and do a synchronous flush if it's clear. 4) Add infrastructure for synchronous TLB page flushes. a) Implement __flush_tlb_page and per-cpu variants, patch as needed. b) Likewise for xcall_flush_tlb_page. c) Implement smp_flush_tlb_page() to invoke the cross-call. d) Wire up global_flush_tlb_page() to the right routine based upon CONFIG_SMP 5) It turns out that singleton batches are very common, 2 out of every 3 batch flushes have only a single entry in them. The batch flush waiting is very expensive, both because of the poll on sibling cpu completeion, as well as because passing the tlb batch pointer to the sibling cpus invokes a shared memory dereference. Therefore, in flush_tlb_pending(), if there is only one entry in the batch perform a completely asynchronous global_flush_tlb_page() instead. Reported-by: Dave Kleikamp Signed-off-by: David S. Miller Acked-by: Dave Kleikamp --- arch/sparc/include/asm/pgtable_64.h | 1 + arch/sparc/include/asm/switch_to_64.h | 3 +- arch/sparc/include/asm/tlbflush_64.h | 37 +++++++++-- arch/sparc/kernel/smp_64.c | 41 ++++++++++-- arch/sparc/mm/tlb.c | 38 +++++++++-- arch/sparc/mm/tsb.c | 57 +++++++++++----- arch/sparc/mm/ultra.S | 119 +++++++++++++++++++++++++++------- 7 files changed, 241 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 08fcce90316b..7619f2f792af 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -915,6 +915,7 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot); } +#include #include /* We provide our own get_unmapped_area to cope with VA holes and diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index cad36f56fa03..c7de3323819c 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -18,8 +18,7 @@ do { \ * and 2 stores in this critical code path. -DaveM */ #define switch_to(prev, next, last) \ -do { flush_tlb_pending(); \ - save_and_clear_fpu(); \ +do { save_and_clear_fpu(); \ /* If you are tempted to conditionalize the following */ \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index 2ef463494153..f0d6a9700f4c 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -11,24 +11,40 @@ struct tlb_batch { struct mm_struct *mm; unsigned long tlb_nr; + unsigned long active; unsigned long vaddrs[TLB_BATCH_NR]; }; extern void flush_tsb_kernel_range(unsigned long start, unsigned long end); extern void flush_tsb_user(struct tlb_batch *tb); +extern void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr); /* TLB flush operations. */ -extern void flush_tlb_pending(void); +static inline void flush_tlb_mm(struct mm_struct *mm) +{ +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE -#define flush_tlb_range(vma,start,end) \ - do { (void)(start); flush_tlb_pending(); } while (0) -#define flush_tlb_page(vma,addr) flush_tlb_pending() -#define flush_tlb_mm(mm) flush_tlb_pending() +extern void flush_tlb_pending(void); +extern void arch_enter_lazy_mmu_mode(void); +extern void arch_leave_lazy_mmu_mode(void); +#define arch_flush_lazy_mmu_mode() do {} while (0) /* Local cpu only. */ extern void __flush_tlb_all(void); - +extern void __flush_tlb_page(unsigned long context, unsigned long vaddr); extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP @@ -38,15 +54,24 @@ do { flush_tsb_kernel_range(start,end); \ __flush_tlb_kernel_range(start,end); \ } while (0) +static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); +} + #else /* CONFIG_SMP */ extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); #define flush_tlb_kernel_range(start, end) \ do { flush_tsb_kernel_range(start,end); \ smp_flush_tlb_kernel_range(start, end); \ } while (0) +#define global_flush_tlb_page(mm, vaddr) \ + smp_flush_tlb_page(mm, vaddr) + #endif /* ! CONFIG_SMP */ #endif /* _SPARC64_TLBFLUSH_H */ diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 537eb66abd06..ca64d2a86ec0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -849,7 +849,7 @@ void smp_tsb_sync(struct mm_struct *mm) } extern unsigned long xcall_flush_tlb_mm; -extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_page; extern unsigned long xcall_flush_tlb_kernel_range; extern unsigned long xcall_fetch_glob_regs; extern unsigned long xcall_fetch_glob_pmu; @@ -1074,23 +1074,56 @@ local_flush_and_out: put_cpu(); } +struct tlb_pending_info { + unsigned long ctx; + unsigned long nr; + unsigned long *vaddrs; +}; + +static void tlb_pending_func(void *info) +{ + struct tlb_pending_info *t = info; + + __flush_tlb_pending(t->ctx, t->nr, t->vaddrs); +} + void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) { u32 ctx = CTX_HWBITS(mm->context); + struct tlb_pending_info info; int cpu = get_cpu(); + info.ctx = ctx; + info.nr = nr; + info.vaddrs = vaddrs; + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); else - smp_cross_call_masked(&xcall_flush_tlb_pending, - ctx, nr, (unsigned long) vaddrs, - mm_cpumask(mm)); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); put_cpu(); } +void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long context = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (mm == current->mm && atomic_read(&mm->mm_users) == 1) + cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); + else + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); + + put_cpu(); +} + void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index ba6ae7ffdc2c..272aa4f7657e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -24,11 +24,17 @@ static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); void flush_tlb_pending(void) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; - if (tb->tlb_nr) { - flush_tsb_user(tb); + if (!tb->tlb_nr) + goto out; - if (CTX_VALID(tb->mm->context)) { + flush_tsb_user(tb); + + if (CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { #ifdef CONFIG_SMP smp_flush_tlb_pending(tb->mm, tb->tlb_nr, &tb->vaddrs[0]); @@ -37,12 +43,30 @@ void flush_tlb_pending(void) tb->tlb_nr, &tb->vaddrs[0]); #endif } - tb->tlb_nr = 0; } + tb->tlb_nr = 0; + +out: put_cpu_var(tlb_batch); } +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, bool exec) { @@ -60,6 +84,12 @@ static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, nr = 0; } + if (!tb->active) { + global_flush_tlb_page(mm, vaddr); + flush_tsb_user_page(mm, vaddr); + return; + } + if (nr == 0) tb->mm = mm; diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 428982b9becf..2cc3bce5ee91 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -7,11 +7,10 @@ #include #include #include -#include -#include -#include #include +#include #include +#include #include extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; @@ -46,23 +45,27 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) } } -static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, - unsigned long tsb, unsigned long nentries) +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) { - unsigned long i; + unsigned long tag, ent, hash; - for (i = 0; i < tb->tlb_nr; i++) { - unsigned long v = tb->vaddrs[i]; - unsigned long tag, ent, hash; + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); - v &= ~0x1UL; + tsb_flush(ent, tag); +} - hash = tsb_hash(v, hash_shift, nentries); - ent = tsb + (hash * sizeof(struct tsb)); - tag = (v >> 22UL); +static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, + unsigned long tsb, unsigned long nentries) +{ + unsigned long i; - tsb_flush(ent, tag); - } + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); } void flush_tsb_user(struct tlb_batch *tb) @@ -90,6 +93,30 @@ void flush_tsb_user(struct tlb_batch *tb) spin_unlock_irqrestore(&mm->context.lock, flags); } +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index f8e13d421fcb..432aa0cb1b38 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -52,6 +52,33 @@ __flush_tlb_mm: /* 18 insns */ nop nop + .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + .align 32 .globl __flush_tlb_pending __flush_tlb_pending: /* 26 insns */ @@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + __cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 @@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns */ retl nop +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 @@ -339,6 +405,13 @@ cheetah_patch_cachetlbops: call tlb_patch_one mov 19, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 @@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop - .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=context, %g1=nr, %g7=vaddrs[] */ - sllx %g1, 3, %g1 + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */ or %g5, %g4, %g5 mov PRIMARY_CONTEXT, %g4 stxa %g5, [%g4] ASI_DMMU -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %g5 - andcc %g5, 0x1, %g0 + andcc %g1, 0x1, %g0 be,pn %icc, 2f - - andn %g5, 0x1, %g5 + andn %g1, 0x1, %g5 stxa %g0, [%g5] ASI_IMMU_DEMAP 2: stxa %g0, [%g5] ASI_DMMU_DEMAP membar #Sync - brnz,pt %g1, 1b - nop stxa %g2, [%g4] ASI_DMMU retry nop + nop .globl xcall_flush_tlb_kernel_range xcall_flush_tlb_kernel_range: /* 25 insns */ @@ -656,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ membar #Sync retry - .globl __hypervisor_xcall_flush_tlb_pending -__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ - sllx %g1, 3, %g1 + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 mov %o2, %g4 -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g1, %o0 /* ARG0: virtual address */ mov %g5, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ srlx %o0, PAGE_SHIFT, %o0 @@ -673,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /* 21 insns */ mov HV_MMU_UNMAP_ADDR_TRAP, %g6 brnz,a,pn %o0, __hypervisor_tlb_xcall_error mov %o0, %g5 - brnz,pt %g1, 1b - nop mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 @@ -757,6 +821,13 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 10, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 @@ -788,12 +859,12 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 21, %o2 - sethi %hi(xcall_flush_tlb_pending), %o0 - or %o0, %lo(xcall_flush_tlb_pending), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 21, %o2 + mov 17, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 -- cgit v1.2.3 From 74c3e3fcf350b2e7e3eaf9550528ee3f74e44b37 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 19 Apr 2013 16:36:03 -0700 Subject: x86, microcode: Verify the family before dispatching microcode patching For each CPU vendor that implements CPU microcode patching, there will be a minimum family for which this is implemented. Verify this minimum level of support. This can be done in the dispatch function or early in the application functions. Doing the latter turned out to be somewhat awkward because of the ineviable split between the BSP and the AP paths, and rather than pushing deep into the application functions, do this in the dispatch function. Reported-by: "Bryan O'Donoghue" Suggested-by: Borislav Petkov Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1366392183-4149-1-git-send-email-bryan.odonoghue.lkml@nexus-software.ie --- arch/x86/kernel/microcode_core_early.c | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c index 577db8417d15..833d51d6ee06 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/microcode_core_early.c @@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void) u32 eax = 0x00000000; u32 ebx, ecx = 0, edx; - if (!have_cpuid_p()) - return X86_VENDOR_UNKNOWN; - native_cpuid(&eax, &ebx, &ecx, &edx); if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) @@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void) return X86_VENDOR_UNKNOWN; } +static int __cpuinit x86_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + int x86; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + x86 = (eax >> 8) & 0xf; + if (x86 == 15) + x86 += (eax >> 20) & 0xff; + + return x86; +} + void __init load_ucode_bsp(void) { - int vendor = x86_vendor(); + int vendor, x86; + + if (!have_cpuid_p()) + return; - if (vendor == X86_VENDOR_INTEL) + vendor = x86_vendor(); + x86 = x86_family(); + + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_bsp(); } void __cpuinit load_ucode_ap(void) { - int vendor = x86_vendor(); + int vendor, x86; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + x86 = x86_family(); - if (vendor == X86_VENDOR_INTEL) + if (vendor == X86_VENDOR_INTEL && x86 >= 6) load_ucode_intel_ap(); } -- cgit v1.2.3