diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2012-06-11 20:11:29 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2012-06-11 20:11:29 +0200 |
commit | 924412f66fd9d21212e560a93792b0b607d46c6e (patch) | |
tree | 823ac1d1f21062c6ad45d52f7bb5ad0a475dd123 /arch | |
parent | timers: Improve get_next_timer_interrupt() (diff) | |
parent | nohz: Move next idle expiry time record into idle logic area (diff) | |
download | linux-924412f66fd9d21212e560a93792b0b607d46c6e.tar.xz linux-924412f66fd9d21212e560a93792b0b607d46c6e.zip |
Merge branch 'nohz-for-tip-2' of git://github.com/fweisbec/linux-dynticks into timers/core
Diffstat (limited to '')
30 files changed, 342 insertions, 182 deletions
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index dbc3850b1d0d..5707f1a62341 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -21,6 +21,7 @@ KBUILD_DEFCONFIG := default_defconfig NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 +LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) MACHINE := $(shell uname -m) ifeq ($(MACHINE),parisc*) @@ -79,7 +80,7 @@ kernel-y := mm/ kernel/ math-emu/ kernel-$(CONFIG_HPUX) += hpux/ core-y += $(addprefix arch/parisc/, $(kernel-y)) -libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name` +libs-y += arch/parisc/lib/ $(LIBGCC) drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 19a434f55059..4383707d9801 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,3 +1,4 @@ include include/asm-generic/Kbuild.asm header-y += pdc.h +generic-y += word-at-a-time.h diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 72cfdb0cfdd1..62a33338549c 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -1,6 +1,8 @@ #ifndef _PARISC_BUG_H #define _PARISC_BUG_H +#include <linux/kernel.h> /* for BUGFLAG_TAINT */ + /* * Tell the user there is some problem. * The offending file and line are encoded in the __bug_table section. diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 0b6d79617d7b..2e3200ca485f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 99a995c2a3f2..be171ee73bf8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -475,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs) struct pt_regs *old_regs; u64 *next_tb = &__get_cpu_var(decrementers_next_tb); struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -509,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs) irq_work_run(); } - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + } else { + now = *next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 7e1fef36bde6..e9c670d7a7fe 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -91,11 +91,6 @@ extern void smp_nap(void); /* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ extern void _cpu_idle(void); -/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ -extern void cpu_idle_on_new_stack(struct thread_info *old_ti, - unsigned long new_sp, - unsigned long new_ss10); - #else /* __ASSEMBLY__ */ /* diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 133c4b56a99e..c31637baff28 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -68,20 +68,6 @@ STD_ENTRY(KBacktraceIterator_init_current) jrp lr /* keep backtracer happy */ STD_ENDPROC(KBacktraceIterator_init_current) -/* - * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then - * free the old stack (passed in r0) and re-invoke cpu_idle(). - * We update sp and ksp0 simultaneously to avoid backtracer warnings. - */ -STD_ENTRY(cpu_idle_on_new_stack) - { - move sp, r1 - mtspr SPR_SYSTEM_SAVE_K_0, r2 - } - jal free_thread_info - j cpu_idle - STD_ENDPROC(cpu_idle_on_new_stack) - /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6098ccc59be2..dd87f3420390 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/smp.h> #include <linux/timex.h> #include <linux/hugetlb.h> +#include <linux/start_kernel.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 8bbea6aa40d9..efe5acfc79c3 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -94,10 +94,10 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct booting from floppy is no longer supported.\r\n" - .ascii "Please use a boot loader program instead.\r\n" + .ascii "Direct floppy boot is not supported. " + .ascii "Use a boot loader program instead.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot . . .\r\n" + .ascii "Remove disk and press any key to reboot ...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -111,7 +111,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 2 # nr_sections + .word 3 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -158,8 +158,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x1000 # SectionAlignment - .long 0x200 # FileAlignment + .long 0x20 # SectionAlignment + .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word 0 # MajorImageVersion @@ -200,8 +200,10 @@ extra_header_fields: # Section table section_table: - .ascii ".text" - .byte 0 + # + # The offset & size fields are filled in by build.c. + # + .ascii ".setup" .byte 0 .byte 0 .long 0 @@ -217,9 +219,8 @@ section_table: # # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. But since - # we don't need the loader to fixup any relocs for us, we - # just create an empty (zero-length) .reloc section header. + # because EFI applications must be relocatable. The .reloc + # offset & size fields are filled in by build.c. # .ascii ".reloc" .byte 0 @@ -233,6 +234,25 @@ section_table: .word 0 # NumberOfRelocations .word 0 # NumberOfLineNumbers .long 0x42100040 # Characteristics (section flags) + + # + # The offset & size fields are filled in by build.c. + # + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 3f61f6e2b46f..4b8e165ee572 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,6 +50,8 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; int is_big_kernel; +#define PECOFF_RELOC_RESERVE 0x20 + /*----------------------------------------------------------------------*/ static const u32 crctab32[] = { @@ -133,11 +135,103 @@ static void usage(void) die("Usage: build setup system [> image]"); } -int main(int argc, char ** argv) -{ #ifdef CONFIG_EFI_STUB - unsigned int file_sz, pe_header; + +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + unsigned int pe_header; + unsigned short num_sections; + u8 *section; + + pe_header = get_unaligned_le32(&buf[0x3c]); + num_sections = get_unaligned_le16(&buf[pe_header + 6]); + +#ifdef CONFIG_X86_32 + section = &buf[pe_header + 0xa8]; +#else + section = &buf[pe_header + 0xb8]; #endif + + while (num_sections > 0) { + if (strncmp((char*)section, section_name, 8) == 0) { + /* section header size field */ + put_unaligned_le32(size, section + 0x8); + + /* section header vma field */ + put_unaligned_le32(offset, section + 0xc); + + /* section header 'size of initialised data' field */ + put_unaligned_le32(size, section + 0x10); + + /* section header 'file offset' field */ + put_unaligned_le32(offset, section + 0x14); + + break; + } + section += 0x28; + num_sections--; + } +} + +static void update_pecoff_setup_and_reloc(unsigned int size) +{ + u32 setup_offset = 0x200; + u32 reloc_offset = size - PECOFF_RELOC_RESERVE; + u32 setup_size = reloc_offset - setup_offset; + + update_pecoff_section_header(".setup", setup_offset, setup_size); + update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); + + /* + * Modify .reloc section contents with a single entry. The + * relocation is applied to offset 10 of the relocation section. + */ + put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); + put_unaligned_le32(10, &buf[reloc_offset + 4]); +} + +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) +{ + unsigned int pe_header; + unsigned int text_sz = file_sz - text_start; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of image */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); + + /* + * Size of code: Subtract the size of the first sector (512 bytes) + * which includes the header. + */ + put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); + +#ifdef CONFIG_X86_32 + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after. The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation + */ + put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]); +#endif /* CONFIG_X86_32 */ + + update_pecoff_section_header(".text", text_start, text_sz); +} + +#endif /* CONFIG_EFI_STUB */ + +int main(int argc, char ** argv) +{ unsigned int i, sz, setup_sectors; int c; u32 sys_size; @@ -163,6 +257,12 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); +#ifdef CONFIG_EFI_STUB + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + c += PECOFF_RELOC_RESERVE; +#endif + /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; if (setup_sectors < SETUP_SECT_MIN) @@ -170,6 +270,10 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); +#ifdef CONFIG_EFI_STUB + update_pecoff_setup_and_reloc(i); +#endif + /* Set the default root device */ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); @@ -194,66 +298,8 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - file_sz = sz + i + ((sys_size * 16) - sz); - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - - /* - * Subtract the size of the first section (512 bytes) which - * includes the header and .reloc section. The remaining size - * is that of the .text section. - */ - file_sz -= 512; - - /* Size of code */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); - -#ifdef CONFIG_X86_32 - /* - * Address of entry point. - * - * The EFI stub entry point is +16 bytes from the start of - * the .text section. - */ - put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xb4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xbc]); -#else - /* - * Address of entry point. startup_32 is at the beginning and - * the 64-bit entry point (startup_64) is always 512 bytes - * after. The EFI stub entry point is 16 bytes after that, as - * the first instruction allows legacy loaders to jump over - * the EFI stub initialisation - */ - put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xc4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xcc]); -#endif /* CONFIG_X86_32 */ -#endif /* CONFIG_EFI_STUB */ + update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); +#endif crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 0e3793b821ef..dc580c42851c 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -54,6 +54,20 @@ struct nmiaction { __register_nmi_handler((t), &fn##_na); \ }) +/* + * For special handlers that register/unregister in the + * init section only. This should be considered rare. + */ +#define register_nmi_handler_initonly(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na __initdata = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 04cd6882308e..e1f3a17034fc 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,9 +33,8 @@ #define segment_eq(a, b) ((a).seg == (b).seg) #define user_addr_max() (current_thread_info()->addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < \ - (current_thread_info()->addr_limit.seg)) +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < user_addr_max()) /* * Test whether a block of memory is a valid user space address. @@ -47,14 +46,14 @@ * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ -#define __range_not_ok(addr, size) \ +#define __range_not_ok(addr, size, limit) \ ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ : "=&r" (flag), "=r" (roksum) \ : "1" (addr), "g" ((long)(size)), \ - "rm" (current_thread_info()->addr_limit.seg)); \ + "rm" (limit)); \ flag; \ }) @@ -77,7 +76,8 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. */ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ + (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) /* * The exception table consists of pairs of addresses relative to the diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index becf47b81735..6149b476d9df 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -149,7 +149,6 @@ /* 4 bits of software ack period */ #define UV2_ACK_MASK 0x7UL #define UV2_ACK_UNITS_SHFT 3 -#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT /* diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e76c191a835..d5fd66f0d4cd 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,7 +20,6 @@ #include <linux/bitops.h> #include <linux/ioport.h> #include <linux/suspend.h> -#include <linux/kmemleak.h> #include <asm/e820.h> #include <asm/io.h> #include <asm/iommu.h> @@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void) return 0; } memblock_reserve(addr, aper_size); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. - */ - kmemleak_ignore(phys_to_virt(addr)); printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, addr); insert_aperture_resource((u32)addr, aper_size); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561d1a99..5f0ff597437c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + for_each_cpu(cpu, cfg->domain) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; @@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu(cpu, cfg->old_domain) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a97f3c4a3946..da27c5d2168a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); + unsigned long iv = check_interval * HZ; setup_timer(t, mce_timer_fn, smp_processor_id()); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6da0183..c4706cf9c011 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) if (!cpuc->shared_regs) goto error; } + cpuc->is_fake = 1; return cpuc; error: free_fake_cpuc(cpuc); @@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); } +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + return (__range_not_ok(fp, size, TASK_SIZE) == 0); +} + #ifdef CONFIG_COMPAT #include <asm/compat.h> @@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; - if (fp < compat_ptr(regs->sp)) + if (!valid_user_frame(fp, sizeof(frame))) break; perf_callchain_store(entry, frame.return_address); @@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; - if ((unsigned long)fp < regs->sp) + if (!valid_user_frame(fp, sizeof(frame))) break; perf_callchain_store(entry, frame.return_address); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf54493..7241e2fc3c17 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -117,6 +117,7 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ unsigned int group_flag; + int is_fake; /* * Intel DebugStore bits @@ -364,6 +365,7 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546ec6aef..187c294bc658 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +static int intel_alt_er(int idx) { if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; + return idx; - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { + if (idx == EXTRA_REG_RSP_0) + return EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + return EXTRA_REG_RSP_0; + + return idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= 0x01bb; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; } /* @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, struct event_constraint *c = &emptyconstraint; struct er_account *era; unsigned long flags; - int orig_idx = reg->idx; + int idx = reg->idx; - /* already allocated shared msr */ - if (reg->alloc) + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) return NULL; /* call x86_get_event_constraint() */ again: - era = &cpuc->shared_regs->regs[reg->idx]; + era = &cpuc->shared_regs->regs[idx]; /* * we use spin_lock_irqsave() to avoid lockdep issues when * passing a fake cpuc @@ -1173,6 +1183,29 @@ again: if (!atomic_read(&era->ref) || era->config == reg->config) { + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + /* lock in msr value */ era->config = reg->config; era->reg = reg->reg; @@ -1180,17 +1213,17 @@ again: /* one more user */ atomic_inc(&era->ref); - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - /* * need to call x86_get_event_constraint() * to check if associated event has constraints */ c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; + } else { + idx = intel_alt_er(idx); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, struct er_account *era; /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. */ - if (!reg->alloc) + if (!reg->alloc || cpuc->is_fake) return; era = &cpuc->shared_regs->regs[reg->idx]; @@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_shared_regs_event_constraints(cpuc, event); } -static int intel_pmu_hw_config(struct perf_event *event) +static void intel_pebs_aliases_core2(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { /* * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P * (0x003c) so that we can use it with PEBS. @@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) */ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.precise_ip && x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); if (intel_pmu_needs_lbr_smpl(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, @@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ + x86_add_quirk(intel_sandybridge_quirk); + case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc27f6e5..35e2192df9f4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index e31bf8d5c4d2..149b8d9c6ad4 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); } static void __init cleanup_nmi_testsuite(void) @@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, NMI_FLAG_FIRST, "nmi_selftest")) { nmi_fail = FAILURE; return; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af81604..25b48edb847c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -639,9 +639,11 @@ void native_machine_shutdown(void) set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* - * O.K Now that I'm on the appropriate processor, - * stop all of the others. + * O.K Now that I'm on the appropriate processor, stop all of the + * others. Also disable the local irq to not receive the per-cpu + * timer interrupt which may trigger scheduler's load balance. */ + local_irq_disable(); stop_other_cpus(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd019d78b1f4..3fab55bea29b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_mc && match_llc(c, o))) link_mask(llc_shared, cpu, i); + } + + /* + * This needs a separate iteration over the cpus because we rely on all + * cpu_sibling_mask links to be set-up. + */ + for_each_cpu(i, cpu_sibling_setup_mask) { + o = &cpu_data(i); + if ((i == cpu) || (has_mc && match_mc(c, o))) { link_mask(core, cpu, i); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f61ee67ec00f..677b1ed184c9 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <asm/word-at-a-time.h> +#include <linux/sched.h> /* * best effort, GUP based copy_from_user() that is NMI-safe @@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) void *map; int ret; + if (__range_not_ok(from, n, TASK_SIZE) == 0) + return len; + do { ret = __get_user_pages_fast(addr, 1, 0, &page); if (!ret) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 819137904428..5d7e51f3fd28 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -28,7 +28,7 @@ # - (66): the last prefix is 0x66 # - (F3): the last prefix is 0xF3 # - (F2): the last prefix is 0xF2 -# +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) Table: one byte opcode Referrer: @@ -515,12 +515,12 @@ b4: LFS Gv,Mp b5: LGS Gv,Mp b6: MOVZX Gv,Eb b7: MOVZX Gv,Ew -b8: JMPE | POPCNT Gv,Ev (F3) +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) -bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 97141c26a13a..bc4e9d84157f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en extra += PMD_SIZE; #endif /* The first 2/4M doesn't use large pages. */ - extra += mr->end - mr->start; + if (mr->start < PMD_SIZE) + extra += mr->end - mr->start; ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 732af3a96183..4599c3e8bcb6 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e31bcd8f2eee..fd41a9262d65 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); EXPORT_SYMBOL_GPL(intel_scu_notifier); /* Called by IPC driver */ -void intel_scu_devices_create(void) +void __devinit intel_scu_devices_create(void) { int i; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3ae0e61abd23..59880afa851f 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image &= ~(1L << UV2_LEG_SHFT); mmr_image |= (1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 5f6a5b6c3a15..ddcf39b1a18d 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -66,9 +66,10 @@ BEGIN { rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO - lprefix1_expr = "\\(66\\)" + lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\(F2\\)" + lprefix3_expr = "\\((F2|!F3)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 # All opcodes starting with lower-case 'v' or with (v1) superscript @@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, lprefix1_expr)) { lptable1[idx] = add_flags(lptable1[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix2_expr)) { + } + if (match(ext, lprefix2_expr)) { lptable2[idx] = add_flags(lptable2[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix3_expr)) { + } + if (match(ext, lprefix3_expr)) { lptable3[idx] = add_flags(lptable3[idx],flags) variant = "INAT_VARIANT" - } else { + } + if (!match(ext, lprefix_expr)){ table[idx] = add_flags(table[idx],flags) } } |