From faee9a5dc9d8399cc3b1b8e18b6d7ff7b17f1af1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:10 +0200 Subject: [PATCH] i386/x86-64: Use new official CPUID to get APICID/core split on AMD platforms Previously the apicid<->coreid split was computed based on the max number of cores. Now use a new CPUID AMD defined for that. On most systems right now it should be 0 and the old method will be used. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fb850b52b4da..1cb3e21c571a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -873,10 +873,18 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) int node = 0; unsigned apicid = hard_smp_processor_id(); #endif + unsigned ecx = cpuid_ecx(0x80000008); - bits = 0; - while ((1 << bits) < c->x86_max_cores) - bits++; + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } /* Low order bits define the core id (index of core in socket) */ cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); @@ -964,11 +972,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); - if (c->extended_cpuid_level >= 0x80000008) { - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - } return r; } -- cgit v1.2.3 From 240cd6a80642da528bfa382ec2ae4e3cb8991ea7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:56:13 +0200 Subject: [PATCH] i386/x86-64: Emulate CPUID4 on AMD Intel systems report the cache level data from CPUID 4 in sysfs. Add a CPUID 4 emulation for AMD CPUs to report the same information for them. This allows programs to read this information in a uniform way. The AMD way to report this is less flexible so some assumptions are hardcoded (e.g. no L3) Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 2 + arch/i386/kernel/cpu/intel_cacheinfo.c | 113 +++++++++++++++++++++++++++++---- arch/x86_64/kernel/setup.c | 3 + include/asm-i386/processor.h | 1 + include/asm-x86_64/processor.h | 1 + 5 files changed, 107 insertions(+), 13 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index b9c93d9789e7..fd0457c9c827 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -242,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c) } #endif + if (cpuid_eax(0x80000000) >= 0x80000006) + num_cache_leaves = 3; } static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index c8547a6fa7e6..6c37b4fd8ce2 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -4,6 +4,7 @@ * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) * Ashok Raj : Work with CPU hotplug infrastructure. + * Andi Kleen : CPUID4 emulation on AMD. */ #include @@ -130,25 +131,111 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; }; -static unsigned short num_cache_leaves; +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). + Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +static unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, + [0xf] = 0xffff // ?? + }; +static unsigned char levels[] = { 1, 1, 2 }; +static unsigned char types[] = { 1, 2, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); + + if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) + return; + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + if (leaf <= 1) { + union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + } else { + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + } + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { - unsigned int eax, ebx, ecx, edx; - union _cpuid4_leaf_eax cache_eax; + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; - cpuid_count(4, index, &eax, &ebx, &ecx, &edx); - cache_eax.full = eax; - if (cache_eax.split.type == CACHE_TYPE_NULL) + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) return -EIO; /* better error ? */ - this_leaf->eax.full = eax; - this_leaf->ebx.full = ebx; - this_leaf->ecx.full = ecx; - this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * - (this_leaf->ebx.split.coherency_line_size + 1) * - (this_leaf->ebx.split.physical_line_partition + 1) * - (this_leaf->ebx.split.ways_of_associativity + 1); + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); return 0; } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 1cb3e21c571a..4b7e02216970 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -976,6 +976,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); + /* Fix cpuid4 emulation for more */ + num_cache_leaves = 3; + return r; } diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0c83cf12eec9..b796210c0f5c 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -112,6 +112,7 @@ extern char ignore_fpu_irq; extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; #ifdef CONFIG_X86_HT extern void detect_ht(struct cpuinfo_x86 *c); diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 3061a38a3b1d..e583f0d95209 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -96,6 +96,7 @@ extern char ignore_irq13; extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern unsigned short num_cache_leaves; /* * EFLAGS bits -- cgit v1.2.3 From d167a51877e94dda73dd656c51f363502309f713 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 26 Jun 2006 13:56:16 +0200 Subject: [PATCH] x86_64: x86_64 version of the smp alternative patch. Changes are largely identical to the i386 version: * alternative #define are moved to the new alternative.h file. * one new elf section with pointers to the lock prefixes which can be nop'ed out for non-smp. * two new elf sections simliar to the "classic" alternatives to replace SMP code with simpler UP code. * fixup headers to use alternative.h instead of defining their own LOCK / LOCK_PREFIX macros. The patch reuses the i386 version of the alternatives code to avoid code duplication. The code in alternatives.c was shuffled around a bit to reduce the number of #ifdefs needed. It also got some tweaks needed for x86_64 (vsyscall page handling) and new features (noreplacement option which was x86_64 only up to now). Debug printk's are changed from compile-time to runtime. Loosely based on a early version from Bastian Blank Signed-off-by: Gerd Hoffmann Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/alternative.c | 118 ++++++++++++++++++++++--------- arch/x86_64/kernel/Makefile | 4 +- arch/x86_64/kernel/module.c | 38 ++++++---- arch/x86_64/kernel/setup.c | 76 +------------------- arch/x86_64/kernel/smpboot.c | 4 ++ arch/x86_64/kernel/vmlinux.lds.S | 20 ++++++ arch/x86_64/mm/init.c | 27 ++++---- include/asm-i386/alternative.h | 2 + include/asm-x86_64/alternative.h | 146 +++++++++++++++++++++++++++++++++++++++ include/asm-x86_64/atomic.h | 42 +++++------ include/asm-x86_64/bitops.h | 7 +- include/asm-x86_64/cpufeature.h | 2 + include/asm-x86_64/mutex.h | 4 +- include/asm-x86_64/rwlock.h | 8 +-- include/asm-x86_64/semaphore.h | 8 +-- include/asm-x86_64/spinlock.h | 10 ++- include/asm-x86_64/system.h | 86 +---------------------- 17 files changed, 344 insertions(+), 258 deletions(-) create mode 100644 include/asm-x86_64/alternative.h (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 5cbd6f99fb2a..50eb0e03777e 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -4,27 +4,41 @@ #include #include -#define DEBUG 0 -#if DEBUG -# define DPRINTK(fmt, args...) printk(fmt, args) -#else -# define DPRINTK(fmt, args...) -#endif +static int no_replacement = 0; +static int smp_alt_once = 0; +static int debug_alternative = 0; + +static int __init noreplacement_setup(char *s) +{ + no_replacement = 1; + return 1; +} +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("noreplacement", noreplacement_setup); +__setup("smp-alt-boot", bootonly); +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) if (debug_alternative) \ + printk(KERN_DEBUG fmt, args) + +#ifdef GENERIC_NOP1 /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. */ asm("\t.data\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; +extern unsigned char intelnops[]; static unsigned char *intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, @@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { intelnops + 1 + 2 + 3 + 4 + 5 + 6, intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef K8_NOP1 +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +extern unsigned char k8nops[]; static unsigned char *k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, @@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { k8nops + 1 + 2 + 3 + 4 + 5 + 6, k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef K7_NOP1 +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); +extern unsigned char k7nops[]; static unsigned char *k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, @@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { k7nops + 1 + 2 + 3 + 4 + 5 + 6, k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, }; +#endif + +#ifdef CONFIG_X86_64 + +extern char __vsyscall_0; +static inline unsigned char** find_nop_table(void) +{ + return k8_nops; +} + +#else /* CONFIG_X86_64 */ + static struct nop { int cpuid; unsigned char **noptable; @@ -67,14 +107,6 @@ static struct nop { { -1, NULL } }; - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; -extern u8 *__smp_locks[], *__smp_locks_end[]; - -extern u8 __smp_alt_begin[], __smp_alt_end[]; - - static unsigned char** find_nop_table(void) { unsigned char **noptable = intel_nops; @@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void) return noptable; } +#endif /* CONFIG_X86_64 */ + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with self modifying code. This implies that assymetric systems where @@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { unsigned char **noptable = find_nop_table(); struct alt_instr *a; + u8 *instr; int diff, i, k; DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); @@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) BUG_ON(a->replacementlen > a->instrlen); if (!boot_cpu_has(a->cpuid)) continue; - memcpy(a->instr, a->replacement, a->replacementlen); + instr = a->instr; +#ifdef CONFIG_X86_64 + /* vsyscall code is not mapped yet. resolve it manually. */ + if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { + instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); + DPRINTK("%s: vsyscall fixup: %p => %p\n", + __FUNCTION__, a->instr, instr); + } +#endif + memcpy(instr, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; /* Pad the rest with nops */ for (i = a->replacementlen; diff > 0; diff -= k, i += k) { @@ -186,14 +236,6 @@ struct smp_alt_module { static LIST_HEAD(smp_alt_modules); static DEFINE_SPINLOCK(smp_alt); -static int smp_alt_once = 0; -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); - void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) @@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, struct smp_alt_module *smp; unsigned long flags; + if (no_replacement) + return; + if (smp_alt_once) { if (boot_cpu_has(X86_FEATURE_UP)) alternatives_smp_unlock(locks, locks_end, @@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod) struct smp_alt_module *item; unsigned long flags; - if (smp_alt_once) + if (no_replacement || smp_alt_once) return; spin_lock_irqsave(&smp_alt, flags); @@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp) struct smp_alt_module *mod; unsigned long flags; - if (smp_alt_once) + if (no_replacement || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); @@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp) void __init alternative_instructions(void) { + if (no_replacement) { + printk(KERN_INFO "(SMP-)alternatives turned off\n"); + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + return; + } apply_alternatives(__alt_instructions, __alt_instructions_end); /* switch to patch-once-at-boottime-only mode and free the diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 059c88313f4e..381bc6ad743e 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - pci-dma.o pci-nommu.o + pci-dma.o pci-nommu.o alternative.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -49,3 +49,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o +alternative-y += ../../i386/kernel/alternative.o + diff --git a/arch/x86_64/kernel/module.c b/arch/x86_64/kernel/module.c index bac195c74bcc..9d0958ff547f 100644 --- a/arch/x86_64/kernel/module.c +++ b/arch/x86_64/kernel/module.c @@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs, return -ENOSYS; } -extern void apply_alternatives(void *start, void *end); - int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { - const Elf_Shdr *s; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - /* look for .altinstructions to patch */ - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - void *seg; - if (strcmp(".altinstructions", secstrings + s->sh_name)) - continue; - seg = (void *)s->sh_addr; - apply_alternatives(seg, seg + s->sh_size); - } + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks= s; + } + + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + if (locks && text) { + void *lseg = (void *)locks->sh_addr; + void *tseg = (void *)text->sh_addr; + alternatives_smp_module_add(me, me->name, + lseg, lseg + locks->sh_size, + tseg, tseg + text->sh_size); + } return 0; } void module_arch_cleanup(struct module *mod) { + alternatives_smp_module_del(mod); } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 4b7e02216970..64640c8f5eed 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -473,80 +473,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) } #endif -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); - -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; - -extern char __vsyscall_0; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - In this case boot with "noreplacement". */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - for (a = start; (void *)a < end; a++) { - u8 *instr; - - if (!boot_cpu_has(a->cpuid)) - continue; - - BUG_ON(a->replacementlen > a->instrlen); - instr = a->instr; - /* vsyscall code is not mapped yet. resolve it manually. */ - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); - __inline_memcpy(instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - __inline_memcpy(instr + i, k8_nops[k], k); - } - } -} - -static int no_replacement __initdata = 0; - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - if (no_replacement) - return; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - -static int __init noreplacement_setup(char *s) -{ - no_replacement = 1; - return 1; -} - -__setup("noreplacement", noreplacement_setup); - #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; #ifdef CONFIG_EDD_MODULE @@ -1303,7 +1229,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, "constant_tsc", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 71a7222cf9ce..06535e7687ce 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -797,6 +797,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) } + alternatives_smp_switch(1); + c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { @@ -1259,6 +1261,8 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { printk ("CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index b81f473c4a19..5968c2415da9 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -131,6 +131,26 @@ SECTIONS *(.data.page_aligned) } + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(8); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + . = ALIGN(4096); /* Init code and data */ __init_begin = .; .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 4ba34e95d835..a70a8e3e312d 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -644,20 +644,29 @@ void __init mem_init(void) #endif } -void free_initmem(void) +void free_init_pages(char *what, unsigned long begin, unsigned long end) { unsigned long addr; - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + if (begin >= end) + return; + + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); + for (addr = begin; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); free_page(addr); totalram_pages++; } +} + +void free_initmem(void) +{ memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); - printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); } #ifdef CONFIG_DEBUG_RODATA @@ -686,15 +695,7 @@ void mark_rodata_ro(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start >= end) - return; - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_init_pages("initrd memory", start, end); } #endif diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index d79e9ee10fd7..c61bd1a17f37 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -5,6 +5,8 @@ #include +#include + struct alt_instr { u8 *instr; /* original instruction */ u8 *replacement; diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h new file mode 100644 index 000000000000..387c8f66af7d --- /dev/null +++ b/include/asm-x86_64/alternative.h @@ -0,0 +1,146 @@ +#ifndef _X86_64_ALTERNATIVE_H +#define _X86_64_ALTERNATIVE_H + +#ifdef __KERNEL__ + +#include + +struct alt_instr { + u8 *instr; /* original instruction */ + u8 *replacement; + u8 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction, <= instrlen */ + u8 pad[5]; +}; + +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +struct module; +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_smp_switch(int smp); + +#endif + +/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows to use optimized instructions even on generic binary + * kernels. + * + * length of oldinstr must be longer or equal the length of newinstr + * It can be padded with nops as needed. + * + * For non barrier like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .align 8\n" \ + " .quad 661b\n" /* label */ \ + " .quad 663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature) : "memory") + +/* + * Alternative inline assembly with input. + * + * Pecularities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .align 8\n" \ + " .quad 661b\n" /* label */ \ + " .quad 663f\n" /* new instruction */ \ + " .byte %c0\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" :: "i" (feature), ##input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile ("661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .align 8\n" \ + " .quad 661b\n" /* label */ \ + " .quad 663f\n" /* new instruction */ \ + " .byte %c[feat]\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement,\"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" : output : [feat] "i" (feature), ##input) + +/* + * Alternative inline assembly for SMP. + * + * alternative_smp() takes two versions (SMP first, UP second) and is + * for more complex stuff such as spinlocks. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running a SMP kernel. The existing apply_alternatives() + * works fine for patching a SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define alternative_smp(smpinstr, upinstr, args...) \ + asm volatile ("661:\n\t" smpinstr "\n662:\n" \ + ".section .smp_altinstructions,\"a\"\n" \ + " .align 8\n" \ + " .quad 661b\n" /* label */ \ + " .quad 663f\n" /* new instruction */ \ + " .byte 0x66\n" /* X86_FEATURE_UP */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .smp_altinstr_replacement,\"awx\"\n" \ + "663:\n\t" upinstr "\n" /* replacement */ \ + "664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \ + ".previous" : args) + +#define LOCK_PREFIX \ + ".section .smp_locks,\"a\"\n" \ + " .align 8\n" \ + " .quad 661f\n" /* address */ \ + ".previous\n" \ + "661:\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define alternative_smp(smpinstr, upinstr, args...) \ + asm volatile (upinstr : args) +#define LOCK_PREFIX "" +#endif + +#endif /* _X86_64_ALTERNATIVE_H */ diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index bd3fa67ed835..007e88d6d43f 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -1,7 +1,7 @@ #ifndef __ARCH_X86_64_ATOMIC__ #define __ARCH_X86_64_ATOMIC__ -#include +#include /* atomic_t should be 32 bit signed type */ @@ -52,7 +52,7 @@ typedef struct { volatile int counter; } atomic_t; static __inline__ void atomic_add(int i, atomic_t *v) { __asm__ __volatile__( - LOCK "addl %1,%0" + LOCK_PREFIX "addl %1,%0" :"=m" (v->counter) :"ir" (i), "m" (v->counter)); } @@ -67,7 +67,7 @@ static __inline__ void atomic_add(int i, atomic_t *v) static __inline__ void atomic_sub(int i, atomic_t *v) { __asm__ __volatile__( - LOCK "subl %1,%0" + LOCK_PREFIX "subl %1,%0" :"=m" (v->counter) :"ir" (i), "m" (v->counter)); } @@ -86,7 +86,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v) unsigned char c; __asm__ __volatile__( - LOCK "subl %2,%0; sete %1" + LOCK_PREFIX "subl %2,%0; sete %1" :"=m" (v->counter), "=qm" (c) :"ir" (i), "m" (v->counter) : "memory"); return c; @@ -101,7 +101,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v) static __inline__ void atomic_inc(atomic_t *v) { __asm__ __volatile__( - LOCK "incl %0" + LOCK_PREFIX "incl %0" :"=m" (v->counter) :"m" (v->counter)); } @@ -115,7 +115,7 @@ static __inline__ void atomic_inc(atomic_t *v) static __inline__ void atomic_dec(atomic_t *v) { __asm__ __volatile__( - LOCK "decl %0" + LOCK_PREFIX "decl %0" :"=m" (v->counter) :"m" (v->counter)); } @@ -133,7 +133,7 @@ static __inline__ int atomic_dec_and_test(atomic_t *v) unsigned char c; __asm__ __volatile__( - LOCK "decl %0; sete %1" + LOCK_PREFIX "decl %0; sete %1" :"=m" (v->counter), "=qm" (c) :"m" (v->counter) : "memory"); return c != 0; @@ -152,7 +152,7 @@ static __inline__ int atomic_inc_and_test(atomic_t *v) unsigned char c; __asm__ __volatile__( - LOCK "incl %0; sete %1" + LOCK_PREFIX "incl %0; sete %1" :"=m" (v->counter), "=qm" (c) :"m" (v->counter) : "memory"); return c != 0; @@ -172,7 +172,7 @@ static __inline__ int atomic_add_negative(int i, atomic_t *v) unsigned char c; __asm__ __volatile__( - LOCK "addl %2,%0; sets %1" + LOCK_PREFIX "addl %2,%0; sets %1" :"=m" (v->counter), "=qm" (c) :"ir" (i), "m" (v->counter) : "memory"); return c; @@ -189,7 +189,7 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) { int __i = i; __asm__ __volatile__( - LOCK "xaddl %0, %1;" + LOCK_PREFIX "xaddl %0, %1;" :"=r"(i) :"m"(v->counter), "0"(i)); return i + __i; @@ -237,7 +237,7 @@ typedef struct { volatile long counter; } atomic64_t; static __inline__ void atomic64_add(long i, atomic64_t *v) { __asm__ __volatile__( - LOCK "addq %1,%0" + LOCK_PREFIX "addq %1,%0" :"=m" (v->counter) :"ir" (i), "m" (v->counter)); } @@ -252,7 +252,7 @@ static __inline__ void atomic64_add(long i, atomic64_t *v) static __inline__ void atomic64_sub(long i, atomic64_t *v) { __asm__ __volatile__( - LOCK "subq %1,%0" + LOCK_PREFIX "subq %1,%0" :"=m" (v->counter) :"ir" (i), "m" (v->counter)); } @@ -271,7 +271,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v) unsigned char c; __asm__ __volatile__( - LOCK "subq %2,%0; sete %1" + LOCK_PREFIX "subq %2,%0; sete %1" :"=m" (v->counter), "=qm" (c) :"ir" (i), "m" (v->counter) : "memory"); return c; @@ -286,7 +286,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v) static __inline__ void atomic64_inc(atomic64_t *v) { __asm__ __volatile__( - LOCK "incq %0" + LOCK_PREFIX "incq %0" :"=m" (v->counter) :"m" (v->counter)); } @@ -300,7 +300,7 @@ static __inline__ void atomic64_inc(atomic64_t *v) static __inline__ void atomic64_dec(atomic64_t *v) { __asm__ __volatile__( - LOCK "decq %0" + LOCK_PREFIX "decq %0" :"=m" (v->counter) :"m" (v->counter)); } @@ -318,7 +318,7 @@ static __inline__ int atomic64_dec_and_test(atomic64_t *v) unsigned char c; __asm__ __volatile__( - LOCK "decq %0; sete %1" + LOCK_PREFIX "decq %0; sete %1" :"=m" (v->counter), "=qm" (c) :"m" (v->counter) : "memory"); return c != 0; @@ -337,7 +337,7 @@ static __inline__ int atomic64_inc_and_test(atomic64_t *v) unsigned char c; __asm__ __volatile__( - LOCK "incq %0; sete %1" + LOCK_PREFIX "incq %0; sete %1" :"=m" (v->counter), "=qm" (c) :"m" (v->counter) : "memory"); return c != 0; @@ -357,7 +357,7 @@ static __inline__ int atomic64_add_negative(long i, atomic64_t *v) unsigned char c; __asm__ __volatile__( - LOCK "addq %2,%0; sets %1" + LOCK_PREFIX "addq %2,%0; sets %1" :"=m" (v->counter), "=qm" (c) :"ir" (i), "m" (v->counter) : "memory"); return c; @@ -374,7 +374,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t *v) { long __i = i; __asm__ __volatile__( - LOCK "xaddq %0, %1;" + LOCK_PREFIX "xaddq %0, %1;" :"=r"(i) :"m"(v->counter), "0"(i)); return i + __i; @@ -418,11 +418,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v) /* These are x86-specific, used by some header files */ #define atomic_clear_mask(mask, addr) \ -__asm__ __volatile__(LOCK "andl %0,%1" \ +__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \ : : "r" (~(mask)),"m" (*addr) : "memory") #define atomic_set_mask(mask, addr) \ -__asm__ __volatile__(LOCK "orl %0,%1" \ +__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \ : : "r" ((unsigned)mask),"m" (*(addr)) : "memory") /* Atomic operations are already serializing on x86 */ diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index e9bf933d25d0..f7ba57b1cc08 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -5,12 +5,7 @@ * Copyright 1992, Linus Torvalds. */ - -#ifdef CONFIG_SMP -#define LOCK_PREFIX "lock ; " -#else -#define LOCK_PREFIX "" -#endif +#include #define ADDR (*(volatile long *) addr) diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index 662964b74e34..afc44e557400 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -65,6 +65,8 @@ #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */ #define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ #define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ +#define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ + /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h index 11fbee2bd6c0..06fab6de2a88 100644 --- a/include/asm-x86_64/mutex.h +++ b/include/asm-x86_64/mutex.h @@ -24,7 +24,7 @@ do { \ typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ - LOCK " decl (%%rdi) \n" \ + LOCK_PREFIX " decl (%%rdi) \n" \ " js 2f \n" \ "1: \n" \ \ @@ -74,7 +74,7 @@ do { \ typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ \ __asm__ __volatile__( \ - LOCK " incl (%%rdi) \n" \ + LOCK_PREFIX " incl (%%rdi) \n" \ " jle 2f \n" \ "1: \n" \ \ diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h index 9942cc393064..dea0e9459264 100644 --- a/include/asm-x86_64/rwlock.h +++ b/include/asm-x86_64/rwlock.h @@ -24,7 +24,7 @@ #define RW_LOCK_BIAS_STR "0x01000000" #define __build_read_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $1,(%0)\n\t" \ + asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \ "js 2f\n" \ "1:\n" \ LOCK_SECTION_START("") \ @@ -34,7 +34,7 @@ ::"a" (rw) : "memory") #define __build_read_lock_const(rw, helper) \ - asm volatile(LOCK "subl $1,%0\n\t" \ + asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \ "js 2f\n" \ "1:\n" \ LOCK_SECTION_START("") \ @@ -54,7 +54,7 @@ } while (0) #define __build_write_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ "jnz 2f\n" \ "1:\n" \ LOCK_SECTION_START("") \ @@ -64,7 +64,7 @@ ::"a" (rw) : "memory") #define __build_write_lock_const(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ + asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ "jnz 2f\n" \ "1:\n" \ LOCK_SECTION_START("") \ diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h index a389aa6fe80f..064df08b9a0f 100644 --- a/include/asm-x86_64/semaphore.h +++ b/include/asm-x86_64/semaphore.h @@ -106,7 +106,7 @@ static inline void down(struct semaphore * sem) __asm__ __volatile__( "# atomic down operation\n\t" - LOCK "decl %0\n\t" /* --sem->count */ + LOCK_PREFIX "decl %0\n\t" /* --sem->count */ "js 2f\n" "1:\n" LOCK_SECTION_START("") @@ -130,7 +130,7 @@ static inline int down_interruptible(struct semaphore * sem) __asm__ __volatile__( "# atomic interruptible down operation\n\t" - LOCK "decl %1\n\t" /* --sem->count */ + LOCK_PREFIX "decl %1\n\t" /* --sem->count */ "js 2f\n\t" "xorl %0,%0\n" "1:\n" @@ -154,7 +154,7 @@ static inline int down_trylock(struct semaphore * sem) __asm__ __volatile__( "# atomic interruptible down operation\n\t" - LOCK "decl %1\n\t" /* --sem->count */ + LOCK_PREFIX "decl %1\n\t" /* --sem->count */ "js 2f\n\t" "xorl %0,%0\n" "1:\n" @@ -178,7 +178,7 @@ static inline void up(struct semaphore * sem) { __asm__ __volatile__( "# atomic up operation\n\t" - LOCK "incl %0\n\t" /* ++sem->count */ + LOCK_PREFIX "incl %0\n\t" /* ++sem->count */ "jle 2f\n" "1:\n" LOCK_SECTION_START("") diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index 5d8a5e3589ff..8d3421996f94 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -31,15 +31,19 @@ "jmp 1b\n" \ LOCK_SECTION_END +#define __raw_spin_lock_string_up \ + "\n\tdecl %0" + #define __raw_spin_unlock_string \ "movl $1,%0" \ :"=m" (lock->slock) : : "memory" static inline void __raw_spin_lock(raw_spinlock_t *lock) { - __asm__ __volatile__( - __raw_spin_lock_string - :"=m" (lock->slock) : : "memory"); + alternative_smp( + __raw_spin_lock_string, + __raw_spin_lock_string_up, + "=m" (lock->slock) : : "memory"); } #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h index f48e0dad8b3d..68e559f3631c 100644 --- a/include/asm-x86_64/system.h +++ b/include/asm-x86_64/system.h @@ -3,15 +3,10 @@ #include #include +#include #ifdef __KERNEL__ -#ifdef CONFIG_SMP -#define LOCK_PREFIX "lock ; " -#else -#define LOCK_PREFIX "" -#endif - #define __STR(x) #x #define STR(x) __STR(x) @@ -34,7 +29,7 @@ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ @@ -69,82 +64,6 @@ extern void load_gs_index(unsigned); ".previous" \ : :"r" (value), "r" (0)) -#ifdef __KERNEL__ -struct alt_instr { - __u8 *instr; /* original instruction */ - __u8 *replacement; - __u8 cpuid; /* cpuid bit set for replacement */ - __u8 instrlen; /* length of original instruction */ - __u8 replacementlen; /* length of new instruction, <= instrlen */ - __u8 pad[5]; -}; -#endif - -/* - * Alternative instructions for different CPU types or capabilities. - * - * This allows to use optimized instructions even on generic binary - * kernels. - * - * length of oldinstr must be longer or equal the length of newinstr - * It can be padded with nops as needed. - * - * For non barrier like inlines please define new variants - * without volatile and memory clobber. - */ -#define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature) : "memory") - -/* - * Alternative inline assembly with input. - * - * Peculiarities: - * No memory clobber here. - * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... "r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. - */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature), ##input) - -/* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - " .align 8\n" \ - " .quad 661b\n" /* label */ \ - " .quad 663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) - /* * Clear and set 'TS' bit respectively */ @@ -366,5 +285,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); +extern void free_init_pages(char *what, unsigned long begin, unsigned long end); #endif -- cgit v1.2.3 From 5c0f80fab3724aa44b3352d88155fe0eaae0e54a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:57:04 +0200 Subject: [PATCH] x86_64: Remove long obsolete CVS Early development of x86-64 Linux was in CVS, but that hasn't been the case for a long time now. Remove the obsolete $Id$s. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/fpu32.c | 1 - arch/x86_64/ia32/ia32_signal.c | 2 -- arch/x86_64/ia32/ptrace32.c | 2 -- arch/x86_64/ia32/sys_ia32.c | 8 -------- arch/x86_64/kernel/aperture.c | 1 - arch/x86_64/kernel/e820.c | 1 - arch/x86_64/kernel/head64.c | 2 -- arch/x86_64/kernel/process.c | 1 - arch/x86_64/kernel/setup.c | 2 -- arch/x86_64/kernel/setup64.c | 1 - arch/x86_64/kernel/signal.c | 2 -- arch/x86_64/kernel/traps.c | 2 -- include/asm-x86_64/dma.h | 2 +- include/asm-x86_64/hw_irq.h | 2 -- 14 files changed, 1 insertion(+), 28 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/ia32/fpu32.c b/arch/x86_64/ia32/fpu32.c index 1c23095f1813..2c8209a3605a 100644 --- a/arch/x86_64/ia32/fpu32.c +++ b/arch/x86_64/ia32/fpu32.c @@ -2,7 +2,6 @@ * Copyright 2002 Andi Kleen, SuSE Labs. * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes. * This is used for ptrace, signals and coredumps in 32bit emulation. - * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $ */ #include diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index e0a92439f634..25e5ca22204c 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -6,8 +6,6 @@ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen - * - * $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $ */ #include diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 4defe63f5916..a590b7a0d92d 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -7,8 +7,6 @@ * * This allows to access 64bit processes too; but there is no way to see the extended * register contents. - * - * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $ */ #include diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index ee30557629dc..afbebde9326d 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -929,13 +929,5 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); } -static int __init ia32_init (void) -{ - printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n"); - return 0; -} - -__initcall(ia32_init); - extern unsigned long ia32_sys_call_table[]; EXPORT_SYMBOL(ia32_sys_call_table); diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index a7ad03ee98cf..9ea723bab16c 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -8,7 +8,6 @@ * because only the bootmem allocator can allocate 32+MB. * * Copyright 2002 Andi Kleen, SuSE Labs. - * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $ */ #include #include diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 1ef6028f721e..d8ea250f3925 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -1,7 +1,6 @@ /* * Handle the memory map. * The functions here do the job until bootmem takes over. - * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $ * * Getting sanitize_e820_map() in sync with i386 version by applying change: * - Provisions for empty E820 memory regions (reported by certain BIOSes). diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index cea20a66c150..e6a71c9556d9 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -2,8 +2,6 @@ * linux/arch/x86_64/kernel/head64.c -- prepare to run common code * * Copyright (C) 2000 Andrea Arcangeli SuSE - * - * $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $ */ #include diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index fb903e65e079..52c03f6c7731 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -10,7 +10,6 @@ * Andi Kleen. * * CPU hotplug support - ashok.raj@intel.com - * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $ */ /* diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 64640c8f5eed..a91f1d94f9e5 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -5,8 +5,6 @@ * * Nov 2001 Dave Jones * Forked from i386 setup code. - * - * $Id$ */ /* diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 8a691fa6d393..e5bf22a01edb 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -3,7 +3,6 @@ * Copyright (C) 1995 Linus Torvalds * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen. * See setup.c for older changelog. - * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $ */ #include #include diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index e5f5ce7909a3..94b4b1128ac8 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -7,8 +7,6 @@ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen - * - * $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $ */ #include diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index cea335e8746c..9f8f1eff4a6c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -6,8 +6,6 @@ * * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 - * - * $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $ */ /* diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h index c556208d3dd7..a37c16f06289 100644 --- a/include/asm-x86_64/dma.h +++ b/include/asm-x86_64/dma.h @@ -1,4 +1,4 @@ -/* $Id: dma.h,v 1.1.1.1 2001/04/19 20:00:38 ak Exp $ +/* * linux/include/asm/dma.h: Defines for using and allocating dma channels. * Written by Hennus Bergman, 1992. * High DMA channel support & info by Hannu Savolainen diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 3de96fd86a70..1b2ac55d3204 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -12,8 +12,6 @@ * * * hacked by Andi Kleen for x86-64. - * - * $Id: hw_irq.h,v 1.24 2001/09/14 20:55:03 vojtech Exp $ */ #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 357c2b9056df447390b7df3e49960a4c609a89a9 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 26 Jun 2006 13:57:13 +0200 Subject: [PATCH] x86_64: remove unused gart header file include/asm-x86_64/gart-mapping.h is only ever used in arch/x86_64/kernel/setup.c and none of its contents are referenced. Looks to be leftover cruft not removed in the dma_ops patch. Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 1 - include/asm-x86_64/gart-mapping.h | 16 ---------------- 2 files changed, 17 deletions(-) delete mode 100644 include/asm-x86_64/gart-mapping.h (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a91f1d94f9e5..9dadb9a1db2a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -65,7 +65,6 @@ #include #include #include -#include #include /* diff --git a/include/asm-x86_64/gart-mapping.h b/include/asm-x86_64/gart-mapping.h deleted file mode 100644 index ada497b0b55b..000000000000 --- a/include/asm-x86_64/gart-mapping.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _X8664_GART_MAPPING_H -#define _X8664_GART_MAPPING_H 1 - -#include -#include - -struct device; - -extern void* -gart_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - -extern int -gart_dma_supported(struct device *hwdev, u64 mask); - -#endif /* _X8664_GART_MAPPING_H */ -- cgit v1.2.3 From a813ce432f27c4f5011c7b5ac9d2bbbfeb41d9a7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:57:22 +0200 Subject: [PATCH] x86_64: Rename IOMMU option, fix help and mark option embedded. - Rename the GART_IOMMU option to IOMMU to make clear it's not just for AMD - Rewrite the help text to better emphatise this fact - Make it an embedded option because too many people get it wrong. To my astonishment I discovered the aacraid driver tests this symbol directly. This looks quite broken to me - it's an internal implementation detail of the PCI DMA API. Can the maintainer please clarify what this test was intended to do? Cc: linux-scsi@vger.kernel.org Cc: alan@redhat.com Cc: markh@osdl.org Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 30 ++++++++++++++++-------------- arch/x86_64/Kconfig.debug | 2 +- arch/x86_64/kernel/Makefile | 2 +- arch/x86_64/kernel/io_apic.c | 2 +- arch/x86_64/kernel/pci-dma.c | 2 +- arch/x86_64/kernel/setup.c | 2 +- drivers/char/agp/Kconfig | 4 ++-- drivers/char/agp/amd64-agp.c | 4 ++-- drivers/scsi/aacraid/comminit.c | 5 ++++- include/asm-x86_64/pci.h | 2 +- include/asm-x86_64/proto.h | 2 +- 11 files changed, 31 insertions(+), 26 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index fc75275d8c72..e8c52a1eec06 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -386,24 +386,26 @@ config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y -config GART_IOMMU - bool "K8 GART IOMMU support" +# Mark as embedded because too many people got it wrong. +# The code disables itself when not needed. +config IOMMU + bool "IOMMU support" if EMBEDDED default y select SWIOTLB select AGP depends on PCI help - Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors - and for the bounce buffering software IOMMU. - Needed to run systems with more than 3GB of memory properly with - 32-bit PCI devices that do not support DAC (Double Address Cycle). - The IOMMU can be turned off at runtime with the iommu=off parameter. - Normally the kernel will take the right choice by itself. - This option includes a driver for the AMD Opteron/Athlon64 IOMMU - northbridge and a software emulation used on other systems without - hardware IOMMU. If unsure, say Y. - -# need this always selected by GART_IOMMU for the VIA workaround + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, + sound, many IDE/SATA chipsets and some other devices. + Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART + based IOMMU and a software bounce buffer based IOMMU used on Intel + systems and as fallback. + The code is only active when needed (enough memory and limited + device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified + too. + +# need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -503,7 +505,7 @@ config REORDER config K8_NB def_bool y - depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA) + depends on AGP_AMD64 || IOMMU || (PCI && NUMA) endmenu diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index ea31b4c62105..0226f08be874 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -13,7 +13,7 @@ config DEBUG_RODATA If in doubt, say "N". config IOMMU_DEBUG - depends on GART_IOMMU && DEBUG_KERNEL + depends on IOMMU && DEBUG_KERNEL bool "Enable IOMMU debugging" help Force the IOMMU to on even when you have less than 4GB of diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index f927d11065fe..fd106bdddd6d 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -28,7 +28,7 @@ obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o +obj-$(CONFIG_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 519cd4e6f9e7..c768d8a036d0 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -319,7 +319,7 @@ void __init check_ioapic(void) vendor &= 0xffff; switch (vendor) { case PCI_VENDOR_ID_VIA: -#ifdef CONFIG_GART_IOMMU +#ifdef CONFIG_IOMMU if ((end_pfn > MAX_DMA32_PFN || force_iommu) && !iommu_aperture_allowed) { diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 117bcebe9ebc..33926c367250 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -266,7 +266,7 @@ __init int iommu_setup(char *p) swiotlb = 1; #endif -#ifdef CONFIG_GART_IOMMU +#ifdef CONFIG_IOMMU gart_parse_options(p); #endif diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9dadb9a1db2a..816203d57e11 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -702,7 +702,7 @@ void __init setup_arch(char **cmdline_p) e820_setup_gap(); -#ifdef CONFIG_GART_IOMMU +#ifdef CONFIG_IOMMU iommu_hole_init(); #endif diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index 46685a540772..9826a399fa02 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -55,9 +55,9 @@ config AGP_AMD X on AMD Irongate, 761, and 762 chipsets. config AGP_AMD64 - tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU + tristate "AMD Opteron/Athlon64 on-CPU GART support" if !IOMMU depends on AGP && X86 - default y if GART_IOMMU + default y if IOMMU help This option gives you AGP support for the GLX component of X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 229d015757f9..f690ee8cb732 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -292,7 +292,7 @@ static int __devinit aperture_valid(u64 aper, u32 size) /* * W*s centric BIOS sometimes only set up the aperture in the AGP * bridge, not the northbridge. On AMD64 this is handled early - * in aperture.c, but when GART_IOMMU is not enabled or we run + * in aperture.c, but when IOMMU is not enabled or we run * on a 32bit kernel this needs to be redone. * Unfortunately it is impossible to fix the aperture here because it's too late * to allocate that much memory. But at least error out cleanly instead of @@ -775,7 +775,7 @@ static void __exit agp_amd64_cleanup(void) /* On AMD64 the PCI driver needs to initialize this driver early for the IOMMU, so it has to be called via a backdoor. */ -#ifndef CONFIG_GART_IOMMU +#ifndef CONFIG_IOMMU module_init(agp_amd64_init); module_exit(agp_amd64_cleanup); #endif diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 35b0a6ebd3f5..7cea514e810a 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -104,8 +104,11 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co * always true on real computers. It also has some slight problems * with the GART on x86-64. I've btw never tried DMA from PCI space * on this platform but don't be surprised if its problematic. + * [AK: something is very very wrong when a driver tests this symbol. + * Someone should figure out what the comment writer really meant here and fix + * the code. Or just remove that bad code. ] */ -#ifndef CONFIG_GART_IOMMU +#ifndef CONFIG_IOMMU if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) { init->HostPhysMemPages = cpu_to_le32(num_physpages << (PAGE_SHIFT-12)); diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index 2db0620d5449..3374d34c4acd 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -52,7 +52,7 @@ extern int iommu_setup(char *opt); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#ifdef CONFIG_GART_IOMMU +#ifdef CONFIG_IOMMU /* * x86-64 always supports DAC, but sometimes it is useful to force diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 8abf2a43c944..1064533e0959 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -116,7 +116,7 @@ extern int skip_ioapic_setup; extern int acpi_ht; extern int acpi_disabled; -#ifdef CONFIG_GART_IOMMU +#ifdef CONFIG_IOMMU extern int fallback_aper_order; extern int fallback_aper_force; extern int iommu_aperture; -- cgit v1.2.3 From 2bbc419f9d51b44f3fdeea12c5b786bdace82b8e Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Mon, 26 Jun 2006 13:58:02 +0200 Subject: [PATCH] x86_64: Change assembly to use regular cpuid_count macro Minor cleanup patch: Replacing the asm statement with cpuid_count macro(which already provides the same functionality). Signed-off-by: Rohit Seth Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 816203d57e11..a9de8f02671f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -957,15 +957,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) */ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { - unsigned int eax; + unsigned int eax, t; if (c->cpuid_level < 4) return 1; - __asm__("cpuid" - : "=a" (eax) - : "0" (4), "c" (0) - : "bx", "dx"); + cpuid_count(4, 0, &eax, &t, &t, &t); if (eax & 0x1f) return ((eax >> 26) + 1); -- cgit v1.2.3 From 0dc243ae10c8309c170a3af9f1adad1924a9f217 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 26 Jun 2006 13:58:11 +0200 Subject: [PATCH] x86_64: Calgary IOMMU - IOMMU abstractions This patch creates a new interface for IOMMUs by adding a centralized location for IOMMU allocation (for translation tables/apertures) and IOMMU initialization. In creating these, code was moved around for abstraction, uniformity, and consiceness. Take note of the move of the iommu_setup bootarg parsing code to __setup. This is enabled by moving back the location of the aperture allocation/detection to mem init (which while ugly, was already the location of the swiotlb_init). While a slight departure from the previous patch, I belive this provides the true intention of the previous versions of the patch which changed this code. It also makes the addition of the upcoming calgary code much cleaner than previous patches. [AK: Removed one broken change. iommu_setup still has to be called early] Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-dma.c | 29 +++++++++++++++++++++++++++++ arch/x86_64/kernel/pci-gart.c | 13 ++++++------- arch/x86_64/kernel/setup.c | 5 ----- arch/x86_64/mm/init.c | 7 +------ include/asm-x86_64/pci.h | 2 +- include/asm-x86_64/proto.h | 10 +++++----- 6 files changed, 42 insertions(+), 24 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 7edd1a40fab3..a45844c7e3a3 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -279,3 +279,32 @@ __init int iommu_setup(char *p) } return 1; } +__setup("iommu=", iommu_setup); + +void __init pci_iommu_alloc(void) +{ + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ +#ifdef CONFIG_IOMMU + iommu_hole_init(); +#endif + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + +static int __init pci_iommu_init(void) +{ +#ifdef CONFIG_IOMMU + gart_iommu_init(); +#endif + + no_iommu_init(); + return 0; +} + +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 4f67957d2b42..9a93954bed37 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -571,7 +571,7 @@ static struct dma_mapping_ops gart_dma_ops = { .unmap_sg = gart_unmap_sg, }; -static int __init pci_iommu_init(void) +void __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long aper_size; @@ -581,7 +581,7 @@ static int __init pci_iommu_init(void) if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); - return -ENODEV; + return; } #ifndef CONFIG_AGP_AMD64 @@ -595,11 +595,11 @@ static int __init pci_iommu_init(void) #endif if (swiotlb) - return -ENODEV; + return; /* Did we detect a different HW IOMMU? */ if (iommu_detected && !iommu_aperture) - return -1; + return; if (no_iommu || (!force_iommu && end_pfn <= MAX_DMA32_PFN) || @@ -611,7 +611,7 @@ static int __init pci_iommu_init(void) "but IOMMU not available.\n" KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } - return -ENODEV; + return; } printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); @@ -678,11 +678,10 @@ static int __init pci_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; - return 0; } /* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); +fs_initcall(gart_iommu_init); void gart_parse_options(char *p) { diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a9de8f02671f..04b2d7b92d17 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -63,7 +63,6 @@ #include #include #include -#include #include #include @@ -702,10 +701,6 @@ void __init setup_arch(char **cmdline_p) e820_setup_gap(); -#ifdef CONFIG_IOMMU - iommu_hole_init(); -#endif - #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b83645a2e02d..2f5f5b11e9d0 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -41,8 +41,6 @@ #include #include #include -#include -#include #ifndef Dprintk #define Dprintk(x...) @@ -587,10 +585,7 @@ void __init mem_init(void) { long codesize, reservedpages, datasize, initsize; -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif - no_iommu_init(); + pci_iommu_alloc(); /* How many end-of-memory variables you have, grandma! */ max_low_pfn = end_pfn; diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index 3374d34c4acd..4dbc07c54f7a 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -39,8 +39,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); #include #include #include -#include /* for have_iommu */ +extern void pci_iommu_alloc(void); extern int iommu_setup(char *opt); /* The PCI address space does equal the physical memory diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 9d3335b4e9b6..038fe1f47e6f 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -37,7 +37,6 @@ extern void ia32_sysenter_target(void); extern void config_acpi_tables(void); extern void ia32_syscall(void); -extern void iommu_hole_init(void); extern int pmtimer_mark_offset(void); extern void pmtimer_resume(void); @@ -101,13 +100,9 @@ extern int unsynchronized_tsc(void); extern void select_idle_routine(const struct cpuinfo_x86 *c); -extern void gart_parse_options(char *); -extern void __init no_iommu_init(void); - extern unsigned long table_start, table_end; extern int exception_trace; -extern int force_iommu, no_iommu; extern int using_apic_timer; extern int disable_apic; extern unsigned cpu_khz; @@ -116,8 +111,13 @@ extern int skip_ioapic_setup; extern int acpi_ht; extern int acpi_disabled; +extern void no_iommu_init(void); +extern int force_iommu, no_iommu; extern int iommu_detected; #ifdef CONFIG_IOMMU +extern void gart_iommu_init(void); +extern void gart_parse_options(char *); +extern void iommu_hole_init(void); extern int fallback_aper_order; extern int fallback_aper_force; extern int iommu_aperture; -- cgit v1.2.3 From f3fa8ebc25129bb69929e20b0c84049c39029d8d Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Mon, 26 Jun 2006 13:58:17 +0200 Subject: [PATCH] x86_64: moving phys_proc_id and cpu_core_id to cpuinfo_x86 Most of the fields of cpuinfo are defined in cpuinfo_x86 structure. This patch moves the phys_proc_id and cpu_core_id for each processor to cpuinfo_x86 structure as well. Signed-off-by: Rohit Seth Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce_amd.c | 10 +++++----- arch/x86_64/kernel/pci-gart.c | 3 --- arch/x86_64/kernel/setup.c | 27 +++++++++++++-------------- arch/x86_64/kernel/smpboot.c | 14 +++++--------- include/asm-x86_64/processor.h | 4 ++++ include/asm-x86_64/smp.h | 2 -- include/asm-x86_64/topology.h | 6 ++---- 7 files changed, 29 insertions(+), 37 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d13b241ad094..86e1e022b20e 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -115,7 +115,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) per_cpu(bank_map, cpu) |= (1 << bank); #ifdef CONFIG_SMP - if (shared_bank[bank] && cpu_core_id[cpu]) + if (shared_bank[bank] && c->cpu_core_id) continue; #endif @@ -323,10 +323,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) struct threshold_bank *b = NULL; #ifdef CONFIG_SMP - if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ + if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ char name[16]; unsigned lcpu = first_cpu(cpu_core_map[cpu]); - if (cpu_core_id[lcpu]) + if (cpu_data[lcpu].cpu_core_id) goto out; /* first core not up yet */ b = per_cpu(threshold_banks, lcpu)[bank]; @@ -434,7 +434,7 @@ static __cpuinit int threshold_create_symlinks(unsigned int cpu) int bank, err = 0; unsigned int lcpu = 0; - if (cpu_core_id[cpu]) + if (cpu_data[cpu].cpu_core_id) return 0; for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { if (lcpu == cpu) @@ -455,7 +455,7 @@ static __cpuinit void threshold_remove_symlinks(unsigned int cpu) { int bank; unsigned int lcpu = 0; - if (cpu_core_id[cpu]) + if (cpu_data[cpu].cpu_core_id) return; for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { if (lcpu == cpu) diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 9a93954bed37..4ca674d16b09 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -680,9 +680,6 @@ void __init gart_iommu_init(void) dma_ops = &gart_dma_ops; } -/* Must execute after PCI subsystem */ -fs_initcall(gart_iommu_init); - void gart_parse_options(char *p) { int arg; diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 04b2d7b92d17..24aa25ee0d7d 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -785,9 +785,9 @@ static int nearby_node(int apicid) static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - int cpu = smp_processor_id(); unsigned bits; #ifdef CONFIG_NUMA + int cpu = smp_processor_id(); int node = 0; unsigned apicid = hard_smp_processor_id(); #endif @@ -805,12 +805,12 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) } /* Low order bits define the core id (index of core in socket) */ - cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); + c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); /* Convert the APIC ID into the socket ID */ - phys_proc_id[cpu] = phys_pkg_id(bits); + c->phys_proc_id = phys_pkg_id(bits); #ifdef CONFIG_NUMA - node = phys_proc_id[cpu]; + node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; if (!node_online(node)) { @@ -823,7 +823,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (phys_proc_id[0] << bits); + int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -834,7 +834,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) numa_set_node(cpu, node); printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", - cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]); + cpu, apicid, c->x86_max_cores, node, c->cpu_core_id); #endif #endif } @@ -905,7 +905,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; int index_msb, core_bits; - int cpu = smp_processor_id(); cpuid(1, &eax, &ebx, &ecx, &edx); @@ -926,10 +925,10 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); - phys_proc_id[cpu] = phys_pkg_id(index_msb); + c->phys_proc_id = phys_pkg_id(index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); + c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -937,12 +936,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id(index_msb) & + c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", - cpu_core_id[cpu]); + c->cpu_core_id); } #endif } @@ -1080,7 +1079,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_SMP - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif } @@ -1288,9 +1287,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { int cpu = c - cpu_data; - seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); - seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 06535e7687ce..b1c10b154bfe 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -63,10 +63,6 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; -/* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; -/* core ID of each logical CPU */ -u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* Last level cache ID of each logical CPU */ u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; @@ -472,8 +468,8 @@ static inline void set_cpu_sibling_map(int cpu) if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (phys_proc_id[cpu] == phys_proc_id[i] && - cpu_core_id[cpu] == cpu_core_id[i]) { + if (c[cpu].phys_proc_id == c[i].phys_proc_id && + c[cpu].cpu_core_id == c[i].cpu_core_id) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); @@ -500,7 +496,7 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(i, c[cpu].llc_shared_map); cpu_set(cpu, c[i].llc_shared_map); } - if (phys_proc_id[cpu] == phys_proc_id[i]) { + if (c[cpu].phys_proc_id == c[i].phys_proc_id) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); /* @@ -1201,8 +1197,8 @@ static void remove_siblinginfo(int cpu) cpu_clear(cpu, cpu_sibling_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); - phys_proc_id[cpu] = BAD_APICID; - cpu_core_id[cpu] = BAD_APICID; + c[cpu].phys_proc_id = 0; + c[cpu].cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index e583f0d95209..3b3c1217fe61 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -69,7 +69,11 @@ struct cpuinfo_x86 { cpumask_t llc_shared_map; /* cpus sharing the last level cache */ #endif __u8 apicid; +#ifdef CONFIG_SMP __u8 booted_cores; /* number of cores as seen by OS */ + __u8 phys_proc_id; /* Physical Processor id. */ + __u8 cpu_core_id; /* Core id. */ +#endif } ____cacheline_aligned; #define X86_VENDOR_INTEL 0 diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 7686b9b25aef..6805e1feb300 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -53,8 +53,6 @@ extern int smp_call_function_single(int cpuid, void (*func) (void *info), extern cpumask_t cpu_sibling_map[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; -extern u8 phys_proc_id[NR_CPUS]; -extern u8 cpu_core_id[NR_CPUS]; extern u8 cpu_llc_id[NR_CPUS]; #define SMP_TRAMPOLINE_BASE 0x6000 diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index d4009f8dade0..c4e46e7fa7ba 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -55,10 +55,8 @@ extern int __node_distance(int, int); #endif #ifdef CONFIG_SMP -#define topology_physical_package_id(cpu) \ - (phys_proc_id[cpu] == BAD_APICID ? -1 : phys_proc_id[cpu]) -#define topology_core_id(cpu) \ - (cpu_core_id[cpu] == BAD_APICID ? 0 : cpu_core_id[cpu]) +#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) +#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) #endif -- cgit v1.2.3 From 7b0e85012581a9a8b12a8191d365716bf3ca3d3c Mon Sep 17 00:00:00 2001 From: Vojtech Pavlik Date: Mon, 26 Jun 2006 13:58:26 +0200 Subject: [PATCH] x86_64: Add X86_FEATURE_RDTSCP, fix rdtscp in /proc/cpuinfo This patch adds the X86_FEATURE_RDTSCP #define, so that kernel code can check for the feature easily and also fixes the location of the "rdtscp" string in the cpuinfo tables. Signed-off-by: Vojtech Pavlik Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 2 +- include/asm-x86_64/cpufeature.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 24aa25ee0d7d..923b76fb0aa6 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1206,7 +1206,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index afc44e557400..fbf5a19edbb3 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -46,6 +46,7 @@ #define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ #define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSR optimizations */ +#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ #define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ #define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ #define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */ -- cgit v1.2.3 From e42f94373764d825e2c0464092738f66f5d3effb Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Mon, 26 Jun 2006 13:59:14 +0200 Subject: [PATCH] x86_64: x86_64 setup.c - printing cmp related boottime information Getting phys_proc_id and cpu_core_id information to be printed at boot time for AMD processors. Also matching the Node related boot time information that gets printed for Intel and AMD processors for NUMA configurations. Signed-off-by: Rohit Seth Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 923b76fb0aa6..8ade23d7ae76 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -833,15 +833,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", - cpu, apicid, c->x86_max_cores, node, c->cpu_core_id); + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif #endif } -static int __init init_amd(struct cpuinfo_x86 *c) +static void __init init_amd(struct cpuinfo_x86 *c) { - int r; unsigned level; #ifdef CONFIG_SMP @@ -874,8 +872,8 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); - r = get_model_name(c); - if (!r) { + level = get_model_name(c); + if (!level) { switch (c->x86) { case 15: /* Should distinguish Models here, but this is only @@ -896,8 +894,6 @@ static int __init init_amd(struct cpuinfo_x86 *c) /* Fix cpuid4 emulation for more */ num_cache_leaves = 3; - - return r; } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -909,8 +905,10 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; smp_num_siblings = (ebx & 0xff0000) >> 16; @@ -927,9 +925,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = phys_pkg_id(index_msb); - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - smp_num_siblings = smp_num_siblings / c->x86_max_cores; index_msb = get_count_order(smp_num_siblings) ; @@ -938,11 +933,13 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); } +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); + } + #endif } @@ -969,16 +966,17 @@ static void srat_detect_node(void) #ifdef CONFIG_NUMA unsigned node; int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); /* Don't do the funky fallback heuristics the AMD version employs for now. */ - node = apicid_to_node[hard_smp_processor_id()]; + node = apicid_to_node[apicid]; if (node == NUMA_NO_NODE) node = first_node(node_online_map); numa_set_node(cpu, node); if (acpi_numa > 0) - printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif } -- cgit v1.2.3 From 9c63f8738734eb7e6d3f76ca03186f16ef88edf5 Mon Sep 17 00:00:00 2001 From: Piotr Kaczuba Date: Mon, 26 Jun 2006 13:59:38 +0200 Subject: [PATCH] x86_64: Fix modular pc speaker It turned out that the following change is needed when the speaker is compiled as a module. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 8ade23d7ae76..0d2d4f67aa73 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1361,7 +1361,7 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -#ifdef CONFIG_INPUT_PCSPKR +#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) #include static __init int add_pcspkr(void) { -- cgit v1.2.3 From 2ee60e17896c65da1df5780d3196c050bccb7d10 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 26 Jun 2006 13:59:44 +0200 Subject: [PATCH] x86_64: Move export symbols to their C functions Only exports for assembler files are left in x8664_ksyms.c Originally inspired by a patch from Al Viro Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/e820.c | 1 + arch/x86_64/kernel/process.c | 1 + arch/x86_64/kernel/reboot.c | 1 + arch/x86_64/kernel/setup.c | 2 + arch/x86_64/kernel/setup64.c | 2 + arch/x86_64/kernel/smp.c | 4 ++ arch/x86_64/kernel/smpboot.c | 5 ++ arch/x86_64/kernel/time.c | 2 + arch/x86_64/kernel/traps.c | 2 + arch/x86_64/kernel/x8664_ksyms.c | 114 ++------------------------------------- arch/x86_64/lib/csum-partial.c | 1 + arch/x86_64/lib/csum-wrappers.c | 1 + arch/x86_64/lib/delay.c | 5 ++ arch/x86_64/lib/memmove.c | 2 + arch/x86_64/lib/usercopy.c | 11 +++- arch/x86_64/mm/ioremap.c | 5 ++ 16 files changed, 49 insertions(+), 110 deletions(-) (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index d8ea250f3925..9e94d834624b 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -620,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from) } unsigned long pci_mem_start = 0xaeedbabe; +EXPORT_SYMBOL(pci_mem_start); /* * Search for the biggest gap in the low 32 bits of the e820 diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index b596837a1527..ca56e19b8b6e 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -63,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); static ATOMIC_NOTIFIER_HEAD(idle_notifier); diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 57117b8beb2b..2d6769847456 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c @@ -20,6 +20,7 @@ * Power off function, if any */ void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static long no_idt[3]; static enum { diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0d2d4f67aa73..2a5fce0fd1c4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -71,6 +71,7 @@ */ struct cpuinfo_x86 boot_cpu_data __read_mostly; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; @@ -99,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA]; * Setup options */ struct screen_info screen_info; +EXPORT_SYMBOL(screen_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index e5bf22a01edb..f5934cb4a2b6 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -30,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(_cpu_pda); struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; @@ -37,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL(__supported_pte_mask); static int do_not_nx __cpuinitdata = 0; /* noexec=on|off diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 5885b8f044b2..8188bae9c6d5 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -224,6 +224,7 @@ void flush_tlb_current_task(void) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_current_task); void flush_tlb_mm (struct mm_struct * mm) { @@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_mm); void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) { @@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void* info) { @@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, spin_unlock(&call_lock); return 0; } +EXPORT_SYMBOL(smp_call_function); void smp_stop_cpu(void) { diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index b1c10b154bfe..4e9755179ecf 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -63,9 +63,11 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +EXPORT_SYMBOL(cpu_llc_id); /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -78,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map); */ cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +EXPORT_SYMBOL(cpu_callout_map); cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); /* Set when the idlers are all forked */ int smp_threads_ready; /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2125d6c05ff7..ebbee6f59ff5 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -51,6 +51,7 @@ extern int using_apic_timer; static char *time_init_gtod(void); DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); int nohpet __initdata = 0; @@ -64,6 +65,7 @@ static int notsc __initdata = 0; #define US_SCALE 32 /* 2^32, arbitralrily chosen */ unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +EXPORT_SYMBOL(cpu_khz); static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 08f24359bfc1..3d11a2fe45b7 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -70,6 +70,7 @@ asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); ATOMIC_NOTIFIER_HEAD(die_chain); +EXPORT_SYMBOL(die_chain); int register_die_notifier(struct notifier_block *nb) { @@ -431,6 +432,7 @@ void out_of_line_bug(void) { BUG(); } +EXPORT_SYMBOL(out_of_line_bug); #endif static DEFINE_SPINLOCK(die_lock); diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 1def21c9f7cd..370952c4ff22 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -1,66 +1,21 @@ +/* Exports for assembly files. + All C exports should go in the respective C files. */ + #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include - -extern spinlock_t rtc_lock; -#ifdef CONFIG_SMP -extern void __write_lock_failed(rwlock_t *rw); -extern void __read_lock_failed(rwlock_t *rw); -#endif - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -//EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ioremap_nocache); -EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_nocheck); -EXPORT_SYMBOL(ip_compute_csum); -/* Delay loops */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2); EXPORT_SYMBOL(__put_user_4); EXPORT_SYMBOL(__put_user_8); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(copy_from_user); EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(copy_in_user); -EXPORT_SYMBOL(strnlen_user); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); -EXPORT_SYMBOL(_cpu_pda); #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); - -EXPORT_SYMBOL(smp_call_function); -EXPORT_SYMBOL(cpu_callout_map); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(rtc_lock); - -EXPORT_SYMBOL_GPL(set_nmi_callback); -EXPORT_SYMBOL_GPL(unset_nmi_callback); - /* Export string functions. We normally rely on gcc builtin for most of these, but gcc sometimes decides not to inline them. */ #undef memcpy @@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); #undef memmove extern void * memset(void *,int,__kernel_size_t); -extern size_t strlen(const char *); -extern void * memmove(void * dest,const void *src,size_t count); extern void * memcpy(void *,const void *,__kernel_size_t); extern void * __memcpy(void *,const void *,__kernel_size_t); EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM -/* prototypes are wrong, these are assembly with custom calling functions */ -extern void rwsem_down_read_failed_thunk(void); -extern void rwsem_wake_thunk(void); -extern void rwsem_downgrade_thunk(void); -extern void rwsem_down_write_failed_thunk(void); -EXPORT_SYMBOL(rwsem_down_read_failed_thunk); -EXPORT_SYMBOL(rwsem_wake_thunk); -EXPORT_SYMBOL(rwsem_downgrade_thunk); -EXPORT_SYMBOL(rwsem_down_write_failed_thunk); -#endif - EXPORT_SYMBOL(empty_zero_page); - -EXPORT_SYMBOL(die_chain); - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_sibling_map); -EXPORT_SYMBOL(smp_num_siblings); -#endif - -#ifdef CONFIG_BUG -EXPORT_SYMBOL(out_of_line_bug); -#endif - EXPORT_SYMBOL(init_level4_pgt); - -extern unsigned long __supported_pte_mask; -EXPORT_SYMBOL(__supported_pte_mask); - -#ifdef CONFIG_SMP -EXPORT_SYMBOL(flush_tlb_page); -#endif - -EXPORT_SYMBOL(cpu_khz); - EXPORT_SYMBOL(load_gs_index); diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c index 5384e227cdf6..c493735218da 100644 --- a/arch/x86_64/lib/csum-partial.c +++ b/arch/x86_64/lib/csum-partial.c @@ -147,4 +147,5 @@ unsigned short ip_compute_csum(unsigned char * buff, int len) { return csum_fold(csum_partial(buff,len,0)); } +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c index 94323f20816e..b1320ec58428 100644 --- a/arch/x86_64/lib/csum-wrappers.c +++ b/arch/x86_64/lib/csum-wrappers.c @@ -109,6 +109,7 @@ csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, { return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); } +EXPORT_SYMBOL(csum_partial_copy_nocheck); unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, __u32 len, unsigned short proto, unsigned int sum) diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 03c460cbdd1c..b6cd3cca2f45 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -36,18 +37,22 @@ void __delay(unsigned long loops) } while((now-bclock) < loops); } +EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); } +EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) { __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ } +EXPORT_SYMBOL(__udelay); void __ndelay(unsigned long nsecs) { __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c index e93d5255fdc9..517eb1bd9de5 100644 --- a/arch/x86_64/lib/memmove.c +++ b/arch/x86_64/lib/memmove.c @@ -3,6 +3,7 @@ */ #define _STRING_C #include +#include #undef memmove void *memmove(void * dest,const void *src,size_t count) @@ -17,3 +18,4 @@ void *memmove(void * dest,const void *src,size_t count) } return dest; } +EXPORT_SYMBOL(memmove); diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c index 9bc2c295818e..a82bfacdb1af 100644 --- a/arch/x86_64/lib/usercopy.c +++ b/arch/x86_64/lib/usercopy.c @@ -5,6 +5,7 @@ * Copyright 1997 Linus Torvalds * Copyright 2002 Andi Kleen */ +#include #include /* @@ -47,6 +48,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } +EXPORT_SYMBOL(__strncpy_from_user); long strncpy_from_user(char *dst, const char __user *src, long count) @@ -56,6 +58,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } +EXPORT_SYMBOL(strncpy_from_user); /* * Zero Userspace @@ -94,7 +97,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) [zero] "r" (0UL), [eight] "r" (8UL)); return size; } - +EXPORT_SYMBOL(__clear_user); unsigned long clear_user(void __user *to, unsigned long n) { @@ -102,6 +105,7 @@ unsigned long clear_user(void __user *to, unsigned long n) return __clear_user(to, n); return n; } +EXPORT_SYMBOL(clear_user); /* * Return the size of a string (including the ending 0) @@ -125,6 +129,7 @@ long __strnlen_user(const char __user *s, long n) s++; } } +EXPORT_SYMBOL(__strnlen_user); long strnlen_user(const char __user *s, long n) { @@ -132,6 +137,7 @@ long strnlen_user(const char __user *s, long n) return 0; return __strnlen_user(s, n); } +EXPORT_SYMBOL(strnlen_user); long strlen_user(const char __user *s) { @@ -147,6 +153,7 @@ long strlen_user(const char __user *s) s++; } } +EXPORT_SYMBOL(strlen_user); unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) { @@ -155,3 +162,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le } return len; } +EXPORT_SYMBOL(copy_in_user); + diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index ae207064201e..45d7d823c3b8 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -219,6 +220,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } return (__force void __iomem *) (offset + (char *)addr); } +EXPORT_SYMBOL(__ioremap); /** * ioremap_nocache - map bus memory into CPU space @@ -246,6 +248,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) { return __ioremap(phys_addr, size, _PAGE_PCD); } +EXPORT_SYMBOL(ioremap_nocache); /** * iounmap - Free a IO remapping @@ -291,3 +294,5 @@ void iounmap(volatile void __iomem *addr) BUG_ON(p != o || o == NULL); kfree(p); } +EXPORT_SYMBOL(iounmap); + -- cgit v1.2.3 From 0080e667550db5ae8c9318181500c413b99ff164 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 26 Jun 2006 13:59:59 +0200 Subject: [PATCH] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs Intel now has support for Architectural Performance Monitoring Counters ( Refer to IA-32 Intel Architecture Software Developer's Manual http://www.intel.com/design/pentium4/manuals/253669.htm ). This feature is present starting from Intel Core Duo and Intel Core Solo processors. What this means is, the performance monitoring counters and some performance monitoring events are now defined in an architectural way (using cpuid). And there will be no need to check for family/model etc for these architectural events. Below is the patch to use this performance counters in nmi watchdog driver. Patch handles both i386 and x86-64 kernels. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 6 +++ arch/i386/kernel/nmi.c | 65 +++++++++++++++++++++++++- arch/x86_64/kernel/nmi.c | 81 +++++++++++++++++++++++++++++++-- arch/x86_64/kernel/setup.c | 7 +++ include/asm-i386/cpufeature.h | 1 + include/asm-i386/intel_arch_perfmon.h | 19 ++++++++ include/asm-x86_64/cpufeature.h | 2 +- include/asm-x86_64/intel_arch_perfmon.h | 19 ++++++++ 8 files changed, 193 insertions(+), 7 deletions(-) create mode 100644 include/asm-i386/intel_arch_perfmon.h create mode 100644 include/asm-x86_64/intel_arch_perfmon.h (limited to 'arch/x86_64/kernel/setup.c') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5386b29bb5a5..10afc645c540 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) select_idle_routine(c); l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index bd3875419630..a76e93146585 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -24,6 +24,7 @@ #include #include +#include #include "mach_traps.h" @@ -95,6 +96,9 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -207,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); +static void disable_intel_arch_watchdog(void); + static void disable_lapic_nmi_watchdog(void) { if (nmi_active <= 0) @@ -216,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_K7_EVNTSEL0, 0, 0); break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + disable_intel_arch_watchdog(); + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -444,6 +454,53 @@ static int setup_p4_watchdog(void) return 1; } +static void disable_intel_arch_watchdog(void) +{ + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. + */ + ebx = cpuid_ebx(10); + if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); +} + +static int setup_intel_arch_watchdog(void) +{ + unsigned int evntsel; + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. + */ + ebx = cpuid_ebx(10); + if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + + clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); + clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + write_watchdog_counter("INTEL_ARCH_PERFCTR0"); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + return 1; +} + void setup_apic_nmi_watchdog (void) { switch (boot_cpu_data.x86_vendor) { @@ -453,6 +510,11 @@ void setup_apic_nmi_watchdog (void) setup_k7_watchdog(); break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + if (!setup_intel_arch_watchdog()) + return; + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -556,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } - else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { + else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || + nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { /* Only P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt * other P6 variant */ diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index ab421e22fa67..399489c93132 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -66,6 +67,9 @@ static unsigned int nmi_p4_cccr_val; #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + #define MSR_P4_MISC_ENABLE 0x1A0 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) @@ -97,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void) case X86_VENDOR_AMD: return boot_cpu_data.x86 == 15; case X86_VENDOR_INTEL: - return boot_cpu_data.x86 == 15; + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return 1; + else + return (boot_cpu_data.x86 == 15); } return 0; } @@ -203,6 +210,8 @@ int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); +static void disable_intel_arch_watchdog(void); + static void disable_lapic_nmi_watchdog(void) { if (nmi_active <= 0) @@ -215,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void) if (boot_cpu_data.x86 == 15) { wrmsr(MSR_P4_IQ_CCCR0, 0, 0); wrmsr(MSR_P4_CRU_ESCR0, 0, 0); + } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + disable_intel_arch_watchdog(); } break; } @@ -367,6 +378,53 @@ static void setup_k7_watchdog(void) wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); } +static void disable_intel_arch_watchdog(void) +{ + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. + */ + ebx = cpuid_ebx(10); + if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); +} + +static int setup_intel_arch_watchdog(void) +{ + unsigned int evntsel; + unsigned ebx; + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebp indicates event present. + */ + ebx = cpuid_ebx(10); + if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + + clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); + clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); + return 1; +} + static int setup_p4_watchdog(void) { @@ -420,10 +478,16 @@ void setup_apic_nmi_watchdog(void) setup_k7_watchdog(); break; case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 != 15) - return; - if (!setup_p4_watchdog()) + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + if (!setup_intel_arch_watchdog()) + return; + } else if (boot_cpu_data.x86 == 15) { + if (!setup_p4_watchdog()) + return; + } else { return; + } + break; default: @@ -508,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) */ wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); - } + } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { + /* + * For Intel based architectural perfmon + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + } wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); } } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 2a5fce0fd1c4..594642a6c037 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -988,6 +988,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned n; init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 3ecedbafa8ce..d314ebb3d59e 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -72,6 +72,7 @@ #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-i386/intel_arch_perfmon.h b/include/asm-i386/intel_arch_perfmon.h new file mode 100644 index 000000000000..134ea9cc5283 --- /dev/null +++ b/include/asm-i386/intel_arch_perfmon.h @@ -0,0 +1,19 @@ +#ifndef X86_INTEL_ARCH_PERFMON_H +#define X86_INTEL_ARCH_PERFMON_H 1 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0) + +#endif /* X86_INTEL_ARCH_PERFMON_H */ diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index fbf5a19edbb3..ee792faaca01 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -67,7 +67,7 @@ #define X86_FEATURE_SYNC_RDTSC (3*32+6) /* RDTSC syncs CPU core */ #define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ - +#define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86_64/intel_arch_perfmon.h b/include/asm-x86_64/intel_arch_perfmon.h new file mode 100644 index 000000000000..59c396431569 --- /dev/null +++ b/include/asm-x86_64/intel_arch_perfmon.h @@ -0,0 +1,19 @@ +#ifndef X86_64_INTEL_ARCH_PERFMON_H +#define X86_64_INTEL_ARCH_PERFMON_H 1 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0) + +#endif /* X86_64_INTEL_ARCH_PERFMON_H */ -- cgit v1.2.3