summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2009-08-03 07:12:19 +0200
committerTejun Heo <tj@kernel.org>2009-08-03 18:29:34 +0200
commitbdf977b37418cdf8a2252504779a7e12a09b7575 (patch)
tree0f3896a5b39c35f3018d3cb16f797ae7d6c4a66b
parentx86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() (diff)
downloadlinux-bdf977b37418cdf8a2252504779a7e12a09b7575.tar.xz
linux-bdf977b37418cdf8a2252504779a7e12a09b7575.zip
x86, percpu: Collect hot percpu variables into one cacheline
On x86_64, percpu variables current_task and kernel_stack are used for get_current() and current_thread_info() respectively and thus are often used close to each other. Move definition of current_task to kernel/cpu/common.c right above kernel_stack definition and align it to cacheline so that they always fall into the same cacheline. Two percpu variables defined there together - irq_stack_ptr and irq_count - are also pretty hot and will benefit from sharing the cacheline. For consistency, current_task definition for x86_32 is also moved to kernel/cpu/common.c. Putting current_task and kernel_stack into the same cacheline was suggested by Linus Torvalds. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/kernel/cpu/common.c15
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c3
3 files changed, 13 insertions, 8 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 12493c5485e5..1bd88ed978bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot. Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+ &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/*
@@ -1041,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else /* CONFIG_X86_64 */
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..daa4107be3b4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,9 +61,6 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..c4c675d5ba1a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,9 +55,6 @@
asmlinkage extern void ret_from_fork(void);
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);