summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2005-09-09 22:02:02 +0200
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 22:57:31 +0200
commit383f2835eb9afb723af71850037b2f074ac9db60 (patch)
tree1ef99fd4d7246b2afa16dc7d1514b6ff25fa8284
parent[PATCH] fix disassociate_ctty vs. fork race (diff)
downloadlinux-383f2835eb9afb723af71850037b2f074ac9db60.tar.xz
linux-383f2835eb9afb723af71850037b2f074ac9db60.zip
[PATCH] Prefetch kernel stacks to speed up context switch
For architecture like ia64, the switch stack structure is fairly large (currently 528 bytes). For context switch intensive application, we found that significant amount of cache misses occurs in switch_to() function. The following patch adds a hook in the schedule() function to prefetch switch stack structure as soon as 'next' task is determined. This allows maximum overlap in prefetch cache lines for that structure. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "Luck, Tony" <tony.luck@intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to '')
-rw-r--r--arch/ia64/kernel/entry.S23
-rw-r--r--include/asm-ia64/system.h1
-rw-r--r--include/linux/sched.h5
-rw-r--r--kernel/sched.c1
4 files changed, 30 insertions, 0 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c8821024509..915e12791836 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
br.cond.sptk.many b7
END(load_switch_stack)
+GLOBAL_ENTRY(prefetch_stack)
+ add r14 = -IA64_SWITCH_STACK_SIZE, sp
+ add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0
+ ;;
+ ld8 r16 = [r15] // load next's stack pointer
+ lfetch.fault.excl [r14], 128
+ ;;
+ lfetch.fault.excl [r14], 128
+ lfetch.fault [r16], 128
+ ;;
+ lfetch.fault.excl [r14], 128
+ lfetch.fault [r16], 128
+ ;;
+ lfetch.fault.excl [r14], 128
+ lfetch.fault [r16], 128
+ ;;
+ lfetch.fault.excl [r14], 128
+ lfetch.fault [r16], 128
+ ;;
+ lfetch.fault [r16], 128
+ br.ret.sptk.many rp
+END(prefetch_switch_stack)
+
GLOBAL_ENTRY(execve)
mov r15=__NR_execve // put syscall number in place
break __BREAK_SYSCALL
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 33256db4a7cf..635235fa1e32 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task_struct *task);
*/
#define __ARCH_WANT_UNLOCKED_CTXSW
+#define ARCH_HAS_PREFETCH_SWITCH_STACK
#define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
void cpu_idle_wait(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea1b5f32ec5c..c551e6a1447e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,11 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
#define GROUP_AT(gi, i) \
((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
+#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
+extern void prefetch_stack(struct task_struct*);
+#else
+static inline void prefetch_stack(struct task_struct *t) { }
+#endif
struct audit_context; /* See audit.c */
struct mempolicy;
diff --git a/kernel/sched.c b/kernel/sched.c
index 18b95520a2e2..2632b812cf24 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2888,6 +2888,7 @@ switch_tasks:
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);
prefetch(next);
+ prefetch_stack(next);
clear_tsk_need_resched(prev);
rcu_qsctr_inc(task_cpu(prev));