summaryrefslogtreecommitdiffstats
path: root/kernel/hrtimer.c
diff options
context:
space:
mode:
authorArun R Bharadwaj <arun@linux.vnet.ibm.com>2009-04-16 08:46:41 +0200
committerThomas Gleixner <tglx@linutronix.de>2009-05-13 16:52:42 +0200
commiteea08f32adb3f97553d49a4f79a119833036000a (patch)
tree4e6af5185309d7abe49a8fa19634ea38582381e4 /kernel/hrtimer.c
parenttimers: /proc/sys sysctl hook to enable timer migration (diff)
downloadlinux-eea08f32adb3f97553d49a4f79a119833036000a.tar.xz
linux-eea08f32adb3f97553d49a4f79a119833036000a.zip
timers: Logic to move non pinned timers
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]: This patch migrates all non pinned timers and hrtimers to the current idle load balancer, from all the idle CPUs. Timers firing on busy CPUs are not migrated. While migrating hrtimers, care should be taken to check if migrating a hrtimer would result in a latency or not. So we compare the expiry of the hrtimer with the next timer interrupt on the target cpu and migrate the hrtimer only if it expires *after* the next interrupt on the target cpu. So, added a clockevents_get_next_event() helper function to return the next_event on the target cpu's clock_event_device. [ tglx: cleanups and simplifications ] Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r--kernel/hrtimer.c51
1 files changed, 49 insertions, 2 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd549241..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
+ int cpu, preferred_cpu = -1;
+
+ cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+ preferred_cpu = get_nohz_load_balancer();
+ if (preferred_cpu >= 0)
+ cpu = preferred_cpu;
+ }
+#endif
- new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+ new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
timer->base = NULL;
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
+
+ /* Optimized away for NOHZ=n SMP=n */
+ if (cpu == preferred_cpu) {
+ /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+ new_base->offset);
+#else
+ ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+ /*
+ * Get the next event on target cpu from the
+ * clock events layer.
+ * This covers the highres=off nohz=on case as well.
+ */
+ ktime_t next = clockevents_get_next_event(cpu);
+
+ ktime_t delta = ktime_sub(expires, next);
+
+ /*
+ * We do not migrate the timer when it is expiring
+ * before the next event on the target cpu because
+ * we cannot reprogram the target cpu hardware and
+ * we would cause it to fire late.
+ */
+ if (delta.tv64 < 0) {
+ cpu = smp_processor_id();
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
+ }
+ }
timer->base = new_base;
}
return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
return base;
}
-# define switch_hrtimer_base(t, b) (b)
+# define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */