From 9138d581b0ef855c0314c41c14852a7231b9941c Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Mon, 7 Nov 2005 11:27:13 -0800 Subject: [IA64] Extend notify_die() hooks for IA64 notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple notification points for these events because they can take many seconds to run, which has nasty effects on the behaviour of the rest of the system. DIE_SS replaced by a generic DIE_FAULT which checks the vector number, to allow interception of faults other than SS. DIE_MACHINE_{HALT,RESTART} added to allow last-minute close-down processing, especially when the halt/restart routines are called from error handlers. DIE_OOPS added. The check for kprobe's break numbers has been moved from traps.c to kprobes.c, allowing DIE_BREAK to be used for any additional break numbers, i.e. it is no longer kprobes-specific. Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE. Both of these disable the system for long periods, which impacts watchdogs and heartbeat systems in general. More patches to come that use these events to reset watchdogs and heartbeats. unregister_die_notifier() added and both routines exported. Requested by Dean Nelson. Lock removed from {un,}register_die_notifier. notifier_chain_register() already takes a lock. Also the generic notifier chain locking is being reworked to distinguish between callbacks that can block and those that cannot; the lock in {un,}register_die_notifier would interfere with that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 Leading white space removed from arch/ia64/kernel/kprobes.c. Typo in mca.c in the original version of this patch found & fixed by Dean Nelson. Signed-off-by: Keith Owens Acked-by: Dean Nelson Acked-by: Anil Keshavamurthy Signed-off-by: Tony Luck --- arch/ia64/kernel/process.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/ia64/kernel/process.c') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 051e050359e4..c78355cb8902 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -4,6 +4,9 @@ * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang * 04/11/17 Ashok Raj Added CPU Hotplug Support + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. */ #define __KERNEL_SYSCALLS__ /* see */ #include @@ -34,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -804,12 +808,14 @@ cpu_halt (void) void machine_restart (char *restart_cmd) { + (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); } void machine_halt (void) { + (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); cpu_halt(); } -- cgit v1.2.3 From 5bfb5d690f36d316a5f3b4f7775fda996faa6b12 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:01 -0800 Subject: [PATCH] sched: disable preempt in idle tasks Run idle threads with preempt disabled. Also corrected a bug in arm26's cpu_idle (make it actually call schedule()). How did it ever work before? Might fix the CPU hotplugging hang which Nigel Cunningham noted. We think the bug hits if the idle thread is preempted after checking need_resched() and before going to sleep, and the CPU is then offlined. After calling stop_machine_run, the CPU eventually returns from preemption and into the idle thread and goes to sleep. The CPU will continue executing the previous idle code and have no chance to call play_dead.
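For illustration, the window described above in a generic idle loop (an editorial sketch in pseudo-C, not code from any one architecture; sleep_until_interrupt() is a hypothetical stand-in for the arch halt primitive):

    void cpu_idle(void)                /* old shape: preemptible idle */
    {
        while (1) {
            while (!need_resched())
                sleep_until_interrupt();    /* hypothetical halt */
            /* preemption point: with preemption enabled, the idle task
             * can be scheduled away right here, the CPU offlined via
             * stop_machine_run, and on return it re-enters the halt
             * above -- play_dead() is never reached. */
            schedule();
        }
    }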
By disabling preemption until we are ready to explicitly schedule, this bug is fixed and the idle threads generally become more robust. From: alexs PPC build fix From: Yoichi Yuasa MIPS build fix Signed-off-by: Nick Piggin Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 4 ++-- arch/arm/kernel/smp.c | 5 ++++- arch/arm26/kernel/process.c | 12 +++++------- arch/cris/arch-v32/kernel/smp.c | 1 + arch/cris/kernel/process.c | 2 ++ arch/frv/kernel/process.c | 6 +++++- arch/h8300/kernel/process.c | 28 +++++++++++++++------------- arch/i386/kernel/process.c | 4 +++- arch/i386/kernel/smpboot.c | 1 + arch/ia64/kernel/process.c | 2 ++ arch/ia64/kernel/smpboot.c | 1 + arch/m32r/kernel/process.c | 2 ++ arch/m32r/kernel/smpboot.c | 1 + arch/m68k/kernel/process.c | 2 ++ arch/mips/kernel/process.c | 2 ++ arch/mips/kernel/smp.c | 4 +++- arch/parisc/kernel/process.c | 2 ++ arch/parisc/kernel/smp.c | 1 + arch/powerpc/platforms/iseries/setup.c | 4 ++++ arch/powerpc/platforms/pseries/setup.c | 4 ++++ arch/ppc/kernel/idle.c | 17 ++++++++++++----- arch/ppc/kernel/smp.c | 1 + arch/ppc64/kernel/idle.c | 4 ++++ arch/s390/kernel/process.c | 11 ++++++++--- arch/s390/kernel/smp.c | 1 + arch/sh/kernel/process.c | 2 ++ arch/sh/kernel/smp.c | 5 ++++- arch/sh64/kernel/process.c | 2 ++ arch/sparc/kernel/process.c | 4 ++++ arch/sparc64/kernel/process.c | 4 ++++ arch/sparc64/kernel/smp.c | 3 +++ arch/v850/kernel/process.c | 16 ++++++++++------ arch/x86_64/kernel/process.c | 2 ++ arch/x86_64/kernel/smpboot.c | 1 + arch/xtensa/kernel/process.c | 3 ++- init/main.c | 4 +++- 36 files changed, 125 insertions(+), 43 deletions(-) (limited to 'arch/ia64/kernel/process.c') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index ba298277becd..93dd92cc12f8 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -116,13 +116,13 @@ void cpu_idle(void) if (!idle) idle = default_idle; - preempt_disable(); leds_event(led_idle_start); while (!need_resched()) idle(); leds_event(led_idle_end); - preempt_enable(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 77e2e9ca89fa..e55ea952f7aa 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -256,7 +256,9 @@ void __cpuexit cpu_die(void) asmlinkage void __cpuinit secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = smp_processor_id(); printk("CPU%u: Booted secondary processor\n", cpu); @@ -273,6 +275,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) local_flush_tlb_all(); cpu_init(); + preempt_disable(); /* * Give the platform a chance to do its own initialisation. 
diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c index 9eb9964d32a7..15833a0057dd 100644 --- a/arch/arm26/kernel/process.c +++ b/arch/arm26/kernel/process.c @@ -74,15 +74,13 @@ __setup("hlt", hlt_setup); void cpu_idle(void) { /* endless idle loop with no priority at all */ - preempt_disable(); while (1) { - while (!need_resched()) { - local_irq_disable(); - if (!need_resched() && !hlt_counter) - local_irq_enable(); - } + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); } - schedule(); } static char reboot_mode = 'h'; diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 957f551ba5ce..13867f4fad16 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -161,6 +161,7 @@ void __init smp_callin(void) REG_WR(intr_vect, irq_regs[cpu], rw_mask, vect_mask); unmask_irq(IPI_INTR_VECT); unmask_irq(TIMER_INTR_VECT); + preempt_disable(); local_irq_enable(); cpu_set(cpu, cpu_online_map); diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 949a0e40e03c..7c80afb10460 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -218,7 +218,9 @@ void cpu_idle (void) idle = default_idle; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 3001b82b1514..54a452136f00 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -77,16 +77,20 @@ void (*idle)(void) = core_sleep_idle; */ void cpu_idle(void) { + int cpu = smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { - irq_stat[smp_processor_id()].idle_timestamp = jiffies; + irq_stat[cpu].idle_timestamp = jiffies; if (!frv_dma_inprogress && idle) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 27f1fce64ce4..fe21adf3e75e 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -53,22 +53,18 @@ asmlinkage void ret_from_fork(void); #if !defined(CONFIG_H8300H_SIM) && !defined(CONFIG_H8S_SIM) void default_idle(void) { - while(1) { - if (!need_resched()) { - local_irq_enable(); - __asm__("sleep"); - local_irq_disable(); - } - schedule(); - } + local_irq_disable(); + if (!need_resched()) { + local_irq_enable(); + /* XXX: race here! What if need_resched() gets set now? 
*/ + __asm__("sleep"); + } else + local_irq_enable(); } #else void default_idle(void) { - while(1) { - if (need_resched()) - schedule(); - } + cpu_relax(); } #endif void (*idle)(void) = default_idle; @@ -81,7 +77,13 @@ void (*idle)(void) = default_idle; */ void cpu_idle(void) { - idle(); + while (1) { + while (!need_resched()) + idle(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } void machine_restart(char * __unused) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 7a14fdfd3af9..5296e284ea36 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -179,7 +179,7 @@ static inline void play_dead(void) */ void cpu_idle(void) { - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -201,7 +201,9 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 47ec76794d02..bc5a9d97466b 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -485,6 +485,7 @@ static void __devinit start_secondary(void *unused) * things done here to the most necessary things. */ cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 051e050359e4..4c621fc3c3b9 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -292,7 +292,9 @@ cpu_idle (void) #ifdef CONFIG_SMP normal_xtp(); #endif + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); if (cpu_is_offline(smp_processor_id())) play_dead(); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 400a48987124..8f44e7d2df66 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -399,6 +399,7 @@ start_secondary (void *unused) Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); efi_map_pal_code(); cpu_init(); + preempt_disable(); smp_callin(); cpu_idle(); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index ea13a8f4d8b0..cc4b571e5db7 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -104,7 +104,9 @@ void cpu_idle (void) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 640d592ea072..b90c54169fa5 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -426,6 +426,7 @@ void __init smp_cpus_done(unsigned int max_cpus) int __init start_secondary(void *unused) { cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) cpu_relax(); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 11b1b90ba6ba..13d109328a42 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -102,7 +102,9 @@ void cpu_idle(void) while (1) { while (!need_resched()) idle(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 4fe3d5715c41..dd725779d91f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -52,7 +52,9 @@ ATTRIB_NORET void cpu_idle(void) while (!need_resched()) if (cpu_wait) (*cpu_wait)(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } 
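Every cpu_idle() hunk in this patch converges on the same shape; as a reading aid, here is the generic pattern (a sketch, with arch_idle() standing in for whatever halt/poll primitive each architecture uses):

    /* idle runs with preemption disabled; it is enabled only
     * around the one explicit call into the scheduler */
    while (1) {
        while (!need_resched())
            arch_idle();                /* halt, sleep, or cpu_relax() */
        preempt_enable_no_resched();    /* open the preemption window... */
        schedule();                     /* ...and take the switch explicitly */
        preempt_disable();              /* idle again, non-preemptible */
    }

The secondary-CPU hunks are the other half of the same invariant: each start_secondary()/smp_callin() path gains a preempt_disable() so the per-CPU idle thread enters this loop with preemption already off.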
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index fcacf1aae98a..25472fcaf715 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -82,7 +82,7 @@ extern ATTRIB_NORET void cpu_idle(void); */ asmlinkage void start_secondary(void) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; cpu_probe(); cpu_report(); @@ -95,6 +95,8 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); + preempt_disable(); + cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; prom_smp_finish(); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 7fdca87ef647..f482f78de435 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -92,7 +92,9 @@ void cpu_idle(void) while (1) { while (!need_resched()) barrier(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 5db3be4e2704..a9ecf6465784 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -463,6 +463,7 @@ void __init smp_callin(void) #endif smp_cpu_init(slave_id); + preempt_disable(); #if 0 /* NOT WORKING YET - see entry.S */ istack = (void *)__get_free_pages(GFP_KERNEL,ISTACK_ORDER); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index d3e4bf756c83..0130f2619dac 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -694,7 +694,9 @@ static void iseries_shared_idle(void) if (hvlpevent_is_pending()) process_iSeries_events(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } @@ -726,7 +728,9 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e78c39368841..4854f5eb5c3d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -539,7 +539,9 @@ static void pseries_dedicated_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -583,7 +585,9 @@ static void pseries_shared_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); diff --git a/arch/ppc/kernel/idle.c b/arch/ppc/kernel/idle.c index 11e5b44713f7..a6141f05c919 100644 --- a/arch/ppc/kernel/idle.c +++ b/arch/ppc/kernel/idle.c @@ -53,10 +53,6 @@ void default_idle(void) } #endif } - if (need_resched()) - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); } /* @@ -64,11 +60,22 @@ void default_idle(void) */ void cpu_idle(void) { - for (;;) + int cpu = smp_processor_id(); + + for (;;) { if (ppc_md.idle != NULL) ppc_md.idle(); else default_idle(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + if (need_resched()) { + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } + + } } #if defined(CONFIG_SYSCTL) && defined(CONFIG_6xx) diff --git a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c index bc5bf1124836..43b8fc2ca591 100644 --- a/arch/ppc/kernel/smp.c +++ b/arch/ppc/kernel/smp.c @@ -341,6 +341,7 @@ int __devinit start_secondary(void *unused) cpu = smp_processor_id(); smp_store_cpu_info(cpu); 
set_dec(tb_ticks_per_jiffy); + preempt_disable(); cpu_callin_map[cpu] = 1; printk("CPU %d done callin...\n", cpu); diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 8fec27469802..909ea669af91 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -61,7 +61,9 @@ void default_idle(void) } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -77,7 +79,9 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } if (cpu_is_offline(smp_processor_id()) && diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9f3dff6c0b72..66ca5757e368 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -102,7 +102,6 @@ void default_idle(void) local_irq_disable(); if (need_resched()) { local_irq_enable(); - schedule(); return; } @@ -139,8 +138,14 @@ void default_idle(void) void cpu_idle(void) { - for (;;) - default_idle(); + for (;;) { + while (!need_resched()) + default_idle(); + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } void show_regs(struct pt_regs *regs) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e13c87b446b2..5856b3fda6bf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -533,6 +533,7 @@ int __devinit start_secondary(void *cpuvoid) { /* Setup the cpu */ cpu_init(); + preempt_disable(); /* init per CPU timer */ init_cpu_timer(); #ifdef CONFIG_VIRT_TIMER diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 6dce9d0b81f8..1cbc26b796ad 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -64,7 +64,9 @@ void default_idle(void) cpu_sleep(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 5ecefc02896a..59e49b18252c 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -112,7 +112,9 @@ int __cpu_up(unsigned int cpu) int start_secondary(void *unused) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = smp_processor_id(); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; @@ -120,6 +122,7 @@ int start_secondary(void *unused) smp_store_cpu_info(cpu); __smp_slave_init(cpu); + preempt_disable(); per_cpu_trap_init(); atomic_inc(&cpus_booted); diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c index efde41c0cd66..0c09537449b3 100644 --- a/arch/sh64/kernel/process.c +++ b/arch/sh64/kernel/process.c @@ -334,7 +334,9 @@ void default_idle(void) } local_irq_enable(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 29e72b57d4fd..c39f4d01096d 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -120,7 +120,9 @@ void cpu_idle(void) (*pm_idle)(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } @@ -133,7 +135,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while(1) { if(need_resched()) { + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } barrier(); /* or else gcc optimizes... 
*/ diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 7d10b0397091..2f89206e008f 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -74,7 +74,9 @@ void cpu_idle(void) while (!need_resched()) barrier(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); } } @@ -93,7 +95,9 @@ void cpu_idle(void) if (need_resched()) { unidle_me(); clear_thread_flag(TIF_POLLING_NRFLAG); + preempt_enable_no_resched(); schedule(); + preempt_disable(); set_thread_flag(TIF_POLLING_NRFLAG); check_pgt_cache(); } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 5d90ee9aebf1..8aca4b1dc04e 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -168,6 +168,9 @@ void __init smp_callin(void) rmb(); cpu_set(cpuid, cpu_online_map); + + /* idle thread is expected to have preempt disabled */ + preempt_disable(); } void cpu_panic(void) diff --git a/arch/v850/kernel/process.c b/arch/v850/kernel/process.c index 9c708c32c1f0..39cf247cdae4 100644 --- a/arch/v850/kernel/process.c +++ b/arch/v850/kernel/process.c @@ -36,11 +36,8 @@ extern void ret_from_fork (void); /* The idle loop. */ void default_idle (void) { - while (1) { - while (! need_resched ()) - asm ("halt; nop; nop; nop; nop; nop" ::: "cc"); - schedule (); - } + while (! need_resched ()) + asm ("halt; nop; nop; nop; nop; nop" ::: "cc"); } void (*idle)(void) = default_idle; @@ -54,7 +51,14 @@ void (*idle)(void) = default_idle; void cpu_idle (void) { /* endless idle loop with no priority at all */ - (*idle) (); + while (1) { + while (!need_resched()) + (*idle) (); + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } } /* diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index b5a89c0bdf59..571f9fe490ce 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -204,7 +204,9 @@ void cpu_idle (void) idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 4b5b088ec102..c4e59bbdc187 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -472,6 +472,7 @@ void __cpuinit start_secondary(void) * things done here to the most necessary things. 
*/ cpu_init(); + preempt_disable(); smp_callin(); /* otherwise gcc will move up the smp_processor_id before the cpu_init */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 08ef6d82ee51..6a44b54ae817 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -96,8 +96,9 @@ void cpu_idle(void) { while (1) { while (!need_resched()) platform_idle(); - preempt_enable(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/init/main.c b/init/main.c index f142d4035341..27f97f9b4636 100644 --- a/init/main.c +++ b/init/main.c @@ -394,14 +394,16 @@ static void noinline rest_init(void) kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); unlock_kernel(); - preempt_enable_no_resched(); /* * The boot idle thread must execute schedule() * at least once to get things moving: */ + preempt_enable_no_resched(); schedule(); + preempt_disable(); + /* Call into cpu_idle with preempt disabled */ cpu_idle(); } -- cgit v1.2.3 From 64c7c8f88559624abdbe12b5da6502e8879f8d28 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:04 -0800 Subject: [PATCH] sched: resched and cpu_idle rework Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce confusion, and make their semantics rigid. Improves efficiency of resched_task and some cpu_idle routines. * In resched_task: - TIF_NEED_RESCHED is only cleared with the task's runqueue lock held, and as we hold it during resched_task, there is no need for an atomic test and set there. The only other time this should be set is when the task's quantum expires, in the timer interrupt - this is protected against because the rq lock is irq-safe. - If TIF_NEED_RESCHED is set, then we don't need to do anything. It won't get unset until the task gets schedule()d off. - If we are running on the same CPU as the task we resched, then set TIF_NEED_RESCHED and no further action is required. - If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set after TIF_NEED_RESCHED has been set, then we need to send an IPI. Using these rules, we are able to remove the test and set operation in resched_task, and make clear the previously vague semantics of POLLING_NRFLAG. * In idle routines: - Enter cpu_idle with preempt disabled. When the need_resched() condition becomes true, explicitly call schedule(). This makes things a bit clearer (IMO), but I haven't updated all architectures yet. - Many do a test and clear of TIF_NEED_RESCHED for some reason. According to the resched_task rules, this isn't needed (and actually breaks the assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock held). So remove that. Generally one less locked memory op when switching to the idle thread. - Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the innermost polling idle loops. The above resched_task semantics allow it to stay set until just before the last need_resched() check that precedes a halt requiring an interrupt wakeup. Many idle routines simply never enter such a halt, and so POLLING_NRFLAG can always be left set, completely eliminating resched IPIs when rescheduling the idle task. POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
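Under these rules, an idle routine that truly halts follows one fixed protocol before sleeping; a sketch distilled from the i386 default_idle() rework in this patch (the flag names and helpers are the patch's own):

    clear_thread_flag(TIF_POLLING_NRFLAG);  /* we now need the resched IPI */
    smp_mb__after_clear_bit();              /* order flag clear vs. the test */
    while (!need_resched()) {
        local_irq_disable();
        if (!need_resched())
            safe_halt();                    /* sleeps; resumes with irqs on */
        else
            local_irq_enable();
    }
    set_thread_flag(TIF_POLLING_NRFLAG);    /* back to polling idle */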
Signed-off-by: Nick Piggin Cc: Ingo Molnar Cc: Con Kolivas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sched-arch.txt | 89 ++++++++++++++++++++++++++++++++++ arch/alpha/kernel/process.c | 10 ++-- arch/arm/kernel/process.c | 14 ++++-- arch/i386/kernel/apm.c | 20 +++++++- arch/i386/kernel/process.c | 64 +++++++++++------------- arch/ia64/kernel/process.c | 32 ++++++------ arch/parisc/kernel/process.c | 2 + arch/powerpc/platforms/iseries/setup.c | 10 +--- arch/powerpc/platforms/pseries/setup.c | 12 ++--- arch/ppc/kernel/idle.c | 20 ++++---- arch/ppc64/kernel/idle.c | 11 +---- arch/s390/kernel/process.c | 13 ++--- arch/sh/kernel/process.c | 12 ++--- arch/sh64/kernel/process.c | 14 ++---- arch/sparc/kernel/process.c | 35 ++++++------- arch/sparc64/kernel/process.c | 20 +++++--- arch/sparc64/kernel/smp.c | 13 +---- arch/x86_64/kernel/process.c | 67 ++++++++++++------------- drivers/acpi/processor_idle.c | 37 ++++++++------ kernel/sched.c | 21 +++++--- 20 files changed, 296 insertions(+), 220 deletions(-) create mode 100644 Documentation/sched-arch.txt (limited to 'arch/ia64/kernel/process.c') diff --git a/Documentation/sched-arch.txt b/Documentation/sched-arch.txt new file mode 100644 index 000000000000..941615a9769b --- /dev/null +++ b/Documentation/sched-arch.txt @@ -0,0 +1,89 @@ + CPU Scheduler implementation hints for architecture specific code + + Nick Piggin, 2005 + +Context switch +============== +1. Runqueue locking +By default, the switch_to arch function is called with the runqueue +locked. This is usually not a problem unless switch_to may need to +take the runqueue lock. This is usually due to a wake up operation in +the context switch. See include/asm-ia64/system.h for an example. + +To request the scheduler call switch_to with the runqueue unlocked, +you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file +(typically the one where switch_to is defined). + +Unlocked context switches introduce only a very minor performance +penalty to the core scheduler implementation in the CONFIG_SMP case. + +2. Interrupt status +By default, the switch_to arch function is called with interrupts +disabled. Interrupts may be enabled over the call if it is likely to +introduce a significant interrupt latency by adding the line +`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for +unlocked context switches. This define also implies +`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an +example. + + +CPU idle +======== +Your cpu_idle routines need to obey the following rules: + +1. Preempt should now be disabled over idle routines. Should only + be enabled to call schedule() then disabled again. + +2. need_resched/TIF_NEED_RESCHED is only ever set, and will never + be cleared until the running task has called schedule(). Idle + threads need only ever query need_resched, and may never set or + clear it. + +3. When cpu_idle finds (need_resched() == 'true'), it should call + schedule(). It should not call schedule() otherwise. + +4. The only time interrupts need to be disabled when checking + need_resched is if we are about to sleep the processor until + the next interrupt (this doesn't provide any protection of + need_resched, it prevents losing an interrupt). + + 4a. Common problem with this type of sleep appears to be: + local_irq_disable(); + if (!need_resched()) { + local_irq_enable(); + *** resched interrupt arrives here *** + __asm__("sleep until next interrupt"); + } + +5. 
TIF_POLLING_NRFLAG can be set by idle routines that do not + need an interrupt to wake them up when need_resched goes high. + In other words, they must be periodically polling need_resched, + although it may be reasonable to do some background work or enter + a low CPU priority. + + 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter + an interrupt sleep, it needs to be cleared then a memory + barrier issued (followed by a test of need_resched with + interrupts disabled, as explained in 3). + +arch/i386/kernel/process.c has examples of both polling and +sleeping idle functions. + + +Possible arch/ problems +======================= + +Possible arch problems I found (and either tried to fix or didn't): + +h8300 - Is such sleeping racy vs interrupts? (See #4a). + The H8/300 manual I found indicates yes, however disabling IRQs + over the sleep means only NMIs can wake it up, so can't fix easily + without doing spin waiting. + +ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a) + +sh64 - Is sleeping racy vs interrupts? (See #4a) + +sparc - IRQs on at this point(?), change local_irq_save to _disable. + - TODO: needs secondary CPUs to disable preempt (See #1) + diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index eb20c3afff58..a8682612abc0 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -43,21 +43,17 @@ #include "proto.h" #include "pci_impl.h" -void default_idle(void) -{ - barrier(); -} - void cpu_idle(void) { + set_thread_flag(TIF_POLLING_NRFLAG); + while (1) { - void (*idle)(void) = default_idle; /* FIXME -- EV6 and LCA45 know how to power down the CPU. */ while (!need_resched()) - idle(); + cpu_relax(); schedule(); } } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 93dd92cc12f8..c0f6a119de3b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -86,12 +86,16 @@ EXPORT_SYMBOL(pm_power_off); */ void default_idle(void) { - local_irq_disable(); - if (!need_resched() && !hlt_counter) { - timer_dyn_reprogram(); - arch_idle(); + if (hlt_counter) + cpu_relax(); + else { + local_irq_disable(); + if (!need_resched()) { + timer_dyn_reprogram(); + arch_idle(); + } + local_irq_enable(); } - local_irq_enable(); } /* diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 86e80c551478..003548b8735f 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -769,8 +769,26 @@ static int set_system_power_state(u_short state) static int apm_do_idle(void) { u32 eax; + u8 ret = 0; + int idled = 0; + int polling; + + polling = test_thread_flag(TIF_POLLING_NRFLAG); + if (polling) { + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + } + if (!need_resched()) { + idled = 1; + ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); + } + if (polling) + set_thread_flag(TIF_POLLING_NRFLAG); + + if (!idled) + return 0; - if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) { + if (ret) { static unsigned long t; /* This always fails on some SMP boards running UP kernels. 
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5296e284ea36..1cb261f225d5 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -99,14 +99,22 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { + local_irq_enable(); + if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + while (!need_resched()) { + local_irq_disable(); + if (!need_resched()) + safe_halt(); + else + local_irq_enable(); + } + set_thread_flag(TIF_POLLING_NRFLAG); } else { - cpu_relax(); + while (!need_resched()) + cpu_relax(); } } #ifdef CONFIG_APM_MODULE @@ -120,29 +128,14 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle (void) { - int oldval; - local_irq_enable(); - /* - * Deal with another CPU just having chosen a thread to - * run here: - */ - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } + asm volatile( + "2:" + "testl %0, %1;" + "rep; nop;" + "je 2b;" + : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); } #ifdef CONFIG_HOTPLUG_CPU @@ -181,6 +174,8 @@ void cpu_idle(void) { int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -246,15 +241,12 @@ static void mwait_idle(void) { local_irq_enable(); - if (!need_resched()) { - set_thread_flag(TIF_POLLING_NRFLAG); - do { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) - break; - __mwait(0, 0); - } while (!need_resched()); - clear_thread_flag(TIF_POLLING_NRFLAG); + while (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (need_resched()) + break; + __mwait(0, 0); } } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 4c621fc3c3b9..640d6908f8ec 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -197,11 +197,15 @@ void default_idle (void) { local_irq_enable(); - while (!need_resched()) - if (can_do_pal_halt) - safe_halt(); - else + while (!need_resched()) { + if (can_do_pal_halt) { + local_irq_disable(); + if (!need_resched()) + safe_halt(); + local_irq_enable(); + } else cpu_relax(); + } } #ifdef CONFIG_HOTPLUG_CPU @@ -263,16 +267,16 @@ void __attribute__((noreturn)) cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; + int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { + if (!need_resched()) { + void (*idle)(void); #ifdef CONFIG_SMP - if (!need_resched()) min_xtp(); #endif - while (!need_resched()) { - void (*idle)(void); - if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; @@ -284,19 +288,17 @@ cpu_idle (void) if (!idle) idle = default_idle; (*idle)(); - } - - if (mark_idle) - (*mark_idle)(0); - + if (mark_idle) + (*mark_idle)(0); #ifdef CONFIG_SMP - normal_xtp(); + normal_xtp(); #endif + } preempt_enable_no_resched(); schedule(); preempt_disable(); check_pgt_cache(); - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(cpu)) play_dead(); } } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 
f482f78de435..fee4f1f09adc 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -88,6 +88,8 @@ void default_idle(void) */ void cpu_idle(void) { + set_thread_flag(TIF_POLLING_NRFLAG); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 0130f2619dac..7f8f0cda6a74 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -703,13 +703,10 @@ static void iseries_shared_idle(void) static void iseries_dedicated_idle(void) { long oldval; + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); HMT_low(); @@ -722,9 +719,6 @@ static void iseries_dedicated_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } ppc64_runlatch_on(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4854f5eb5c3d..a093a0d4dd69 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -469,6 +469,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu) * more. */ clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); /* * SMT dynamic mode. Cede will result in this thread going @@ -481,6 +482,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu) cede_processor(); else local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); } else { /* * Give the HV an opportunity at the processor, since we are @@ -492,11 +494,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu) static void pseries_dedicated_idle(void) { - long oldval; struct paca_struct *lpaca = get_paca(); unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { /* @@ -505,10 +507,7 @@ static void pseries_dedicated_idle(void) */ lpaca->lppaca.idle = 1; - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { start_snooze = __get_tb() + *smt_snooze_delay * tb_ticks_per_usec; @@ -531,9 +530,6 @@ static void pseries_dedicated_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } lpaca->lppaca.idle = 0; diff --git a/arch/ppc/kernel/idle.c b/arch/ppc/kernel/idle.c index a6141f05c919..3c4e4cb61074 100644 --- a/arch/ppc/kernel/idle.c +++ b/arch/ppc/kernel/idle.c @@ -63,18 +63,18 @@ void cpu_idle(void) int cpu = smp_processor_id(); for (;;) { - if (ppc_md.idle != NULL) - ppc_md.idle(); - else - default_idle(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - if (need_resched()) { - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + while (need_resched()) { + if (ppc_md.idle != NULL) + ppc_md.idle(); + else + default_idle(); } + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); } } diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 909ea669af91..715bc0e71e0f 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -34,15 +34,11 @@ extern void power4_idle(void); void default_idle(void) { - long oldval; 
unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); @@ -55,9 +51,6 @@ void default_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } ppc64_runlatch_on(); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 66ca5757e368..78b64fe5e7c2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -99,14 +99,15 @@ void default_idle(void) { int cpu, rc; + /* CPU is going idle. */ + cpu = smp_processor_id(); + local_irq_disable(); - if (need_resched()) { + if (need_resched()) { local_irq_enable(); - return; - } + return; + } - /* CPU is going idle. */ - cpu = smp_processor_id(); rc = notifier_call_chain(&idle_chain, CPU_IDLE, (void *)(long) cpu); if (rc != NOTIFY_OK && rc != NOTIFY_DONE) BUG(); @@ -119,7 +120,7 @@ void default_idle(void) __ctl_set_bit(8, 15); #ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(cpu)) cpu_die(); #endif diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 1cbc26b796ad..fd4f240b833d 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -51,14 +51,13 @@ void enable_hlt(void) EXPORT_SYMBOL(enable_hlt); -void default_idle(void) +void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { if (hlt_counter) { - while (1) - if (need_resched()) - break; + while (!need_resched()) + cpu_relax(); } else { while (!need_resched()) cpu_sleep(); @@ -70,11 +69,6 @@ void default_idle(void) } } -void cpu_idle(void) -{ - default_idle(); -} - void machine_restart(char * __unused) { /* SR.BL=1 and invoke address error to let CPU reset (manual reset) */ diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c index 0c09537449b3..b95d04141855 100644 --- a/arch/sh64/kernel/process.c +++ b/arch/sh64/kernel/process.c @@ -307,23 +307,19 @@ __setup("hlt", hlt_setup); static inline void hlt(void) { - if (hlt_counter) - return; - __asm__ __volatile__ ("sleep" : : : "memory"); } /* * The idle loop on a uniprocessor SH.. */ -void default_idle(void) +void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { if (hlt_counter) { - while (1) - if (need_resched()) - break; + while (!need_resched()) + cpu_relax(); } else { local_irq_disable(); while (!need_resched()) { @@ -338,11 +334,7 @@ void default_idle(void) schedule(); preempt_disable(); } -} -void cpu_idle(void) -{ - default_idle(); } void machine_restart(char * __unused) diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index c39f4d01096d..ea8647411462 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -67,13 +67,6 @@ extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *); struct task_struct *last_task_used_math = NULL; struct thread_info *current_set[NR_CPUS]; -/* - * default_idle is new in 2.5. XXX Review, currently stolen from sparc64. 
- */ -void default_idle(void) -{ -} - #ifndef CONFIG_SMP #define SUN4C_FAULT_HIGH 100 @@ -92,12 +85,11 @@ void cpu_idle(void) static unsigned long fps; unsigned long now; unsigned long faults; - unsigned long flags; extern unsigned long sun4c_kernel_faults; extern void sun4c_grow_kernel_ring(void); - local_irq_save(flags); + local_irq_disable(); now = jiffies; count -= (now - last_jiffies); last_jiffies = now; @@ -113,13 +105,16 @@ void cpu_idle(void) sun4c_grow_kernel_ring(); } } - local_irq_restore(flags); + local_irq_enable(); } - while((!need_resched()) && pm_idle) { - (*pm_idle)(); + if (pm_idle) { + while (!need_resched()) + (*pm_idle)(); + } else { + while (!need_resched()) + cpu_relax(); } - preempt_enable_no_resched(); schedule(); preempt_disable(); @@ -132,15 +127,15 @@ void cpu_idle(void) /* This is being executed in task 0 'user space'. */ void cpu_idle(void) { + set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while(1) { - if(need_resched()) { - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - check_pgt_cache(); - } - barrier(); /* or else gcc optimizes... */ + while (!need_resched()) + cpu_relax(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + check_pgt_cache(); } } diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 2f89206e008f..02f9dec1d459 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -85,23 +85,31 @@ void cpu_idle(void) /* * the idle loop on a UltraMultiPenguin... + * + * TIF_POLLING_NRFLAG is set because we do not sleep the cpu + * inside of the idler task, so an interrupt is not needed + * to get a clean fast response. + * + * XXX Reverify this assumption... -DaveM + * + * Addendum: We do want it to do something for the signal + * delivery case, we detect that by just seeing + * if we are trying to send this to an idler or not. */ -#define idle_me_harder() (cpu_data(smp_processor_id()).idle_volume += 1) -#define unidle_me() (cpu_data(smp_processor_id()).idle_volume = 0) void cpu_idle(void) { + cpuinfo_sparc *cpuinfo = &local_cpu_data(); set_thread_flag(TIF_POLLING_NRFLAG); + while(1) { if (need_resched()) { - unidle_me(); - clear_thread_flag(TIF_POLLING_NRFLAG); + cpuinfo->idle_volume = 0; preempt_enable_no_resched(); schedule(); preempt_disable(); - set_thread_flag(TIF_POLLING_NRFLAG); check_pgt_cache(); } - idle_me_harder(); + cpuinfo->idle_volume++; /* The store ordering is so that IRQ handlers on * other cpus see our increasing idleness for the buddy diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 8aca4b1dc04e..797a65493fb8 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1152,20 +1152,9 @@ void __init smp_cpus_done(unsigned int max_cpus) (bogosum/(5000/HZ))%100); } -/* This needn't do anything as we do not sleep the cpu - * inside of the idler task, so an interrupt is not needed - * to get a clean fast response. - * - * XXX Reverify this assumption... -DaveM - * - * Addendum: We do want it to do something for the signal - * delivery case, we detect that by just seeing - * if we are trying to send this to an idler or not. 
- */ void smp_send_reschedule(int cpu) { - if (cpu_data(cpu).idle_volume == 0) - smp_receive_signal(cpu); + smp_receive_signal(cpu); } /* This is a nop because we capture all other cpus diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 571f9fe490ce..59be85d9a4bc 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -86,12 +86,22 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { + local_irq_enable(); + if (!atomic_read(&hlt_counter)) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + while (!need_resched()) { + local_irq_disable(); + if (!need_resched()) + safe_halt(); + else + local_irq_enable(); + } + set_thread_flag(TIF_POLLING_NRFLAG); + } else { + while (!need_resched()) + cpu_relax(); } } @@ -102,30 +112,16 @@ void default_idle(void) */ static void poll_idle (void) { - int oldval; - local_irq_enable(); - /* - * Deal with another CPU just having chosen a thread to - * run here: - */ - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - asm volatile( - "2:" - "testl %0,%1;" - "rep; nop;" - "je 2b;" - : : - "i" (_TIF_NEED_RESCHED), - "m" (current_thread_info()->flags)); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } + asm volatile( + "2:" + "testl %0,%1;" + "rep; nop;" + "je 2b;" + : : + "i" (_TIF_NEED_RESCHED), + "m" (current_thread_info()->flags)); } void cpu_idle_wait(void) @@ -187,6 +183,8 @@ static inline void play_dead(void) */ void cpu_idle (void) { + set_thread_flag(TIF_POLLING_NRFLAG); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -221,15 +219,12 @@ static void mwait_idle(void) { local_irq_enable(); - if (!need_resched()) { - set_thread_flag(TIF_POLLING_NRFLAG); - do { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) - break; - __mwait(0, 0); - } while (!need_resched()); - clear_thread_flag(TIF_POLLING_NRFLAG); + while (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (need_resched()) + break; + __mwait(0, 0); } } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 161db4acfb91..573b6a97bb1f 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -167,6 +167,19 @@ acpi_processor_power_activate(struct acpi_processor *pr, return; } +static void acpi_safe_halt(void) +{ + int polling = test_thread_flag(TIF_POLLING_NRFLAG); + if (polling) { + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + } + if (!need_resched()) + safe_halt(); + if (polling) + set_thread_flag(TIF_POLLING_NRFLAG); +} + static atomic_t c3_cpu_count; static void acpi_processor_idle(void) @@ -177,7 +190,7 @@ static void acpi_processor_idle(void) int sleep_ticks = 0; u32 t1, t2 = 0; - pr = processors[raw_smp_processor_id()]; + pr = processors[smp_processor_id()]; if (!pr) return; @@ -197,8 +210,13 @@ static void acpi_processor_idle(void) } cx = pr->power.state; - if (!cx) - goto easy_out; + if (!cx) { + if (pm_idle_save) + pm_idle_save(); + else + acpi_safe_halt(); + return; + } /* * Check BM Activity @@ -278,7 +296,8 @@ static void acpi_processor_idle(void) if (pm_idle_save) pm_idle_save(); else - safe_halt(); + acpi_safe_halt(); + /* * TBD: Can't get time duration while in C1, as resumes * go to an ISR rather than here. 
Need to instrument @@ -414,16 +433,6 @@ static void acpi_processor_idle(void) */ if (next_state != pr->power.state) acpi_processor_power_activate(pr, next_state); - - return; - - easy_out: - /* do C1 instead of busy loop */ - if (pm_idle_save) - pm_idle_save(); - else - safe_halt(); - return; } static int acpi_processor_set_power_policy(struct acpi_processor *pr) diff --git a/kernel/sched.c b/kernel/sched.c index 0f2def822296..ac3f5cc3bb51 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -864,21 +864,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) #ifdef CONFIG_SMP static void resched_task(task_t *p) { - int need_resched, nrpolling; + int cpu; assert_spin_locked(&task_rq(p)->lock); - /* minimise the chance of sending an interrupt to poll_idle() */ - nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); - need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); - nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); + if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) + return; + + set_tsk_thread_flag(p, TIF_NEED_RESCHED); + + cpu = task_cpu(p); + if (cpu == smp_processor_id()) + return; - if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) - smp_send_reschedule(task_cpu(p)); + /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ + smp_mb(); + if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) + smp_send_reschedule(cpu); } #else static inline void resched_task(task_t *p) { + assert_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); } #endif -- cgit v1.2.3 From 1e185b97b4364063f1135604b87f8d8469944233 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Tue, 15 Nov 2005 14:37:05 -0800 Subject: [PATCH] ia64: cpu_idle performance bug fix Our performance validation on 2.6.15-rc1 caught a disastrous performance regression on ia64 with netperf (-98%) and volanomark (-58%) compared to the previous kernel version 2.6.14-git7. See the following chart (result group 1 & 2). http://kernel-perf.sourceforge.net/results.machine_id=26.html We have root-caused it to commit 64c7c8f88559624abdbe12b5da6502e8879f8d28. This changeset broke the ia64 task resched notification. In sched.c:resched_task(), a reschedule IPI is conditioned upon TIF_POLLING_NRFLAG. However, the above changeset unconditionally set the polling thread flag for idle tasks regardless of whether pal_halt_light is in use or not. As a result, the resched IPI is not sent from resched_task(). And since the default behavior on ia64 is to use pal_halt_light, we end up delaying the rescheduled task until the next timer tick, thus causing the performance regression. This fixes the performance bug. I'm glad our performance suite is turning up bad performance bugs like this in time. 
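The dependency is visible in the resched_task() rewrite quoted above; condensed:

    set_tsk_thread_flag(p, TIF_NEED_RESCHED);
    ...
    smp_mb();   /* NEED_RESCHED store before POLLING_NRFLAG test */
    if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
        smp_send_reschedule(cpu);   /* IPI only for non-polling targets */

A CPU halted in PAL_HALT_LIGHT is not polling need_resched, so if its idle thread leaves TIF_POLLING_NRFLAG set anyway, the IPI is skipped and the wakeup waits for the next timer tick -- exactly the stall measured above. The fix below toggles the flag to match what the idle loop is actually doing.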
Signed-off-by: Ken Chen Signed-off-by: Linus Torvalds --- arch/ia64/kernel/process.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/ia64/kernel/process.c') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index e92ea64d8040..4305d2ba76f6 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -202,12 +202,9 @@ default_idle (void) { local_irq_enable(); while (!need_resched()) { - if (can_do_pal_halt) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - local_irq_enable(); - } else + if (can_do_pal_halt) + safe_halt(); + else cpu_relax(); } } @@ -272,10 +269,14 @@ cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; int cpu = smp_processor_id(); - set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { + if (can_do_pal_halt) + clear_thread_flag(TIF_POLLING_NRFLAG); + else + set_thread_flag(TIF_POLLING_NRFLAG); + if (!need_resched()) { void (*idle)(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From 8bf1101bd52573e0573e374d56d2feecdbb5e444 Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Wed, 23 Nov 2005 13:37:42 -0800 Subject: [PATCH] kprobes: Fix return probes on sys_execve Fix a bug in kprobes that can cause an Oops or even a crash when a return probe is installed on one of the following functions: sys_execve, do_execve, load_*_binary, flush_old_exec, or flush_thread. The fix is to remove the call to kprobe_flush_task() in flush_thread(). This fix has been tested on all architectures for which the return-probes feature has been implemented (i386, x86_64, ppc64, ia64). Please apply. BACKGROUND Up to now, we have called kprobe_flush_task() under two situations: when a task exits, and when it execs. Flushing kretprobe_instances on exit is correct because (a) do_exit() doesn't return, and (b) one or more return-probed functions may be active when a task calls do_exit(). Neither is the case for sys_execve() and its callees. Initially, the mistaken call to kprobe_flush_task() on exec was harmless because we put the "real" return address of each active probed function back in the stack, just to be safe, when we recycled its kretprobe_instance. When support for ppc64 and ia64 was added, this safety measure couldn't be employed, and was eventually dropped even for i386 and x86_64. sys_execve() and its callees were informally blacklisted for return probes until this fix was developed. Acked-by: Prasanna S Panchamukhi Signed-off-by: Jim Keniston Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 7 ------- arch/ia64/kernel/process.c | 7 ------- arch/powerpc/kernel/process.c | 1 - arch/x86_64/kernel/process.c | 7 ------- 4 files changed, 22 deletions(-) (limited to 'arch/ia64/kernel/process.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 1cb261f225d5..df6c2bcde067 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -393,13 +393,6 @@ void flush_thread(void) { struct task_struct *tsk = current; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. 
- */ - kprobe_flush_task(tsk); - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 4305d2ba76f6..2e33665d9c18 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -718,13 +718,6 @@ kernel_thread_helper (int (*fn)(void *), void *arg) void flush_thread (void) { - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(current); - /* drop floating-point and debug-register state if it exists: */ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); ia64_drop_fpu(current); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index de69fb37c731..a5a7542a8ff3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -457,7 +457,6 @@ void flush_thread(void) if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT); #endif - kprobe_flush_task(current); #ifndef CONFIG_SMP if (last_task_used_math == current) diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 5afd63e8cef7..7519fc520eb3 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -351,13 +351,6 @@ void flush_thread(void) struct task_struct *tsk = current; struct thread_info *t = current_thread_info(); - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); -- cgit v1.2.3
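Schematically, the failure mode this removal cures (an editorial sketch of the call chain, not kernel code; assume a return probe is installed on sys_execve):

    sys_execve()                  /* kretprobe fires on entry: the real
                                   * return address is saved in a
                                   * kretprobe_instance and replaced by
                                   * the trampoline address */
      do_execve()
        flush_old_exec()
          flush_thread()
            kprobe_flush_task()   /* the call removed above: recycled the
                                   * still-live instance, discarding the
                                   * saved return address */
    /* the later return from sys_execve() then lands in the trampoline
     * with no matching instance to restore -> oops or crash */

On exit the same flush is safe: do_exit() never returns through any probed caller, so recycling its outstanding kretprobe_instances leaves nothing dangling.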