From bcccc50ce8fcc833cfed4bb71ede211a6ef5b84a Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Tue, 13 Mar 2012 15:34:17 +0100 Subject: ARM: 7420/1: Improve build environment isolation Increasingly distributions are setting default build environments to have LDFLAGS with hardening options. There seems to be an assumption with those options that LDFLAGS are passed to the compiler frontend rather than used directly with ld (which the kernel build process assumes) To prevent build failures in such environments this patch changes the ARM architecture Makefile to override the LDFLAGS from the environment similar to the behaviour on other common architectures e.g. x86 Signed-off-by: Vincent Sanders Signed-off-by: Russell King --- arch/arm/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 0298b00fe241..f8ebf1e97027 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,6 +10,9 @@ # # Copyright (C) 1995-2001 by Russell King +# Ensure linker flags are correct +LDFLAGS := + LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 -- cgit v1.2.3 From 158e8bfe802f730f9ea7cde32eee8b43285bdd4a Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Sun, 24 Jun 2012 12:46:26 +0100 Subject: ARM: 7432/1: use the new linux/sizes.h Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Acked-by: Linus Walleij Cc: Alan Cox Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- arch/arm/mm/dma-mapping.c | 2 +- arch/arm/mm/init.c | 2 +- arch/arm/mm/ioremap.c | 2 +- arch/arm/mm/mmu.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index fcb575747e5e..e965f1b560f1 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #ifdef 
CONFIG_NEED_MACH_MEMORY_H #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ea6b43154090..30a031c0fcf5 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c21d06c7dd7e..ad7fd8ae8258 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 4f55f5062ab7..566750fa57d4 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e5dad60b558b..2196116c882f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -16,13 +16,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include -- cgit v1.2.3 From ff081e05bfba3461119cd280201d163b6858eda2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 6 Jul 2012 22:03:42 +0100 Subject: ARM: 7457/1: smp: Fix suspicious RCU originating from cpu_die() While running hotplug tests I ran into this RCU splat =============================== [ INFO: suspicious RCU usage. ] 3.4.0 #3275 Tainted: G W ------------------------------- include/linux/rcupdate.h:729 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 
4 locks held by swapper/2/0: #0: ((cpu_died).wait.lock){......}, at: [] complete+0x1c/0x5c #1: (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x2c/0x388 #2: (&rq->lock){-.-.-.}, at: [] try_to_wake_up+0x130/0x388 #3: (rcu_read_lock){.+.+..}, at: [] cpuacct_charge+0x28/0x1f4 stack backtrace: [] (unwind_backtrace+0x0/0x12c) from [] (cpuacct_charge+0x94/0x1f4) [] (cpuacct_charge+0x94/0x1f4) from [] (update_curr+0x24c/0x2c8) [] (update_curr+0x24c/0x2c8) from [] (enqueue_task_fair+0x50/0x194) [] (enqueue_task_fair+0x50/0x194) from [] (enqueue_task+0x30/0x34) [] (enqueue_task+0x30/0x34) from [] (ttwu_activate+0x14/0x38) [] (ttwu_activate+0x14/0x38) from [] (try_to_wake_up+0x178/0x388) [] (try_to_wake_up+0x178/0x388) from [] (__wake_up_common+0x34/0x78) [] (__wake_up_common+0x34/0x78) from [] (complete+0x48/0x5c) [] (complete+0x48/0x5c) from [] (cpu_die+0x2c/0x58) [] (cpu_die+0x2c/0x58) from [] (cpu_idle+0x64/0xfc) [] (cpu_idle+0x64/0xfc) from [<80208160>] (0x80208160) When a cpu is marked offline during its idle thread it calls cpu_die() during an RCU idle period. cpu_die() calls complete() to notify the killing process that the cpu has died. complete() calls into the scheduler code and eventually grabs an RCU read lock in cpuacct_charge(). Mark complete() as RCU_NONIDLE so that RCU pays attention to this CPU for the duration of the complete() function even though it's in idle. Suggested-by: "Paul E. 
McKenney" Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2c7217d971db..aea74f5bc34a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -179,7 +179,7 @@ void __ref cpu_die(void) mb(); /* Tell __cpu_die() that this CPU is now safe to dispose of */ - complete(&cpu_died); + RCU_NONIDLE(complete(&cpu_died)); /* * actual CPU shutdown procedure is at least platform (if not -- cgit v1.2.3 From 3169663ac5902f2228ea0eb8cc34eb52cbd4b283 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 6 Jun 2012 11:42:36 +0100 Subject: ARM: sa11x0/pxa: convert OS timer registers to IOMEM Make the OS timer registers have IOMEM-like properties so they can be passed to readl_relaxed/writel_relaxed() et al. rather than being straight volatile dereferences. Add linux/io.h includes where required. linux/io.h includes added to arch/arm/mach-sa1100/cpu-sa1100.c, arch/arm/mach-sa1100/jornada720_ssp.c, arch/arm/mach-sa1100/leds-lart.c, drivers/input/touchscreen/jornada720_ts.c, drivers/pcmcia/sa1100_shannon.c from Arnd. 
This fixes these warnings: arch/arm/mach-sa1100/time.c: In function 'sa1100_timer_init': arch/arm/mach-sa1100/time.c:104: warning: passing argument 1 of 'clocksource_mmio_init' discards qualifiers from pointer target type arch/arm/mach-pxa/time.c: In function 'pxa_timer_init': arch/arm/mach-pxa/time.c:126: warning: passing argument 1 of 'clocksource_mmio_init' discards qualifiers from pointer target type Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/mach-pxa/include/mach/regs-ost.h | 22 +++++------ arch/arm/mach-pxa/reset.c | 7 ++-- arch/arm/mach-pxa/time.c | 52 +++++++++++++------------- arch/arm/mach-sa1100/assabet.c | 2 +- arch/arm/mach-sa1100/cpu-sa1100.c | 1 + arch/arm/mach-sa1100/cpu-sa1110.c | 1 + arch/arm/mach-sa1100/include/mach/SA-1100.h | 16 ++++---- arch/arm/mach-sa1100/include/mach/gpio.h | 1 + arch/arm/mach-sa1100/include/mach/hardware.h | 6 ++- arch/arm/mach-sa1100/include/mach/uncompress.h | 2 + arch/arm/mach-sa1100/irq.c | 1 + arch/arm/mach-sa1100/jornada720_ssp.c | 1 + arch/arm/mach-sa1100/leds-cerf.c | 1 + arch/arm/mach-sa1100/leds-lart.c | 1 + arch/arm/mach-sa1100/pm.c | 1 + arch/arm/mach-sa1100/time.c | 48 ++++++++++++------------ drivers/input/touchscreen/jornada720_ts.c | 1 + drivers/pcmcia/sa1100_shannon.c | 1 + drivers/watchdog/sa1100_wdt.c | 14 +++---- 19 files changed, 97 insertions(+), 82 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mach-pxa/include/mach/regs-ost.h b/arch/arm/mach-pxa/include/mach/regs-ost.h index a3e5f86ef67e..628819995c52 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ost.h +++ b/arch/arm/mach-pxa/include/mach/regs-ost.h @@ -7,17 +7,17 @@ * OS Timer & Match Registers */ -#define OSMR0 __REG(0x40A00000) /* */ -#define OSMR1 __REG(0x40A00004) /* */ -#define OSMR2 __REG(0x40A00008) /* */ -#define OSMR3 __REG(0x40A0000C) /* */ -#define OSMR4 __REG(0x40A00080) /* */ -#define OSCR __REG(0x40A00010) /* OS Timer Counter Register */ -#define OSCR4 __REG(0x40A00040) /* OS Timer Counter 
Register */ -#define OMCR4 __REG(0x40A000C0) /* */ -#define OSSR __REG(0x40A00014) /* OS Timer Status Register */ -#define OWER __REG(0x40A00018) /* OS Timer Watchdog Enable Register */ -#define OIER __REG(0x40A0001C) /* OS Timer Interrupt Enable Register */ +#define OSMR0 io_p2v(0x40A00000) /* */ +#define OSMR1 io_p2v(0x40A00004) /* */ +#define OSMR2 io_p2v(0x40A00008) /* */ +#define OSMR3 io_p2v(0x40A0000C) /* */ +#define OSMR4 io_p2v(0x40A00080) /* */ +#define OSCR io_p2v(0x40A00010) /* OS Timer Counter Register */ +#define OSCR4 io_p2v(0x40A00040) /* OS Timer Counter Register */ +#define OMCR4 io_p2v(0x40A000C0) /* */ +#define OSSR io_p2v(0x40A00014) /* OS Timer Status Register */ +#define OWER io_p2v(0x40A00018) /* OS Timer Watchdog Enable Register */ +#define OIER io_p2v(0x40A0001C) /* OS Timer Interrupt Enable Register */ #define OSSR_M3 (1 << 3) /* Match status channel 3 */ #define OSSR_M2 (1 << 2) /* Match status channel 2 */ diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c index b4528899ef08..3fab583755d4 100644 --- a/arch/arm/mach-pxa/reset.c +++ b/arch/arm/mach-pxa/reset.c @@ -77,9 +77,10 @@ static void do_gpio_reset(void) static void do_hw_reset(void) { /* Initialize the watchdog and let it fire */ - OWER = OWER_WME; - OSSR = OSSR_M3; - OSMR3 = OSCR + 368640; /* ... in 100 ms */ + writel_relaxed(OWER_WME, OWER); + writel_relaxed(OSSR_M3, OSSR); + /* ... in 100 ms */ + writel_relaxed(readl_relaxed(OSCR) + 368640, OSMR3); } void pxa_restart(char mode, const char *cmd) diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 3d6c9bd90de6..4bc47d63698b 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -35,7 +35,7 @@ static u32 notrace pxa_read_sched_clock(void) { - return OSCR; + return readl_relaxed(OSCR); } @@ -47,8 +47,8 @@ pxa_ost0_interrupt(int irq, void *dev_id) struct clock_event_device *c = dev_id; /* Disarm the compare/match, signal the event. 
*/ - OIER &= ~OIER_E0; - OSSR = OSSR_M0; + writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); + writel_relaxed(OSSR_M0, OSSR); c->event_handler(c); return IRQ_HANDLED; @@ -59,10 +59,10 @@ pxa_osmr0_set_next_event(unsigned long delta, struct clock_event_device *dev) { unsigned long next, oscr; - OIER |= OIER_E0; - next = OSCR + delta; - OSMR0 = next; - oscr = OSCR; + writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER); + next = readl_relaxed(OSCR) + delta; + writel_relaxed(next, OSMR0); + oscr = readl_relaxed(OSCR); return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0; } @@ -72,15 +72,15 @@ pxa_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *dev) { switch (mode) { case CLOCK_EVT_MODE_ONESHOT: - OIER &= ~OIER_E0; - OSSR = OSSR_M0; + writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); + writel_relaxed(OSSR_M0, OSSR); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: /* initializing, released, or preparing for suspend */ - OIER &= ~OIER_E0; - OSSR = OSSR_M0; + writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); + writel_relaxed(OSSR_M0, OSSR); break; case CLOCK_EVT_MODE_RESUME: @@ -108,8 +108,8 @@ static void __init pxa_timer_init(void) { unsigned long clock_tick_rate = get_clock_tick_rate(); - OIER = 0; - OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; + writel_relaxed(0, OIER); + writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR); setup_sched_clock(pxa_read_sched_clock, 32, clock_tick_rate); @@ -122,7 +122,7 @@ static void __init pxa_timer_init(void) setup_irq(IRQ_OST0, &pxa_ost0_irq); - clocksource_mmio_init(&OSCR, "oscr0", clock_tick_rate, 200, 32, + clocksource_mmio_init(OSCR, "oscr0", clock_tick_rate, 200, 32, clocksource_mmio_readl_up); clockevents_register_device(&ckevt_pxa_osmr0); } @@ -132,12 +132,12 @@ static unsigned long osmr[4], oier, oscr; static void pxa_timer_suspend(void) { - osmr[0] = OSMR0; - osmr[1] = OSMR1; - osmr[2] = OSMR2; - osmr[3] = OSMR3; - oier = OIER; - oscr = OSCR; + osmr[0] = 
readl_relaxed(OSMR0); + osmr[1] = readl_relaxed(OSMR1); + osmr[2] = readl_relaxed(OSMR2); + osmr[3] = readl_relaxed(OSMR3); + oier = readl_relaxed(OIER); + oscr = readl_relaxed(OSCR); } static void pxa_timer_resume(void) @@ -151,12 +151,12 @@ static void pxa_timer_resume(void) if (osmr[0] - oscr < MIN_OSCR_DELTA) osmr[0] += MIN_OSCR_DELTA; - OSMR0 = osmr[0]; - OSMR1 = osmr[1]; - OSMR2 = osmr[2]; - OSMR3 = osmr[3]; - OIER = oier; - OSCR = oscr; + writel_relaxed(osmr[0], OSMR0); + writel_relaxed(osmr[1], OSMR1); + writel_relaxed(osmr[2], OSMR2); + writel_relaxed(osmr[3], OSMR3); + writel_relaxed(oier, OIER); + writel_relaxed(oscr, OSCR); } #else #define pxa_timer_suspend NULL diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index d1dc7f1a239c..d673211f121c 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -362,7 +362,7 @@ static void __init assabet_init(void) static void __init map_sa1100_gpio_regs( void ) { unsigned long phys = __PREG(GPLR) & PMD_MASK; - unsigned long virt = io_p2v(phys); + unsigned long virt = (unsigned long)io_p2v(phys); int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO); pmd_t *pmd; diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c index 19b2053f5af4..e8f4d1e19233 100644 --- a/arch/arm/mach-sa1100/cpu-sa1100.c +++ b/arch/arm/mach-sa1100/cpu-sa1100.c @@ -87,6 +87,7 @@ #include #include #include +#include #include diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c index 675bf8ef97e8..48c45b0c92bb 100644 --- a/arch/arm/mach-sa1100/cpu-sa1110.c +++ b/arch/arm/mach-sa1100/cpu-sa1110.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-sa1100/include/mach/SA-1100.h b/arch/arm/mach-sa1100/include/mach/SA-1100.h index 3f2d1b60188c..0ac6cc08a19c 100644 --- a/arch/arm/mach-sa1100/include/mach/SA-1100.h +++ b/arch/arm/mach-sa1100/include/mach/SA-1100.h @@ 
-830,14 +830,14 @@ * (read/write). */ -#define OSMR0 __REG(0x90000000) /* OS timer Match Reg. 0 */ -#define OSMR1 __REG(0x90000004) /* OS timer Match Reg. 1 */ -#define OSMR2 __REG(0x90000008) /* OS timer Match Reg. 2 */ -#define OSMR3 __REG(0x9000000c) /* OS timer Match Reg. 3 */ -#define OSCR __REG(0x90000010) /* OS timer Counter Reg. */ -#define OSSR __REG(0x90000014 ) /* OS timer Status Reg. */ -#define OWER __REG(0x90000018 ) /* OS timer Watch-dog Enable Reg. */ -#define OIER __REG(0x9000001C ) /* OS timer Interrupt Enable Reg. */ +#define OSMR0 io_p2v(0x90000000) /* OS timer Match Reg. 0 */ +#define OSMR1 io_p2v(0x90000004) /* OS timer Match Reg. 1 */ +#define OSMR2 io_p2v(0x90000008) /* OS timer Match Reg. 2 */ +#define OSMR3 io_p2v(0x9000000c) /* OS timer Match Reg. 3 */ +#define OSCR io_p2v(0x90000010) /* OS timer Counter Reg. */ +#define OSSR io_p2v(0x90000014) /* OS timer Status Reg. */ +#define OWER io_p2v(0x90000018) /* OS timer Watch-dog Enable Reg. */ +#define OIER io_p2v(0x9000001C) /* OS timer Interrupt Enable Reg. 
*/ #define OSSR_M(Nb) /* Match detected [0..3] */ \ (0x00000001 << (Nb)) diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h index a38fc4f54241..6a9eecf3137e 100644 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ b/arch/arm/mach-sa1100/include/mach/gpio.h @@ -24,6 +24,7 @@ #ifndef __ASM_ARCH_SA1100_GPIO_H #define __ASM_ARCH_SA1100_GPIO_H +#include #include #include #include diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h index 99f5856d8de4..cbedd75a9d65 100644 --- a/arch/arm/mach-sa1100/include/mach/hardware.h +++ b/arch/arm/mach-sa1100/include/mach/hardware.h @@ -32,7 +32,7 @@ #define PIO_START 0x80000000 /* physical start of IO space */ #define io_p2v( x ) \ - ( (((x)&0x00ffffff) | (((x)&0x30000000)>>VIO_SHIFT)) + VIO_BASE ) + IOMEM( (((x)&0x00ffffff) | (((x)&0x30000000)>>VIO_SHIFT)) + VIO_BASE ) #define io_v2p( x ) \ ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))< @@ -56,7 +58,7 @@ #define cpu_is_sa1100() ((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID) #define cpu_is_sa1110() ((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID) -# define __REG(x) (*((volatile unsigned long *)io_p2v(x))) +# define __REG(x) (*((volatile unsigned long __iomem *)io_p2v(x))) # define __PREG(x) (io_v2p((unsigned long)&(x))) static inline unsigned long get_clock_tick_rate(void) diff --git a/arch/arm/mach-sa1100/include/mach/uncompress.h b/arch/arm/mach-sa1100/include/mach/uncompress.h index 6cb39ddde656..5cf71da60e42 100644 --- a/arch/arm/mach-sa1100/include/mach/uncompress.h +++ b/arch/arm/mach-sa1100/include/mach/uncompress.h @@ -8,6 +8,8 @@ #include "hardware.h" +#define IOMEM(x) (x) + /* * The following code assumes the serial port has already been * initialized by the bootloader. 
We search for the first enabled diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index 516ccc25d7fd..2124f1fc2fbe 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c index b412fc09c80c..7f07f08d8968 100644 --- a/arch/arm/mach-sa1100/jornada720_ssp.c +++ b/arch/arm/mach-sa1100/jornada720_ssp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-sa1100/leds-cerf.c b/arch/arm/mach-sa1100/leds-cerf.c index 040540fb7d8a..30fc3b2bf555 100644 --- a/arch/arm/mach-sa1100/leds-cerf.c +++ b/arch/arm/mach-sa1100/leds-cerf.c @@ -4,6 +4,7 @@ * Author: ??? */ #include +#include #include #include diff --git a/arch/arm/mach-sa1100/leds-lart.c b/arch/arm/mach-sa1100/leds-lart.c index a51830c60e53..50a5b143b460 100644 --- a/arch/arm/mach-sa1100/leds-lart.c +++ b/arch/arm/mach-sa1100/leds-lart.c @@ -10,6 +10,7 @@ * pace of the LED. */ #include +#include #include #include diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 690cf0ce5c0c..6645d1e31f14 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -23,6 +23,7 @@ * Storage is local on the stack now. */ #include +#include #include #include #include diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 6af26e8d55e6..80702c9ecc77 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -22,7 +22,7 @@ static u32 notrace sa1100_read_sched_clock(void) { - return OSCR; + return readl_relaxed(OSCR); } #define MIN_OSCR_DELTA 2 @@ -32,8 +32,8 @@ static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id) struct clock_event_device *c = dev_id; /* Disarm the compare/match, signal the event. 
*/ - OIER &= ~OIER_E0; - OSSR = OSSR_M0; + writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); + writel_relaxed(OSSR_M0, OSSR); c->event_handler(c); return IRQ_HANDLED; @@ -44,10 +44,10 @@ sa1100_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c) { unsigned long next, oscr; - OIER |= OIER_E0; - next = OSCR + delta; - OSMR0 = next; - oscr = OSCR; + writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER); + next = readl_relaxed(OSCR) + delta; + writel_relaxed(next, OSMR0); + oscr = readl_relaxed(OSCR); return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0; } @@ -59,8 +59,8 @@ sa1100_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *c) case CLOCK_EVT_MODE_ONESHOT: case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - OIER &= ~OIER_E0; - OSSR = OSSR_M0; + writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); + writel_relaxed(OSSR_M0, OSSR); break; case CLOCK_EVT_MODE_RESUME: @@ -86,8 +86,8 @@ static struct irqaction sa1100_timer_irq = { static void __init sa1100_timer_init(void) { - OIER = 0; - OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; + writel_relaxed(0, OIER); + writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR); setup_sched_clock(sa1100_read_sched_clock, 32, 3686400); @@ -100,7 +100,7 @@ static void __init sa1100_timer_init(void) setup_irq(IRQ_OST0, &sa1100_timer_irq); - clocksource_mmio_init(&OSCR, "oscr", CLOCK_TICK_RATE, 200, 32, + clocksource_mmio_init(OSCR, "oscr", CLOCK_TICK_RATE, 200, 32, clocksource_mmio_readl_up); clockevents_register_device(&ckevt_sa1100_osmr0); } @@ -110,26 +110,26 @@ unsigned long osmr[4], oier; static void sa1100_timer_suspend(void) { - osmr[0] = OSMR0; - osmr[1] = OSMR1; - osmr[2] = OSMR2; - osmr[3] = OSMR3; - oier = OIER; + osmr[0] = readl_relaxed(OSMR0); + osmr[1] = readl_relaxed(OSMR1); + osmr[2] = readl_relaxed(OSMR2); + osmr[3] = readl_relaxed(OSMR3); + oier = readl_relaxed(OIER); } static void sa1100_timer_resume(void) { - OSSR = 0x0f; - OSMR0 = osmr[0]; - OSMR1 = 
osmr[1]; - OSMR2 = osmr[2]; - OSMR3 = osmr[3]; - OIER = oier; + writel_relaxed(0x0f, OSSR); + writel_relaxed(osmr[0], OSMR0); + writel_relaxed(osmr[1], OSMR1); + writel_relaxed(osmr[2], OSMR2); + writel_relaxed(osmr[3], OSMR3); + writel_relaxed(oier, OIER); /* * OSMR0 is the system timer: make sure OSCR is sufficiently behind */ - OSCR = OSMR0 - LATCH; + writel_relaxed(readl_relaxed(OSMR0) - LATCH, OSCR); } #else #define sa1100_timer_suspend NULL diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c index d9be6eac99b1..7f03d1bd916e 100644 --- a/drivers/input/touchscreen/jornada720_ts.c +++ b/drivers/input/touchscreen/jornada720_ts.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c index decb34730bcf..56ab73915602 100644 --- a/drivers/pcmcia/sa1100_shannon.c +++ b/drivers/pcmcia/sa1100_shannon.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c index 54984deb8561..ccd6b29e21bf 100644 --- a/drivers/watchdog/sa1100_wdt.c +++ b/drivers/watchdog/sa1100_wdt.c @@ -54,10 +54,10 @@ static int sa1100dog_open(struct inode *inode, struct file *file) return -EBUSY; /* Activate SA1100 Watchdog timer */ - OSMR3 = OSCR + pre_margin; - OSSR = OSSR_M3; - OWER = OWER_WME; - OIER |= OIER_E3; + writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); + writel_relaxed(OSSR_M3, OSSR); + writel_relaxed(OWER_WME, OWER); + writel_relaxed(readl_relaxed(OIER) | OIER_E3, OIER); return nonseekable_open(inode, file); } @@ -80,7 +80,7 @@ static ssize_t sa1100dog_write(struct file *file, const char __user *data, { if (len) /* Refresh OSMR3 timer. 
*/ - OSMR3 = OSCR + pre_margin; + writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); return len; } @@ -114,7 +114,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, break; case WDIOC_KEEPALIVE: - OSMR3 = OSCR + pre_margin; + writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); ret = 0; break; @@ -129,7 +129,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd, } pre_margin = oscr_freq * time; - OSMR3 = OSCR + pre_margin; + writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3); /*fall through*/ case WDIOC_GETTIMEOUT: -- cgit v1.2.3 From 98437b83ee9e8c4adc84efa7f5a02647041f675b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Jun 2012 20:23:20 +0100 Subject: ARM: Realview: use SGI0 to wake secondary CPUs Signed-off-by: Russell King --- arch/arm/plat-versatile/platsmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index 49c7db48c7f1..d7c5c171f5aa 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -85,7 +85,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * the boot monitor to read the system wide flags register, * and branch to the address found there. 
*/ - gic_raise_softirq(cpumask_of(cpu), 1); + gic_raise_softirq(cpumask_of(cpu), 0); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { -- cgit v1.2.3 From 79d15ce999e7ae3e4b6cd71a720915e5d1c6c9c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Jun 2012 20:24:07 +0100 Subject: ARM: OMAP: use SGI0 to wake secondary CPUs Signed-off-by: Russell King --- arch/arm/mach-omap2/omap-smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index deffbf1c9627..596eb70d37be 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -111,7 +111,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) booted = true; } - gic_raise_softirq(cpumask_of(cpu), 1); + gic_raise_softirq(cpumask_of(cpu), 0); /* * Now the secondary core is starting up let it run its -- cgit v1.2.3 From ffae894035a1e0c46869eb81d53ea88ca4eaf2f0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 11 Jun 2012 20:24:44 +0100 Subject: ARM: MSM: use SGI0 to wake secondary CPUs Signed-off-by: Russell King --- arch/arm/mach-msm/platsmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index db0117ec55f4..e012dc8391cf 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -127,7 +127,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * the boot monitor to read the system wide flags register, * and branch to the address found there. 
*/ - gic_raise_softirq(cpumask_of(cpu), 1); + gic_raise_softirq(cpumask_of(cpu), 0); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { -- cgit v1.2.3 From 02df19b4227e5b799e4642e88b568f9474fa78d0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 15 Jun 2012 10:23:32 +0100 Subject: ARM: 7424/1: update die handler from x86 Robustify ARM's die() handling with improvements from x86: - Fix for a deadlock (before panic in the case of panic_on_oops) if we oops under a spinlock which is also used from interrupt handler, since the old code was unconditionally enabling interrupts. - Usage of arch spinlock so lockdep etc doesn't get involved while we're trying to dump out oopses. - Deadlock prevention in the unlikely event that die() recurses. The changes all touch the same few lines of code, so they're done together in one patch. Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 78 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 23 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4928d89758f4..2df8715c36c0 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -233,9 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #define S_ISA " ARM" #endif -static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -245,12 +245,12 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); if (ret == NOTIFY_STOP) - return ret; + return 1; print_modules(); __show_regs(regs); printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 
0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, @@ -259,45 +259,77 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt dump_instr(KERN_EMERG, regs); } - return ret; + return 0; } -static DEFINE_RAW_SPINLOCK(die_lock); +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; -/* - * This function is protected against re-entrancy. - */ -void die(const char *str, struct pt_regs *regs, int err) +static unsigned long oops_begin(void) { - struct thread_info *thread = current_thread_info(); - int ret; - enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + int cpu; + unsigned long flags; oops_enter(); - raw_spin_lock_irq(&die_lock); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; console_verbose(); bust_spinlocks(1); - if (!user_mode(regs)) - bug_type = report_bug(regs->ARM_pc, regs); - if (bug_type != BUG_TRAP_TYPE_NONE) - str = "Oops - BUG"; - ret = __die(str, err, thread, regs); + return flags; +} - if (regs && kexec_should_crash(thread->task)) +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); + die_owner = -1; add_taint(TAINT_DIE); - raw_spin_unlock_irq(&die_lock); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. 
*/ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + if (signr) + do_exit(signr); +} + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode(regs)) + bug_type = report_bug(regs->ARM_pc, regs); + if (bug_type != BUG_TRAP_TYPE_NONE) + str = "Oops - BUG"; + + if (__die(str, err, regs)) + sig = 0; + + oops_end(flags, regs, sig); } void arm_notify_die(const char *str, struct pt_regs *regs, -- cgit v1.2.3 From 9ad86ddde0345e903eb74837e52cf19fe0c7f825 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 25 Jun 2012 14:59:38 +0100 Subject: ARM: 7436/1: Do not map the vectors page as write-through on UP systems The vectors page has been traditionally mapped as WT on UP systems but this creates a mismatched alias with the directly mapped RAM that is using WB attributes. On newer processors like Cortex-A15 this has implications on the data/instructions coherency at the point of unification (usually L2). This patch removes such restriction. 
Signed-off-by: Catalin Marinas Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e5dad60b558b..f37dc1856a69 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -421,12 +421,6 @@ static void __init build_mem_type_table(void) cp = &cache_policies[cachepolicy]; vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; - /* - * Only use write-through for non-SMP systems - */ - if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) - vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; - /* * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) -- cgit v1.2.3 From d0f34a11ddab9b456e4caf9fc48d8d7e832e0e50 Mon Sep 17 00:00:00 2001 From: Genoud Richard Date: Tue, 26 Jun 2012 16:37:59 +0100 Subject: ARM: 7437/1: zImage: Allow DTB command line concatenation with ATAG_CMDLINE This patch allows the ATAG_CMDLINE provided by the bootloader to be concatenated to the bootargs property of the device tree. This is useful to merge static values defined in the device tree with the boot loader's (possibly) more dynamic values, such as startup reasons and more. The bootloader should use the device tree to pass those values to the kernel, but that's not always simple (old bootloader or very small one). The behaviour is the same as the one introduced by Victor Boivie in 4394c1244249198c6b85093d46935b761b36ae05 by extending the CONFIG_CMDLINE. 
Signed-off-by: Richard Genoud Tested-by: Gregory CLEMENT Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/Kconfig | 19 ++++++++++ arch/arm/boot/compressed/atags_to_fdt.c | 62 +++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b649c5904a4f..8c9d264f2108 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1960,6 +1960,25 @@ config ARM_ATAG_DTB_COMPAT bootloaders, this option allows zImage to extract the information from the ATAG list and store it at run time into the appended DTB. +choice + prompt "Kernel command line type" if ARM_ATAG_DTB_COMPAT + default ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER + +config ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader instead of + the device tree bootargs property. If the boot loader doesn't provide + any, the device tree bootargs property will be used. + +config ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND + bool "Extend with bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the device tree bootargs property.
+ +endchoice + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 797f04bedb47..aabc02a68482 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -1,6 +1,12 @@ #include #include +#if defined(CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND) +#define do_extend_cmdline 1 +#else +#define do_extend_cmdline 0 +#endif + static int node_offset(void *fdt, const char *node_path) { int offset = fdt_path_offset(fdt, node_path); @@ -36,6 +42,48 @@ static int setprop_cell(void *fdt, const char *node_path, return fdt_setprop_cell(fdt, offset, property, val); } +static const void *getprop(const void *fdt, const char *node_path, + const char *property, int *len) +{ + int offset = fdt_path_offset(fdt, node_path); + + if (offset == -FDT_ERR_NOTFOUND) + return NULL; + + return fdt_getprop(fdt, offset, property, len); +} + +static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline) +{ + char cmdline[COMMAND_LINE_SIZE]; + const char *fdt_bootargs; + char *ptr = cmdline; + int len = 0; + + /* copy the fdt command line into the buffer */ + fdt_bootargs = getprop(fdt, "/chosen", "bootargs", &len); + if (fdt_bootargs) + if (len < COMMAND_LINE_SIZE) { + memcpy(ptr, fdt_bootargs, len); + /* len is the length of the string + * including the NULL terminator */ + ptr += len - 1; + } + + /* and append the ATAG_CMDLINE */ + if (fdt_cmdline) { + len = strlen(fdt_cmdline); + if (ptr - cmdline + len + 2 < COMMAND_LINE_SIZE) { + *ptr++ = ' '; + memcpy(ptr, fdt_cmdline, len); + ptr += len; + } + } + *ptr = '\0'; + + setprop_string(fdt, "/chosen", "bootargs", cmdline); +} + /* * Convert and fold provided ATAGs into the provided FDT. 
* @@ -72,8 +120,18 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space) for_each_tag(atag, atag_list) { if (atag->hdr.tag == ATAG_CMDLINE) { - setprop_string(fdt, "/chosen", "bootargs", - atag->u.cmdline.cmdline); + /* Append the ATAGS command line to the device tree + * command line. + * NB: This means that if the same parameter is set in + * the device tree and in the tags, the one from the + * tags will be chosen. + */ + if (do_extend_cmdline) + merge_fdt_bootargs(fdt, + atag->u.cmdline.cmdline); + else + setprop_string(fdt, "/chosen", "bootargs", + atag->u.cmdline.cmdline); } else if (atag->hdr.tag == ATAG_MEM) { if (memcount >= sizeof(mem_reg_property)/4) continue; -- cgit v1.2.3 From 9fa16b7755967b1b4dc704de71a3d639d13e21fc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 4 Jul 2012 04:58:12 +0100 Subject: ARM: 7439/1: head.S: simplify initial page table mapping Let's map the initial RAM up to the end of the kernel .bss instead of the strict kernel image area. This simplifies the code as the kernel image only needs to be handled specially in the XIP case. That covers the legacy ATAG location as well. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/head.S | 59 ++++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 36 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 835898e7d704..3db960e20cb8 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -55,14 +55,6 @@ add \rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE .endm -#ifdef CONFIG_XIP_KERNEL -#define KERNEL_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) -#define KERNEL_END _edata_loc -#else -#define KERNEL_START KERNEL_RAM_VADDR -#define KERNEL_END _end -#endif - /* * Kernel startup entry point. * --------------------------- @@ -218,51 +210,46 @@ __create_page_tables: blo 1b /* - * Now setup the pagetables for our kernel direct - * mapped region. 
+ * Map our RAM from the start to the end of the kernel .bss section. */ - mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! - ldr r6, =(KERNEL_END - 1) - add r0, r0, #1 << PMD_ORDER + add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) + ldr r6, =(_end - 1) + orr r3, r8, r7 add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: cmp r0, r6 +1: str r3, [r0], #1 << PMD_ORDER add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER + cmp r0, r6 bls 1b #ifdef CONFIG_XIP_KERNEL /* - * Map some ram to cover our .data and .bss areas. + * Map the kernel image separately as it is not located in RAM. */ - add r3, r8, #TEXT_OFFSET - orr r3, r3, r7 - add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]! - ldr r6, =(_end - 1) - add r0, r0, #4 +#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + mov r3, pc + mov r3, r3, lsr #SECTION_SHIFT + orr r3, r7, r3, lsl #SECTION_SHIFT + add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) + str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + ldr r6, =(_edata_loc - 1) + add r0, r0, #1 << PMD_ORDER add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) 1: cmp r0, r6 - add r3, r3, #1 << 20 - strls r3, [r0], #4 + add r3, r3, #1 << SECTION_SHIFT + strls r3, [r0], #1 << PMD_ORDER bls 1b #endif /* - * Then map boot params address in r2 or the first 1MB (2MB with LPAE) - * of ram if boot params address is not specified. + * Then map boot params address in r2 if specified. 
*/ mov r0, r2, lsr #SECTION_SHIFT movs r0, r0, lsl #SECTION_SHIFT - moveq r0, r8 - sub r3, r0, r8 - add r3, r3, #PAGE_OFFSET - add r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orr r6, r7, r0 - str r6, [r3] + subne r3, r0, r8 + addne r3, r3, #PAGE_OFFSET + addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + orrne r6, r7, r0 + strne r6, [r3] #ifdef CONFIG_DEBUG_LL #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) -- cgit v1.2.3 From a5203c4ce6750730b1d95a8bc1e8214765450f7e Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 8 Jul 2012 22:51:44 +0100 Subject: ARM: 7460/1: remove asm/locks.h Commit 64ac24e738823161693bf791f87adc802cf529ff ("Generic semaphore implementation") removed the last include of this header. Apparently it was just an oversight to keep this header. It can safely be removed now. Acked-by: Will Deacon Signed-off-by: Paul Bolle Signed-off-by: Russell King --- arch/arm/include/asm/locks.h | 274 ------------------------------------------- 1 file changed, 274 deletions(-) delete mode 100644 arch/arm/include/asm/locks.h (limited to 'arch/arm') diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h deleted file mode 100644 index ef4c897772d1..000000000000 --- a/arch/arm/include/asm/locks.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * arch/arm/include/asm/locks.h - * - * Copyright (C) 2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Interrupt safe locking assembler. 
- */ -#ifndef __ASM_PROC_LOCKS_H -#define __ASM_PROC_LOCKS_H - -#if __LINUX_ARM_ARCH__ >= 6 - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -"1: ldrex lr, [%1]\n" \ -" sub lr, lr, %2\n" \ -" strex ip, lr, [%1]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" cmp lr, #0\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. 
- */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" sub lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -"1: ldrex lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -"1: ldrex lr, [%0]\n" \ -" add lr, lr, %1\n" \ -" strex ip, lr, [%0]\n" \ -" teq ip, #0\n" \ -" bne 1b\n" \ -" teq lr, #0\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#else - -#define __down_op(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %0\n" \ -" blmi " #fail \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_ret(ptr,fail) \ - ({ \ - unsigned int ret; \ - __asm__ __volatile__( \ - "@ down_op_ret\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%1]\n" \ -" subs lr, lr, %2\n" \ -" str lr, [%1]\n" \ -" msr cpsr_c, ip\n" \ -" movmi ip, %1\n" \ -" movpl ip, #0\n" \ -" blmi " #fail "\n" \ -" mov %0, ip" \ - : "=&r" (ret) \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - ret; \ - }) - -#define __up_op(ptr,wake) \ - ({ \ 
- smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movle ip, %0\n" \ -" blle " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -/* - * The value 0x01000000 supports up to 128 processors and - * lots of processes. BIAS must be chosen such that sub'ing - * BIAS once per CPU will result in the long remaining - * negative. - */ -#define RW_LOCK_BIAS 0x01000000 -#define RW_LOCK_BIAS_STR "0x01000000" - -#define __down_op_write(ptr,fail) \ - ({ \ - __asm__ __volatile__( \ - "@ down_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" subs lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movne ip, %0\n" \ -" blne " #fail \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __up_op_write(ptr,wake) \ - ({ \ - __asm__ __volatile__( \ - "@ up_op_write\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" movcs ip, %0\n" \ -" blcs " #wake \ - : \ - : "r" (ptr), "I" (RW_LOCK_BIAS) \ - : "ip", "lr", "cc"); \ - smp_mb(); \ - }) - -#define __down_op_read(ptr,fail) \ - __down_op(ptr, fail) - -#define __up_op_read(ptr,wake) \ - ({ \ - smp_mb(); \ - __asm__ __volatile__( \ - "@ up_op_read\n" \ -" mrs ip, cpsr\n" \ -" orr lr, ip, #128\n" \ -" msr cpsr_c, lr\n" \ -" ldr lr, [%0]\n" \ -" adds lr, lr, %1\n" \ -" str lr, [%0]\n" \ -" msr cpsr_c, ip\n" \ -" moveq ip, %0\n" \ -" bleq " #wake \ - : \ - : "r" (ptr), "I" (1) \ - : "ip", "lr", "cc"); \ - }) - -#endif - -#endif -- cgit v1.2.3 From 27a5569dc66ecce06cb532542ddcd0b6da8783f6 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 6 Jul 2012 11:06:49 +0100 Subject: ARM: 7444/1: kernel: add arch-timer C3STOP feature When a 
CPU is shutdown its architected timer comparators registers are lost. Within CPU idle, before processors enter shutdown they enter clock events broadcast mode through the clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, cpuid); function where the local timers are emulated by a global always-on timer. On CPU resume, the per-CPU tick device normal mode is restored by exiting broadcast mode through clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, cpuid); In order for this mechanism to function, architected timers should add to their feature C3STOP, which means that they are not able to function when the CPU is in off-mode. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier Signed-off-by: Russell King --- arch/arm/kernel/arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dd58035621f7..df44c8cf9e2e 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -137,7 +137,7 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) /* Be safe... */ arch_timer_disable(); - clk->features = CLOCK_EVT_FEAT_ONESHOT; + clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; clk->rating = 450; clk->set_mode = arch_timer_set_mode; -- cgit v1.2.3 From 575320d625d5b5eb115575a1f5e17af456e69577 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:43:03 +0100 Subject: ARM: 7445/1: mm: update CONTEXTIDR register to contain PID of current process This patch introduces a new Kconfig option which, when enabled, causes the kernel to write the PID of the current task into the PROCID field of the CONTEXTIDR on context switch. This is useful when analysing hardware trace, since writes to this register can be configured to emit an event into the trace stream. 
The thread notifier for writing the PID is deliberately kept separate from the ASID-writing code so that we can support newer processors using LPAE, where the ASID is stored in TTBR0. As such, the switch_mm code is updated to perform a read-modify-write sequence to ensure that we don't clobber the PID on CPUs using the classic 2-level page tables. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 9 +++++++++ arch/arm/mm/context.c | 35 +++++++++++++++++++++++++++++++++++ arch/arm/mm/proc-v6.S | 6 ++++++ arch/arm/mm/proc-v7-2level.S | 5 +++++ 4 files changed, 55 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 01a134141216..521e15bc6401 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -369,4 +369,13 @@ config ARM_KPROBES_TEST help Perform tests of kprobes API and instruction set simulation. +config PID_IN_CONTEXTIDR + bool "Write the current PID to the CONTEXTIDR register" + depends on CPU_COPY_V6 + help + Enabling this option causes the kernel to write the current PID to + the PROCID field of the CONTEXTIDR register, at the expense of some + additional instructions during context switch. Say Y here only if you + are planning to use hardware trace tools with this kernel. 
+ endmenu diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 806cc4f63516..119bc52ab93e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -14,6 +14,7 @@ #include #include +#include #include static DEFINE_RAW_SPINLOCK(cpu_asid_lock); @@ -48,6 +49,40 @@ void cpu_set_reserved_ttbr0(void) } #endif +#ifdef CONFIG_PID_IN_CONTEXTIDR +static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, + void *t) +{ + u32 contextidr; + pid_t pid; + struct thread_info *thread = t; + + if (cmd != THREAD_NOTIFY_SWITCH) + return NOTIFY_DONE; + + pid = task_pid_nr(thread->task) << ASID_BITS; + asm volatile( + " mrc p15, 0, %0, c13, c0, 1\n" + " bfi %1, %0, #0, %2\n" + " mcr p15, 0, %1, c13, c0, 1\n" + : "=r" (contextidr), "+r" (pid) + : "I" (ASID_BITS)); + isb(); + + return NOTIFY_OK; +} + +static struct notifier_block contextidr_notifier_block = { + .notifier_call = contextidr_notifier, +}; + +static int __init contextidr_notifier_init(void) +{ + return thread_register_notifier(&contextidr_notifier_block); +} +arch_initcall(contextidr_notifier_init); +#endif + /* * We fork()ed a process, and we need a new context for the child * to run in. 
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 5900cd520e84..86b8b480634f 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -107,6 +107,12 @@ ENTRY(cpu_v6_switch_mm) mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB mcr p15, 0, r2, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + bic r2, r2, #0xff @ extract the PID + and r1, r1, #0xff + orr r1, r1, r2 @ insert into new context ID +#endif mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif mov pc, lr diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 42ac069c8012..fd045e706390 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -46,6 +46,11 @@ ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + lsr r2, r2, #8 @ extract the PID + bfi r1, r2, #8, #24 @ insert into new context ID +#endif #ifdef CONFIG_ARM_ERRATA_754322 dsb #endif -- cgit v1.2.3 From 546c2896a42202dbc7d02f7c6ec9948ac1bf511b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:43:41 +0100 Subject: ARM: 7446/1: spinlock: use ticket algorithm for ARMv6+ locking implementation Ticket spinlocks ensure locking fairness by introducing a FIFO-like nature to the granting of lock acquisitions and also reducing the thundering herd effect when spinning on a lock by allowing the cacheline to remain in a shared state amongst the waiting CPUs. This is especially important on systems where memory-access times are not necessarily uniform when accessing the lock structure (for example, on a multi-cluster platform where the lock is allocated into L1 when a CPU releases it). This patch implements the ticket spinlock algorithm for ARM, replacing the simpler implementation for ARMv6+ processors. 
Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 72 +++++++++++++++++++++++------------ arch/arm/include/asm/spinlock_types.h | 17 ++++++++- 2 files changed, 63 insertions(+), 26 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 65fa3c88095c..0da2effd4b37 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -59,18 +59,13 @@ static inline void dsb_sev(void) } /* - * ARMv6 Spin-locking. + * ARMv6 ticket-based spin-locking. * - * We exclusively read the old value. If it is zero, we may have - * won the lock, so we try exclusively storing it. A memory barrier - * is required after we get a lock, and before we release it, because - * V6 CPUs are assumed to have weakly ordered memory. - * - * Unlocked value: 0 - * Locked value: 1 + * A memory barrier is required after we get a lock, and before we + * release it, because V6 CPUs are assumed to have weakly ordered + * memory. 
*/ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -79,31 +74,39 @@ static inline void dsb_sev(void) static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; + u32 newval; + arch_spinlock_t lockval; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" -" teq %0, #0\n" - WFE("ne") -" strexeq %0, %2, [%1]\n" -" teqeq %0, #0\n" +"1: ldrex %0, [%3]\n" +" add %1, %0, %4\n" +" strex %2, %1, [%3]\n" +" teq %2, #0\n" " bne 1b" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner); + } + smp_mb(); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp; + u32 slock; __asm__ __volatile__( -" ldrex %0, [%1]\n" -" teq %0, #0\n" -" strexeq %0, %2, [%1]" - : "=&r" (tmp) - : "r" (&lock->lock), "r" (1) +" ldrex %0, [%2]\n" +" subs %1, %0, %0, ror #16\n" +" addeq %0, %0, %3\n" +" strexeq %1, %0, [%2]" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); if (tmp == 0) { @@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { + unsigned long tmp; + u32 slock; + smp_mb(); __asm__ __volatile__( -" str %1, [%0]\n" - : - : "r" (&lock->lock), "r" (0) +" mov %1, #1\n" +"1: ldrex %0, [%2]\n" +" uadd16 %0, %0, %1\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (slock), "=&r" (tmp) + : "r" (&lock->slock) : "cc"); dsb_sev(); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + return tickets.owner != tickets.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + struct __raw_tickets tickets = 
ACCESS_ONCE(lock->tickets); + return (tickets.next - tickets.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * RWLOCKS * diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h index d14d197ae04a..b262d2f8b478 100644 --- a/arch/arm/include/asm/spinlock_types.h +++ b/arch/arm/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#define TICKET_SHIFT 16 + typedef struct { - volatile unsigned int lock; + union { + u32 slock; + struct __raw_tickets { +#ifdef __ARMEB__ + u16 next; + u16 owner; +#else + u16 owner; + u16 next; +#endif + } tickets; + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { volatile unsigned int lock; -- cgit v1.2.3 From 881ccccb6bd3f0e1fff8b9addbe0de90e0b16166 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:44:13 +0100 Subject: ARM: 7447/1: rwlocks: remove unused branch labels from trylock routines The ARM arch_{read,write}_trylock implementations include unused backwards branch labels, since we don't retry the locking operation if the exclusive store fails. This patch removes the labels. 
Acked-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0da2effd4b37..b4ca707d0a69 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -182,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) unsigned long tmp; __asm__ __volatile__( -"1: ldrex %0, [%1]\n" +" ldrex %0, [%1]\n" " teq %0, #0\n" " strexeq %0, %2, [%1]" : "=&r" (tmp) @@ -268,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) unsigned long tmp, tmp2 = 1; __asm__ __volatile__( -"1: ldrex %0, [%2]\n" +" ldrex %0, [%2]\n" " adds %0, %0, #1\n" " strexpl %1, %0, [%2]\n" : "=&r" (tmp), "+r" (tmp2) -- cgit v1.2.3 From 4295b898f5a5c7e62ae68e7a4ecc4b414622ffe6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:00 +0100 Subject: ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration In order to provide PMU name strings compatible with the OProfile user ABI, an enumeration of all PMUs is currently used by perf to identify each PMU uniquely. Unfortunately, this does not scale well in the presence of multiple PMUs and creates a single, global namespace across all PMUs in the system. This patch removes the enumeration and instead uses the name string for the PMU to map onto the OProfile variant. perf_pmu_name is implemented for CPU PMUs, which is all that OProfile cares about anyway. 
Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/perf_event.h | 17 +------------- arch/arm/include/asm/pmu.h | 3 +-- arch/arm/kernel/perf_event.c | 15 +++++-------- arch/arm/kernel/perf_event_v6.c | 2 -- arch/arm/kernel/perf_event_v7.c | 5 ----- arch/arm/kernel/perf_event_xscale.c | 2 -- arch/arm/oprofile/common.c | 45 +++++++++++++++++++++++-------------- 7 files changed, 36 insertions(+), 53 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 00cbe10a50e3..e074948d8143 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,21 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* ARM perf PMU IDs for use by internal perf clients. */ -enum arm_perf_pmu_ids { - ARM_PERF_PMU_ID_XSCALE1 = 0, - ARM_PERF_PMU_ID_XSCALE2, - ARM_PERF_PMU_ID_V6, - ARM_PERF_PMU_ID_V6MP, - ARM_PERF_PMU_ID_CA8, - ARM_PERF_PMU_ID_CA9, - ARM_PERF_PMU_ID_CA5, - ARM_PERF_PMU_ID_CA15, - ARM_PERF_PMU_ID_CA7, - ARM_NUM_PMU_IDS, -}; - -extern enum arm_perf_pmu_ids -armpmu_get_pmu_id(void); +/* Nothing to see here... 
*/ #endif /* __ARM_PERF_EVENT_H__ */ diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 90114faa9f3c..4432305f4a2a 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -103,10 +103,9 @@ struct pmu_hw_events { struct arm_pmu { struct pmu pmu; - enum arm_perf_pmu_ids id; enum arm_pmu_type type; cpumask_t active_irqs; - const char *name; + char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 186c8cb982c5..df85eda3add3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -47,17 +47,14 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); /* Set at runtime when we know what CPU type we are. */ static struct arm_pmu *cpu_pmu; -enum arm_perf_pmu_ids -armpmu_get_pmu_id(void) +const char *perf_pmu_name(void) { - int id = -ENODEV; - - if (cpu_pmu != NULL) - id = cpu_pmu->id; + if (!cpu_pmu) + return NULL; - return id; + return cpu_pmu->pmu.name; } -EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); +EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { @@ -760,7 +757,7 @@ init_hw_perf_events(void) cpu_pmu->name, cpu_pmu->num_events); cpu_pmu_init(cpu_pmu); register_cpu_notifier(&pmu_cpu_notifier); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); } else { pr_info("no hardware support available\n"); } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index ab627a740fa3..c90fcb2b6967 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -650,7 +650,6 @@ static int armv6_map_event(struct perf_event *event) } static struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, @@ -685,7 +684,6 @@ static int 
armv6mpcore_map_event(struct perf_event *event) } static struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index d3c536068162..f04070bd2183 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1258,7 +1258,6 @@ static u32 __init armv7_read_num_pmnc_events(void) static struct arm_pmu *__init armv7_a8_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.map_event = armv7_a8_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1267,7 +1266,6 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void) static struct arm_pmu *__init armv7_a9_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.map_event = armv7_a9_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1276,7 +1274,6 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void) static struct arm_pmu *__init armv7_a5_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA5; armv7pmu.name = "ARMv7 Cortex-A5"; armv7pmu.map_event = armv7_a5_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1285,7 +1282,6 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void) static struct arm_pmu *__init armv7_a15_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA15; armv7pmu.name = "ARMv7 Cortex-A15"; armv7pmu.map_event = armv7_a15_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1295,7 +1291,6 @@ static struct arm_pmu *__init armv7_a15_pmu_init(void) static struct arm_pmu *__init armv7_a7_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA7; armv7pmu.name = "ARMv7 Cortex-A7"; armv7pmu.map_event = armv7_a7_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 
e34e7254e652..f759fe0bab63 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -435,7 +435,6 @@ static int xscale_map_event(struct perf_event *event) } static struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, @@ -803,7 +802,6 @@ xscale2pmu_write_counter(int counter, u32 val) } static struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 4e0a371630b3..99c63d4b6af8 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -23,26 +23,37 @@ #include #ifdef CONFIG_HW_PERF_EVENTS + +/* + * OProfile has a curious naming scheme for the ARM PMUs, but they are + * part of the user ABI so we need to map from the perf PMU name for + * supported PMUs. + */ +static struct op_perf_name { + char *perf_name; + char *op_name; +} op_perf_name_map[] = { + { "xscale1", "arm/xscale1" }, + { "xscale1", "arm/xscale2" }, + { "v6", "arm/armv6" }, + { "v6mpcore", "arm/mpcore" }, + { "ARMv7 Cortex-A8", "arm/armv7" }, + { "ARMv7 Cortex-A9", "arm/armv7-ca9" }, +}; + char *op_name_from_perf_id(void) { - enum arm_perf_pmu_ids id = armpmu_get_pmu_id(); - - switch (id) { - case ARM_PERF_PMU_ID_XSCALE1: - return "arm/xscale1"; - case ARM_PERF_PMU_ID_XSCALE2: - return "arm/xscale2"; - case ARM_PERF_PMU_ID_V6: - return "arm/armv6"; - case ARM_PERF_PMU_ID_V6MP: - return "arm/mpcore"; - case ARM_PERF_PMU_ID_CA8: - return "arm/armv7"; - case ARM_PERF_PMU_ID_CA9: - return "arm/armv7-ca9"; - default: - return NULL; + int i; + struct op_perf_name names; + const char *perf_name = perf_pmu_name(); + + for (i = 0; i < ARRAY_SIZE(op_perf_name_map); ++i) { + names = op_perf_name_map[i]; + if (!strcmp(names.perf_name, perf_name)) + return names.op_name; } + + return NULL; } #endif 
-- cgit v1.2.3 From 8c56cc8be5b38e3684eba96dc9b3f7ca7e495755 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:45:39 +0100 Subject: ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for ARM using the same algorithm as x86. We use the fls macro from ARMv5 onwards, where we have a clz instruction available which saves us a mov instruction when targeting Thumb-2. For older CPUs, we use the magic 0x0ff0001 constant. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 2 ++ arch/arm/include/asm/uaccess.h | 27 ++++------------- arch/arm/include/asm/word-at-a-time.h | 55 +++++++++++++++++++++++++++++++++++ arch/arm/kernel/armksyms.c | 4 --- arch/arm/lib/Makefile | 1 - arch/arm/lib/strncpy_from_user.S | 43 --------------------------- arch/arm/lib/strnlen_user.S | 40 ------------------------- 7 files changed, 63 insertions(+), 109 deletions(-) create mode 100644 arch/arm/include/asm/word-at-a-time.h delete mode 100644 arch/arm/lib/strncpy_from_user.S delete mode 100644 arch/arm/lib/strnlen_user.S (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8c9d264f2108..574561a66d75 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -46,6 +46,8 @@ config ARM select GENERIC_SMP_IDLE_THREAD select KTIME_SCALAR select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 71f6536d17ac..479a6352e0b5 100644 --- a/arch/arm/include/asm/uaccess.h +++ 
b/arch/arm/include/asm/uaccess.h @@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) +#define user_addr_max() \ + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + /* * The "__xxx" versions of the user access functions do not verify the * address space - it must have been done previously with a separate @@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l #define __clear_user(addr,n) (memset((void __force *)addr, 0, n), 0) #endif -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} - -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) +extern long strncpy_from_user(char *dest, const char __user *src, long count); -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* _ASMARM_UACCESS_H */ diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..74b2d4578577 --- /dev/null +++ b/arch/arm/include/asm/word-at-a-time.h @@ -0,0 +1,55 @@ +#ifndef __ASM_ARM_WORD_AT_A_TIME_H +#define 
__ASM_ARM_WORD_AT_A_TIME_H + +#ifndef __ARMEB__ + +/* + * Little-endian word-at-a-time zero byte handling. + * Heavily based on the x86 algorithm. + */ +#include + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + unsigned long ret; + +#if __LINUX_ARM_ARCH__ >= 5 + /* We have clz available. */ + ret = fls(mask) >> 3; +#else + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + ret = (0x0ff0001 + mask) >> 23; + /* Fix the 1 for 00 case */ + ret &= mask; +#endif + + return ret; +} + +#else /* __ARMEB__ */ +#include +#endif + +#endif /* __ASM_ARM_WORD_AT_A_TIME_H */ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01f..c3dff6abc89d 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -87,10 +87,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 992769ae2599..d5060dab6e52 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -8,7 +8,6 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ delay.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ - strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ testchangebit.o testclearbit.o testsetbit.o \ 
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S deleted file mode 100644 index f202d7bd1647..000000000000 --- a/arch/arm/lib/strncpy_from_user.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - * linux/arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * r0 = dst, r1 = src, r2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov ip, r1 -1: subs r2, r2, #1 - ldrusr r3, r1, 1, pl - bmi 2f - strb r3, [r0], #1 - teq r3, #0 - bne 1b - sub r1, r1, #1 @ take NUL character out of count -2: sub r0, r1, ip - mov pc, lr -ENDPROC(__strncpy_from_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r3, #0 - strb r3, [r0, #0] @ null terminate - mov r0, #-EFAULT - mov pc, lr - .popsection - diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S deleted file mode 100644 index 0ecbb459c4f1..000000000000 --- a/arch/arm/lib/strnlen_user.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - * linux/arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#include -#include -#include - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n + 1 if too long - */ -ENTRY(__strnlen_user) - mov r2, r0 -1: - ldrusr r3, r0, 1 - teq r3, #0 - beq 2f - subs r1, r1, #1 - bne 1b - add r0, r0, #1 -2: sub r0, r0, r2 - mov pc, lr -ENDPROC(__strnlen_user) - - .pushsection .fixup,"ax" - .align 0 -9001: mov r0, #0 - mov pc, lr - .popsection -- cgit v1.2.3 From b9a50f74905ad9126c91b495ece8a5f45434c643 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:08 +0100 Subject: ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian ARMv6+ CPUs DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad for ARM CPUs with native support for unaligned memory accesses (v6+) when running little-endian. 
Reviewed-by: Nicolas Pitre Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/word-at-a-time.h | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 574561a66d75..acd12efe6f3e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h index 74b2d4578577..4d52f92967a6 100644 --- a/arch/arm/include/asm/word-at-a-time.h +++ b/arch/arm/include/asm/word-at-a-time.h @@ -48,6 +48,47 @@ static inline unsigned long find_zero(unsigned long mask) return ret; } +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#define zero_bytemask(mask) (mask) + +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. 
+ */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, [%2]\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x3\n" + " bic %2, %2, #0x3\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" + " lsr %0, %0, %1\n" + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .long 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Qo" (*(unsigned long *)addr)); + + return ret; +} + + +#endif /* DCACHE_WORD_ACCESS */ + #else /* __ARMEB__ */ #include #endif -- cgit v1.2.3 From 923df96b9f31b7d08d8438ff9677326d9537accf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:46:45 +0100 Subject: ARM: 7451/1: arch timer: implement read_current_timer and get_cycles This patch implements read_current_timer using the architected timers when they are selected via CONFIG_ARM_ARCH_TIMER. If they are detected not to be usable at runtime, we return -ENXIO to the caller. Furthermore, if read_current_timer is exported then we can implement get_cycles in terms of it for use as both an entropy source and for implementing __udelay and friends. 
Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/arch_timer.h | 3 +++ arch/arm/include/asm/timex.h | 10 ++++++---- arch/arm/kernel/arch_timer.c | 8 ++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index ed2e95d46e29..62e75475e57e 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -1,7 +1,10 @@ #ifndef __ASMARM_ARCH_TIMER_H #define __ASMARM_ARCH_TIMER_H +#include + #ifdef CONFIG_ARM_ARCH_TIMER +#define ARCH_HAS_READ_CURRENT_TIMER int arch_timer_of_register(void); int arch_timer_sched_clock_init(void); #else diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 3be8de3adaba..ce119442277c 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -12,13 +12,15 @@ #ifndef _ASMARM_TIMEX_H #define _ASMARM_TIMEX_H +#include #include typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) -{ - return 0; -} +#ifdef ARCH_HAS_READ_CURRENT_TIMER +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 
0 : c; }) +#else +#define get_cycles() (0) +#endif #endif diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dd58035621f7..dbbeec4f06e2 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -223,6 +223,14 @@ static cycle_t arch_counter_read(struct clocksource *cs) return arch_counter_get_cntpct(); } +int read_current_timer(unsigned long *timer_val) +{ + if (!arch_timer_rate) + return -ENXIO; + *timer_val = arch_counter_get_cntpct(); + return 0; +} + static struct clocksource clocksource_counter = { .name = "arch_sys_counter", .rating = 400, -- cgit v1.2.3 From d0a533b18235d36206b9b422efadb7cee444dfdb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:47:17 +0100 Subject: ARM: 7452/1: delay: allow timer-based delay implementation to be selected This patch allows a timer-based delay implementation to be selected by switching the delay routines over to use get_cycles, which is implemented in terms of read_current_timer. This further allows us to skip the loop calibration and have a consistent delay function in the face of core frequency scaling. To avoid the pain of dealing with memory-mapped counters, this implementation uses the co-processor interface to the architected timers when they are available. The previous loop-based implementation is kept around for CPUs without the architected timers and we retain both the maximum delay (2ms) and the corresponding conversion factors for determining the number of loops required for a given interval. Since the indirection of the timer routines will only work when called from C, the sa1100 sleep routines are modified to branch to the loop-based delay functions directly. 
Tested-by: Shinya Kuribayashi Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/delay.h | 32 +++++++++++++++----- arch/arm/kernel/arch_timer.c | 3 ++ arch/arm/kernel/armksyms.c | 3 +- arch/arm/lib/Makefile | 2 +- arch/arm/lib/delay-loop.S | 67 +++++++++++++++++++++++++++++++++++++++++ arch/arm/lib/delay.S | 69 ------------------------------------------ arch/arm/lib/delay.c | 71 ++++++++++++++++++++++++++++++++++++++++++++ arch/arm/mach-sa1100/sleep.S | 8 ++--- 8 files changed, 172 insertions(+), 83 deletions(-) create mode 100644 arch/arm/lib/delay-loop.S delete mode 100644 arch/arm/lib/delay.S create mode 100644 arch/arm/lib/delay.c (limited to 'arch/arm') diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index b2deda181549..dc6145120de3 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -6,9 +6,22 @@ #ifndef __ASM_ARM_DELAY_H #define __ASM_ARM_DELAY_H +#include #include /* HZ */ -extern void __delay(int loops); +#define MAX_UDELAY_MS 2 +#define UDELAY_MULT ((UL(2199023) * HZ) >> 11) +#define UDELAY_SHIFT 30 + +#ifndef __ASSEMBLY__ + +extern struct arm_delay_ops { + void (*delay)(unsigned long); + void (*const_udelay)(unsigned long); + void (*udelay)(unsigned long); +} arm_delay_ops; + +#define __delay(n) arm_delay_ops.delay(n) /* * This function intentionally does not exist; if you see references to @@ -23,22 +36,27 @@ extern void __bad_udelay(void); * division by multiplication: you don't have to worry about * loss of precision. * - * Use only for very small delays ( < 1 msec). Should probably use a + * Use only for very small delays ( < 2 msec). Should probably use a * lookup table, really, as the multiplications take much too long with * short delays. 
This is a "reasonable" implementation, though (and the * first constant multiplications gets optimized away if the delay is * a constant) */ -extern void __udelay(unsigned long usecs); -extern void __const_udelay(unsigned long); - -#define MAX_UDELAY_MS 2 +#define __udelay(n) arm_delay_ops.udelay(n) +#define __const_udelay(n) arm_delay_ops.const_udelay(n) #define udelay(n) \ (__builtin_constant_p(n) ? \ ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \ - __const_udelay((n) * ((2199023U*HZ)>>11))) : \ + __const_udelay((n) * UDELAY_MULT)) : \ __udelay(n)) +/* Loop-based definitions for assembly code. */ +extern void __loop_delay(unsigned long loops); +extern void __loop_udelay(unsigned long usecs); +extern void __loop_const_udelay(unsigned long); + +#endif /* __ASSEMBLY__ */ + #endif /* defined(_ARM_DELAY_H) */ diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index dbbeec4f06e2..675cee09c014 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -32,6 +32,8 @@ static int arch_timer_ppi2; static struct clock_event_device __percpu **arch_timer_evt; +extern void init_current_timer_delay(unsigned long freq); + /* * Architected system timer support. 
*/ @@ -304,6 +306,7 @@ static int __init arch_timer_register(void) if (err) goto out_free_irq; + init_current_timer_delay(arch_timer_rate); return 0; out_free_irq: diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index b57c75e0b01f..71962284d288 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -49,8 +49,7 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); /* platform dependent support */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(arm_delay_ops); /* networking */ EXPORT_SYMBOL(csum_partial); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 992769ae2599..b621114644fd 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -6,7 +6,7 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ - delay.o findbit.o memchr.o memcpy.o \ + delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ strncpy_from_user.o strnlen_user.o \ strchr.o strrchr.o \ diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S new file mode 100644 index 000000000000..36b668d8e121 --- /dev/null +++ b/arch/arm/lib/delay-loop.S @@ -0,0 +1,67 @@ +/* + * linux/arch/arm/lib/delay.S + * + * Copyright (C) 1995, 1996 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + .text + +.LC0: .word loops_per_jiffy +.LC1: .word UDELAY_MULT + +/* + * r0 <= 2000 + * lpj <= 0x01ffffff (max. 
3355 bogomips) + * HZ <= 1000 + */ + +ENTRY(__loop_udelay) + ldr r2, .LC1 + mul r0, r2, r0 +ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 + mov r1, #-1 + ldr r2, .LC0 + ldr r2, [r2] @ max = 0x01ffffff + add r0, r0, r1, lsr #32-14 + mov r0, r0, lsr #14 @ max = 0x0001ffff + add r2, r2, r1, lsr #32-10 + mov r2, r2, lsr #10 @ max = 0x00007fff + mul r0, r2, r0 @ max = 2^32-1 + add r0, r0, r1, lsr #32-6 + movs r0, r0, lsr #6 + moveq pc, lr + +/* + * loops = r0 * HZ * loops_per_jiffy / 1000000 + */ + +@ Delay routine +ENTRY(__loop_delay) + subs r0, r0, #1 +#if 0 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 + movls pc, lr + subs r0, r0, #1 +#endif + bhi __loop_delay + mov pc, lr +ENDPROC(__loop_udelay) +ENDPROC(__loop_const_udelay) +ENDPROC(__loop_delay) diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S deleted file mode 100644 index 3c9a05c8d20b..000000000000 --- a/arch/arm/lib/delay.S +++ /dev/null @@ -1,69 +0,0 @@ -/* - * linux/arch/arm/lib/delay.S - * - * Copyright (C) 1995, 1996 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include - .text - -.LC0: .word loops_per_jiffy -.LC1: .word (2199023*HZ)>>11 - -/* - * r0 <= 2000 - * lpj <= 0x01ffffff (max. 
3355 bogomips) - * HZ <= 1000 - */ - -ENTRY(__udelay) - ldr r2, .LC1 - mul r0, r2, r0 -ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 - mov r1, #-1 - ldr r2, .LC0 - ldr r2, [r2] @ max = 0x01ffffff - add r0, r0, r1, lsr #32-14 - mov r0, r0, lsr #14 @ max = 0x0001ffff - add r2, r2, r1, lsr #32-10 - mov r2, r2, lsr #10 @ max = 0x00007fff - mul r0, r2, r0 @ max = 2^32-1 - add r0, r0, r1, lsr #32-6 - movs r0, r0, lsr #6 - moveq pc, lr - -/* - * loops = r0 * HZ * loops_per_jiffy / 1000000 - * - * Oh, if only we had a cycle counter... - */ - -@ Delay routine -ENTRY(__delay) - subs r0, r0, #1 -#if 0 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 - movls pc, lr - subs r0, r0, #1 -#endif - bhi __delay - mov pc, lr -ENDPROC(__udelay) -ENDPROC(__const_udelay) -ENDPROC(__delay) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c new file mode 100644 index 000000000000..d6dacc69254e --- /dev/null +++ b/arch/arm/lib/delay.c @@ -0,0 +1,71 @@ +/* + * Delay loops based on the OpenRISC implementation. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Will Deacon + */ + +#include +#include +#include +#include +#include + +/* + * Default to the loop-based delay implementation. 
+ */ +struct arm_delay_ops arm_delay_ops = { + .delay = __loop_delay, + .const_udelay = __loop_const_udelay, + .udelay = __loop_udelay, +}; + +#ifdef ARCH_HAS_READ_CURRENT_TIMER +static void __timer_delay(unsigned long cycles) +{ + cycles_t start = get_cycles(); + + while ((get_cycles() - start) < cycles) + cpu_relax(); +} + +static void __timer_const_udelay(unsigned long xloops) +{ + unsigned long long loops = xloops; + loops *= loops_per_jiffy; + __timer_delay(loops >> UDELAY_SHIFT); +} + +static void __timer_udelay(unsigned long usecs) +{ + __timer_const_udelay(usecs * UDELAY_MULT); +} + +void __init init_current_timer_delay(unsigned long freq) +{ + pr_info("Switching to timer-based delay loop\n"); + lpj_fine = freq / HZ; + arm_delay_ops.delay = __timer_delay; + arm_delay_ops.const_udelay = __timer_const_udelay; + arm_delay_ops.udelay = __timer_udelay; +} + +unsigned long __cpuinit calibrate_delay_is_known(void) +{ + return lpj_fine; +} +#endif diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S index 30cc6721665b..85863741ef8b 100644 --- a/arch/arm/mach-sa1100/sleep.S +++ b/arch/arm/mach-sa1100/sleep.S @@ -38,9 +38,9 @@ ENTRY(sa1100_finish_suspend) orr r4, r4, #MDREFR_K1DB2 ldr r5, =PPCR - @ Pre-load __udelay into the I-cache + @ Pre-load __loop_udelay into the I-cache mov r0, #1 - bl __udelay + bl __loop_udelay mov r0, r0 @ The following must all exist in a single cache line to @@ -53,11 +53,11 @@ ENTRY(sa1100_finish_suspend) @ delay 90us and set CPU PLL to lowest speed @ fixes resume problem on high speed SA1110 mov r0, #90 - bl __udelay + bl __loop_udelay mov r1, #0 str r1, [r5] mov r0, #90 - bl __udelay + bl __loop_udelay /* * SA1110 SDRAM controller workaround. 
register values: -- cgit v1.2.3 From 64284a9f8a91b2f1af577f78fabe05d8072cb6e6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:48:50 +0100 Subject: ARM: 7454/1: entry: don't bother with syscall tracing on ret_from_fork path ret_from_fork is setup for a freshly spawned child task via copy_thread, called from copy_process. The latter function clears TIF_SYSCALL_TRACE and also resets the child task's audit_context to NULL, meaning that there is little point invoking the system call tracing routines. Furthermore, getting hold of the syscall number is a complete pain and it looks like the current code doesn't even bother. This patch removes the syscall tracing checks from ret_from_fork. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 4afed88d250a..10911c93fbf1 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -95,13 +95,7 @@ ENDPROC(ret_to_user) ENTRY(ret_from_fork) bl schedule_tail get_thread_info tsk - ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing mov why, #1 - tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls? - beq ret_slow_syscall - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace b ret_slow_syscall ENDPROC(ret_from_fork) -- cgit v1.2.3 From 5125430cccc41f67bfe024394a302901034f6d39 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:49:27 +0100 Subject: ARM: 7455/1: audit: move syscall auditing until after ptrace SIGTRAP handling When auditing system calls on ARM, the audit code is called before notifying the parent process in the case that the current task is being ptraced. 
At this point, the parent (debugger) may choose to change the system call being issued via the SET_SYSCALL ptrace request, causing the wrong system call to be reported to the audit tools. This patch moves the audit calls after the ptrace SIGTRAP handling code in the syscall tracing implementation. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/ptrace.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 14e38261cd31..592a39d0ef31 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -911,14 +911,8 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) { unsigned long ip; - if (why) - audit_syscall_exit(regs); - else - audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, - regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return scno; + goto out_no_trace; current_thread_info()->syscall = scno; @@ -935,6 +929,13 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) current_thread_info()->syscall = -1; regs->ARM_ip = ip; + scno = current_thread_info()->syscall; - return current_thread_info()->syscall; +out_no_trace: + if (why) + audit_syscall_exit(regs); + else + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, + regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + return scno; } -- cgit v1.2.3 From ad722541147e6e517a2077e3d944105e7bc4fa8e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jul 2012 15:50:14 +0100 Subject: ARM: 7456/1: ptrace: provide separate functions for tracing syscall {entry,exit} The syscall_trace on ARM takes a `why' parameter to indicate whether or not we are entering or exiting a system call. This can be confusing for people looking at the code since (a) it conflicts with the why register alias in the entry assembly code and (b) it is not immediately clear what it represents. 
This patch splits up the syscall_trace function into separate wrappers for syscall entry and exit, allowing the low-level syscall handling code to branch to the appropriate function. Reported-by: Al Viro Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 14 ++++++-------- arch/arm/kernel/ptrace.c | 37 +++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 10911c93fbf1..49d9f9305247 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -442,10 +442,9 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r2, scno - add r1, sp, #S_OFF - mov r0, #0 @ trace entry [IP = 0] - bl syscall_trace + mov r1, scno + add r0, sp, #S_OFF + bl syscall_trace_enter adr lr, BSYM(__sys_trace_return) @ return address mov scno, r0 @ syscall number (possibly new) @@ -457,10 +456,9 @@ __sys_trace: __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! 
@ save returned r0 - mov r2, scno - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace + mov r1, scno + mov r0, sp + bl syscall_trace_exit b ret_slow_syscall .align 5 diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 592a39d0ef31..dab711e6e1ca 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -907,12 +907,18 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static int ptrace_syscall_trace(struct pt_regs *regs, int scno, + enum ptrace_syscall_dir dir) { unsigned long ip; if (!test_thread_flag(TIF_SYSCALL_TRACE)) - goto out_no_trace; + return scno; current_thread_info()->syscall = scno; @@ -921,21 +927,28 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) * IP = 0 -> entry, =1 -> exit */ ip = regs->ARM_ip; - regs->ARM_ip = why; + regs->ARM_ip = dir; - if (why) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) current_thread_info()->syscall = -1; regs->ARM_ip = ip; - scno = current_thread_info()->syscall; + return current_thread_info()->syscall; +} -out_no_trace: - if (why) - audit_syscall_exit(regs); - else - audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, - regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - return scno; +asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER); + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1, + regs->ARM_r2, regs->ARM_r3); + return ret; +} + +asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT); + audit_syscall_exit(regs); + return ret; } -- cgit v1.2.3 From 130d9aabf997bd8449ff4e877fe3c42df066805e Mon Sep 17 00:00:00 2001 
From: Vincent Guittot Date: Tue, 10 Jul 2012 14:08:40 +0100 Subject: ARM: 7461/1: topology: Add arch_scale_freq_power function Add infrastructure to be able to modify the cpu_power of each core Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 8200deaa14f6..51f23b3ed0a6 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -22,6 +22,37 @@ #include #include +/* + * cpu power scale management + */ + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. 
+ */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +/* + * cpu topology management + */ + #define MPIDR_SMP_BITMASK (0x3 << 30) #define MPIDR_SMP_VALUE (0x2 << 30) @@ -41,6 +72,9 @@ #define MPIDR_LEVEL2_MASK 0xFF #define MPIDR_LEVEL2_SHIFT 16 +/* + * cpu topology table + */ struct cputopo_arm cpu_topology[NR_CPUS]; const struct cpumask *cpu_coregroup_mask(int cpu) @@ -134,7 +168,7 @@ void init_cpu_topology(void) { unsigned int cpu; - /* init core mask */ + /* init core mask and power*/ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -143,6 +177,8 @@ void init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + + set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); } -- cgit v1.2.3 From cb75dacb39494164e6b1f7aa747fb639bf18584c Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 10 Jul 2012 14:11:11 +0100 Subject: ARM: 7462/1: topology: factorize the update of sibling masks This factorization has also been proposed in another patch that has not been merged yet: http://lists.infradead.org/pipermail/linux-arm-kernel/2012-January/080873.html So, this patch could be dropped depending on the state of the other one.
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 48 ++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 51f23b3ed0a6..eb5fc8132c02 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -82,6 +82,32 @@ const struct cpumask *cpu_coregroup_mask(int cpu) return &cpu_topology[cpu].core_sibling; } +void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + /* * store_cpu_topology is called at boot when only one cpu is running * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, @@ -91,7 +117,6 @@ void store_cpu_topology(unsigned int cpuid) { struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; unsigned int mpidr; - unsigned int cpu; /* If the cpu topology has been already set, just return */ if (cpuid_topo->core_id != -1) @@ -133,26 +158,7 @@ void store_cpu_topology(unsigned int cpuid) cpuid_topo->socket_id = -1; } - /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { - struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; - - if (cpuid_topo->socket_id == cpu_topo->socket_id) { - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - 
cpumask_set_cpu(cpu, - &cpuid_topo->core_sibling); - - if (cpuid_topo->core_id == cpu_topo->core_id) { - cpumask_set_cpu(cpuid, - &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->thread_sibling); - } - } - } - smp_wmb(); + update_siblings_masks(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, -- cgit v1.2.3 From 339ca09d7adac80eda8d097ab473c6c23ee86b17 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 10 Jul 2012 14:13:12 +0100 Subject: ARM: 7463/1: topology: Update cpu_power according to DT information Use cpu compatibility field and clock-frequency field of DT to estimate the capacity of each core of the system and to update the cpu_power field accordingly. This patch makes it possible to put more running tasks on big cores than on LITTLE ones. But this patch doesn't ensure that long running tasks will run on big cores and short ones on LITTLE cores. Signed-off-by: Vincent Guittot Reviewed-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Russell King --- arch/arm/kernel/topology.c | 153 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index eb5fc8132c02..198b08456e90 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -49,6 +51,152 @@ static void set_power_scale(unsigned int cpu, unsigned long power) per_cpu(cpu_scale, cpu) = power; } +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of
a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +struct cpu_efficiency table_efficiency[] = { + {"arm,cortex-a15", 3891}, + {"arm,cortex-a7", 2048}, + {NULL, }, +}; + +struct cpu_capacity { + unsigned long hwid; + unsigned long capacity; +}; + +struct cpu_capacity *cpu_capacity; + +unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static void __init parse_dt_topology(void) +{ + struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = (unsigned long)(-1); + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int alloc_size, cpu = 0; + + alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity); + cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + const u32 *rate, *reg; + int len; + + if (cpu >= num_possible_cpus()) + break; + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s missing clock-frequency property\n", + cn->full_name); + continue; + } + + reg = of_get_property(cn, "reg", &len); + if (!reg || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if 
(capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity[cpu].capacity = capacity; + cpu_capacity[cpu++].hwid = be32_to_cpup(reg); + } + + if (cpu < num_possible_cpus()) + cpu_capacity[cpu].hwid = (unsigned long)(-1); + + /* If min and max capacities are equals, we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + cpu_capacity[0].hwid = (unsigned long)(-1); + else if (4*max_capacity < (3*(max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_capacity table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. 
+ */ +void update_cpu_power(unsigned int cpu, unsigned long hwid) +{ + unsigned int idx = 0; + + /* look for the cpu's hwid in the cpu capacity table */ + for (idx = 0; idx < num_possible_cpus(); idx++) { + if (cpu_capacity[idx].hwid == hwid) + break; + + if (cpu_capacity[idx].hwid == -1) + return; + } + + if (idx == num_possible_cpus()) + return; + + set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity); + + printk(KERN_INFO "CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} +#endif + + /* * cpu topology management */ @@ -62,6 +210,7 @@ static void set_power_scale(unsigned int cpu, unsigned long power) * These masks reflect the current use of the affinity levels. * The affinity level can be up to 16 bits according to ARM ARM */ +#define MPIDR_HWID_BITMASK 0xFFFFFF #define MPIDR_LEVEL0_MASK 0x3 #define MPIDR_LEVEL0_SHIFT 0 @@ -160,6 +309,8 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); + update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK); + printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, cpu_topology[cpuid].core_id, @@ -187,4 +338,6 @@ void init_cpu_topology(void) set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); + + parse_dt_topology(); } -- cgit v1.2.3