19 files changed, 244 insertions, 537 deletions
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index ed2e95d46e29..62e75475e57e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -1,7 +1,10 @@
 #ifndef __ASMARM_ARCH_TIMER_H
 #define __ASMARM_ARCH_TIMER_H
 
+#include <asm/errno.h>
+
 #ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_HAS_READ_CURRENT_TIMER
 int arch_timer_of_register(void);
 int arch_timer_sched_clock_init(void);
 #else
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 004c1bc95d2b..e4448e16046d 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -215,7 +215,9 @@ static inline void vivt_flush_cache_mm(struct mm_struct *mm)
 static inline void
 vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
 		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
 					vma->vm_flags);
 }
@@ -223,7 +225,9 @@ vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
 static inline void
 vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
-	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (!mm || cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 		unsigned long addr = user_addr & PAGE_MASK;
 		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 	}
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
 #ifndef __ASM_ARM_DELAY_H
 #define __ASM_ARM_DELAY_H
 
+#include <asm/memory.h>
 #include <asm/param.h>	/* HZ */
 
-extern void __delay(int loops);
+#define MAX_UDELAY_MS	2	/* udelay() rejects constant delays above this many ms */
+#define UDELAY_MULT	((UL(2199023) * HZ) >> 11)	/* ~= 2^30 * HZ / 10^6 */
+#define UDELAY_SHIFT	30	/* NOTE(review): presumably undoes the 2^30 scale - confirm */
+
+#ifndef __ASSEMBLY__
+
+extern struct arm_delay_ops {	/* hooks the delay macros dispatch through */
+	void (*delay)(unsigned long);		/* target of __delay(n) */
+	void (*const_udelay)(unsigned long);	/* target of __const_udelay(n) */
+	void (*udelay)(unsigned long);		/* target of __udelay(n) */
+} arm_delay_ops;
+
+#define __delay(n)		arm_delay_ops.delay(n)
 
 /*
  * This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
  * division by multiplication: you don't have to worry about
  * loss of precision.
  *
- * Use only for very small delays ( < 1 msec).  Should probably use a
+ * Use only for very small delays ( < 2 msec).  Should probably use a
  * lookup table, really, as the multiplications take much too long with
  * short delays.  This is a "reasonable" implementation, though (and the
  * first constant multiplications gets optimized away if the delay is
  * a constant)
  */
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n)		arm_delay_ops.udelay(n)
+#define __const_udelay(n)	arm_delay_ops.const_udelay(n)
 
 #define udelay(n)							\
 	(__builtin_constant_p(n) ?					\
 	  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :		\
-			__const_udelay((n) * ((2199023U*HZ)>>11))) :	\
+			__const_udelay((n) * UDELAY_MULT)) :		\
 	  __udelay(n))
 
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* defined(_ARM_DELAY_H) */
 
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index bbef15d04890..2ae842df4551 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -186,17 +186,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 			void *cpu_addr, dma_addr_t dma_addr, size_t size,
 			struct dma_attrs *attrs);
 
-#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL)
-
-static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
-				  void *cpu_addr, dma_addr_t dma_addr,
-				  size_t size, struct dma_attrs *attrs)
-{
-	struct dma_map_ops *ops = get_dma_ops(dev);
-	BUG_ON(!ops);
-	return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-}
-
 static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
 				       dma_addr_t *dma_handle, gfp_t flag)
 {
@@ -213,20 +202,12 @@ static inline void dma_free_writecombine(struct device *dev, size_t size,
 	return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs);
 }
 
-static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
-		      void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
-	DEFINE_DMA_ATTRS(attrs);
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
-}
-
 /*
  * This can be called during boot to increase the size of the consistent
  * DMA region above it's default value of 2MB. It must be called before the
  * memory allocator is initialised, i.e. before any core_initcall.
  */
-extern void __init init_consistent_dma_size(unsigned long size);
+static inline void init_consistent_dma_size(unsigned long size) { }
 
 /*
  * For SA-1111, IXP425, and ADI systems  the dma-mapping functions are "magic"
@@ -280,6 +261,9 @@ extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
 extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int,
 		enum dma_data_direction);
+extern int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		struct dma_attrs *attrs);
 
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h
index e51b1e81df05..83eb2f772911 100644
--- a/arch/arm/include/asm/kmap_types.h
+++ b/arch/arm/include/asm/kmap_types.h
@@ -4,30 +4,6 @@
 /*
  * This is the "bare minimum".  AIO seems to require this.
  */
-enum km_type {
-	KM_BOUNCE_READ,
-	KM_SKB_SUNRPC_DATA,
-	KM_SKB_DATA_SOFTIRQ,
-	KM_USER0,
-	KM_USER1,
-	KM_BIO_SRC_IRQ,
-	KM_BIO_DST_IRQ,
-	KM_PTE0,
-	KM_PTE1,
-	KM_IRQ0,
-	KM_IRQ1,
-	KM_SOFTIRQ0,
-	KM_SOFTIRQ1,
-	KM_L1_CACHE,
-	KM_L2_CACHE,
-	KM_KDB,
-	KM_TYPE_NR
-};
-
-#ifdef CONFIG_DEBUG_HIGHMEM
-#define KM_NMI		(-1)
-#define KM_NMI_PTE	(-1)
-#define KM_IRQ_PTE	(-1)
-#endif
+#define KM_TYPE_NR 16
 
 #endif
diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h
deleted file mode 100644
index ef4c897772d1..000000000000
--- a/arch/arm/include/asm/locks.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  arch/arm/include/asm/locks.h
- *
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Interrupt safe locking assembler. 
- */
-#ifndef __ASM_PROC_LOCKS_H
-#define __ASM_PROC_LOCKS_H
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"1:	ldrex	lr, [%1]\n"			\
-"	sub	lr, lr, %2\n"			\
-"	strex	ip, lr, [%1]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	cmp	lr, #0\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#else
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%1]\n"			\
-"	subs	lr, lr, %2\n"			\
-"	str	lr, [%1]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index fcb575747e5e..e965f1b560f1 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,7 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h
index 93226cf23ae0..b1479fd04a95 100644
--- a/arch/arm/include/asm/mutex.h
+++ b/arch/arm/include/asm/mutex.h
@@ -7,121 +7,10 @@
  */
 #ifndef _ASM_MUTEX_H
 #define _ASM_MUTEX_H
-
-#if __LINUX_ARM_ARCH__ < 6
-/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
-# include <asm-generic/mutex-xchg.h>
-#else
-
 /*
- * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
- * atomic decrement (it is not a reliable atomic decrement but it satisfies
- * the defined semantics for our purpose, while being smaller and faster
- * than a real atomic decrement or atomic swap.  The idea is to attempt
- * decrementing the lock value only once.  If once decremented it isn't zero,
- * or if its store-back fails due to a dispute on the exclusive store, we
- * simply bail out immediately through the slow path where the lock will be
- * reattempted until it succeeds.
+ * On pre-ARMv6 hardware this results in a swp-based implementation,
+ * which is the most efficient. For ARMv6+, we emit a pair of exclusive
+ * accesses instead.
  */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		fail_fn(count);
-}
-
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res;
-
-	__asm__ (
-
-		"ldrex	%0, [%2]	\n\t"
-		"sub	%0, %0, #1	\n\t"
-		"strex	%1, %0, [%2]	"
-
-		: "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__res |= __ex_flag;
-	if (unlikely(__res != 0))
-		__res = fail_fn(count);
-	return __res;
-}
-
-/*
- * Same trick is used for the unlock fast path. However the original value,
- * rather than the result, is used to test for success in order to have
- * better generated assembly.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"ldrex	%0, [%3]	\n\t"
-		"add	%1, %0, #1	\n\t"
-		"strex	%2, %1, [%3]	"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&(count)->counter)
-		: "cc","memory" );
-
-	__orig |= __ex_flag;
-	if (unlikely(__orig != 0))
-		fail_fn(count);
-}
-
-/*
- * If the unlock was done on a contended lock, or if the unlock simply fails
- * then the mutex remains locked.
- */
-#define __mutex_slowpath_needs_to_unlock()	1
-
-/*
- * For __mutex_fastpath_trylock we use another construct which could be
- * described as a "single value cmpxchg".
- *
- * This provides the needed trylock semantics like cmpxchg would, but it is
- * lighter and less generic than a true cmpxchg implementation.
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
-	int __ex_flag, __res, __orig;
-
-	__asm__ (
-
-		"1: ldrex	%0, [%3]	\n\t"
-		"subs		%1, %0, #1	\n\t"
-		"strexeq	%2, %1, [%3]	\n\t"
-		"movlt		%0, #0		\n\t"
-		"cmpeq		%2, #0		\n\t"
-		"bgt		1b		"
-
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
-		: "r" (&count->counter)
-		: "cc", "memory" );
-
-	return __orig;
-}
-
-#endif
+#include <asm-generic/mutex-xchg.h>
 #endif
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 00cbe10a50e3..e074948d8143 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,21 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM perf PMU IDs for use by internal perf clients. */
-enum arm_perf_pmu_ids {
-	ARM_PERF_PMU_ID_XSCALE1	= 0,
-	ARM_PERF_PMU_ID_XSCALE2,
-	ARM_PERF_PMU_ID_V6,
-	ARM_PERF_PMU_ID_V6MP,
-	ARM_PERF_PMU_ID_CA8,
-	ARM_PERF_PMU_ID_CA9,
-	ARM_PERF_PMU_ID_CA5,
-	ARM_PERF_PMU_ID_CA15,
-	ARM_PERF_PMU_ID_CA7,
-	ARM_NUM_PMU_IDS,
-};
-
-extern enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void);
+/* Nothing to see here... */
 
 #endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f66626d71e7d..41dc31f834c3 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -195,6 +195,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 
 #define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
 
+#define pte_none(pte)		(!pte_val(pte))
+#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
+#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
+#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
+#define pte_special(pte)	(0)
+
+#define pte_present_user(pte) \
+	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
+	 (L_PTE_PRESENT | L_PTE_USER))
+
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
 {
@@ -206,25 +218,15 @@ extern void __sync_icache_dcache(pte_t pteval);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	if (addr >= TASK_SIZE)
-		set_pte_ext(ptep, pteval, 0);
-	else {
+	unsigned long ext = 0;
+
+	if (addr < TASK_SIZE && pte_present_user(pteval)) {
 		__sync_icache_dcache(pteval);
-		set_pte_ext(ptep, pteval, PTE_EXT_NG);
+		ext |= PTE_EXT_NG;
 	}
-}
 
-#define pte_none(pte)		(!pte_val(pte))
-#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
-#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
-#define pte_special(pte)	(0)
-
-#define pte_present_user(pte) \
-	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
-	 (L_PTE_PRESENT | L_PTE_USER))
+	set_pte_ext(ptep, pteval, ext);
+}
 
 #define PTE_BIT_FUNC(fn,op) \
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
@@ -251,13 +253,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  *
  *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
  *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <--------------- offset --------------------> <- type --> 0 0 0
+ *   <--------------- offset ----------------------> < type -> 0 0 0
  *
- * This gives us up to 63 swap files and 32GB per swap file.  Note that
+ * This gives us up to 31 swap files and 64GB per swap file.  Note that
  * the offset field is always non-zero.
  */
 #define __SWP_TYPE_SHIFT	3
-#define __SWP_TYPE_BITS		6
+#define __SWP_TYPE_BITS		5
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 90114faa9f3c..4432305f4a2a 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -103,10 +103,9 @@ struct pmu_hw_events {
 
 struct arm_pmu {
 	struct pmu	pmu;
-	enum arm_perf_pmu_ids id;
 	enum arm_pmu_type type;
 	cpumask_t	active_irqs;
-	const char	*name;
+	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h
index e3f757263438..05b8e82ec9f5 100644
--- a/arch/arm/include/asm/sched_clock.h
+++ b/arch/arm/include/asm/sched_clock.h
@@ -10,5 +10,7 @@
 
 extern void sched_clock_postinit(void);
 extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
+extern void setup_sched_clock_needs_suspend(u32 (*read)(void), int bits,
+		unsigned long rate);
 
 #endif
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index 23ebc0c82a39..24d284a1bfc7 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -196,7 +196,7 @@ static const struct tagtable __tagtable_##fn __tag = { tag, fn }
 
 struct membank {
 	phys_addr_t start;
-	unsigned long size;
+	phys_addr_t size;
 	unsigned int highmem;
 };
 
@@ -217,7 +217,7 @@ extern struct meminfo meminfo;
 #define bank_phys_end(bank)	((bank)->start + (bank)->size)
 #define bank_phys_size(bank)	(bank)->size
 
-extern int arm_add_memory(phys_addr_t start, unsigned long size);
+extern int arm_add_memory(phys_addr_t start, phys_addr_t size);
 extern void early_print(const char *str, ...);
 extern void dump_machine_table(void);
 
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 65fa3c88095c..b4ca707d0a69 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -59,18 +59,13 @@ static inline void dsb_sev(void)
 }
 
 /*
- * ARMv6 Spin-locking.
+ * ARMv6 ticket-based spin-locking.
  *
- * We exclusively read the old value.  If it is zero, we may have
- * won the lock, so we try exclusively storing it.  A memory barrier
- * is required after we get a lock, and before we release it, because
- * V6 CPUs are assumed to have weakly ordered memory.
- *
- * Unlocked value: 0
- * Locked value: 1
+ * A memory barrier is required after we get a lock, and before we
+ * release it, because V6 CPUs are assumed to have weakly ordered
+ * memory.
  */
 
-#define arch_spin_is_locked(x)		((x)->lock != 0)
 #define arch_spin_unlock_wait(lock) \
 	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
@@ -79,31 +74,39 @@ static inline void dsb_sev(void)
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 newval;
+	arch_spinlock_t lockval;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-	WFE("ne")
-"	strexeq	%0, %2, [%1]\n"
-"	teqeq	%0, #0\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%1, %0, %4\n"
+"	strex	%2, %1, [%3]\n"
+"	teq	%2, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
+	while (lockval.tickets.next != lockval.tickets.owner) {
+		wfe();
+		lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+	}
+
 	smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 slock;
 
 	__asm__ __volatile__(
-"	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-"	strexeq	%0, %2, [%1]"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+"	ldrex	%0, [%2]\n"
+"	subs	%1, %0, %0, ror #16\n"
+"	addeq	%0, %0, %3\n"
+"	strexeq	%1, %0, [%2]"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
 	if (tmp == 0) {
@@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
+	unsigned long tmp;
+	u32 slock;
+
 	smp_mb();
 
 	__asm__ __volatile__(
-"	str	%1, [%0]\n"
-	:
-	: "r" (&lock->lock), "r" (0)
+"	mov	%1, #1\n"
+"1:	ldrex	%0, [%2]\n"
+"	uadd16	%0, %0, %1\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock)
 	: "cc");
 
 	dsb_sev();
 }
 
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return tickets.owner != tickets.next;	/* a ticket is outstanding */
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return (tickets.next - tickets.owner) > 1;	/* holder plus a waiter */
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
 /*
  * RWLOCKS
  *
@@ -158,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	unsigned long tmp;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
+"	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
 "	strexeq	%0, %2, [%1]"
 	: "=&r" (tmp)
@@ -244,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	unsigned long tmp, tmp2 = 1;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%2]\n"
+"	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
 "	strexpl	%1, %0, [%2]\n"
 	: "=&r" (tmp), "+r" (tmp2)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index d14d197ae04a..b262d2f8b478 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
 # error "please don't include this file directly"
 #endif
 
+#define TICKET_SHIFT	16
+
 typedef struct {
-	volatile unsigned int lock;
+	union {
+		u32 slock;
+		struct __raw_tickets {
+#ifdef __ARMEB__
+			u16 next;
+			u16 owner;
+#else
+			u16 owner;
+			u16 next;
+#endif
+		} tickets;
+	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..ce119442277c 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -12,13 +12,15 @@
 #ifndef _ASMARM_TIMEX_H
 #define _ASMARM_TIMEX_H
 
+#include <asm/arch_timer.h>
 #include <mach/timex.h>
 
 typedef unsigned long cycles_t;
 
-static inline cycles_t get_cycles (void)
-{
-	return 0;
-}
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#define get_cycles()	({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#else /* !ARCH_HAS_READ_CURRENT_TIMER */
+#define get_cycles()	(0)
+#endif /* ARCH_HAS_READ_CURRENT_TIMER */
 
 #endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 71f6536d17ac..479a6352e0b5 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(type,addr,size)	(__range_ok(addr,size) == 0)
 
+#define user_addr_max() \
+	(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
 /*
  * The "__xxx" versions of the user access functions do not verify the
  * address space - it must have been done previously with a separate
@@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l
 #define __clear_user(addr,n)		(memset((void __force *)addr, 0, n), 0)
 #endif
 
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
@@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
 	return n;
 }
 
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	long res = -EFAULT;
-	if (access_ok(VERIFY_READ, src, 1))
-		res = __strncpy_from_user(dst, src, count);
-	return res;
-}
-
-#define strlen_user(s)	strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
 
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
-	unsigned long res = 0;
-
-	if (__addr_ok(s))
-		res = __strnlen_user(s, n);
-
-	return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif /* _ASMARM_UACCESS_H */
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 512cd1473454..0cab47d4a83f 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -446,7 +446,6 @@
 
 #ifdef __KERNEL__
 
-#define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..4d52f92967a6
--- /dev/null
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_ARM_WORD_AT_A_TIME_H
+#define __ASM_ARM_WORD_AT_A_TIME_H
+
+#ifndef __ARMEB__
+
+/*
+ * Little-endian word-at-a-time zero byte handling.
+ * Heavily based on the x86 algorithm.
+ */
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+				     const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;	/* the mask is returned and also stored for the caller */
+	return mask;	/* with WORD_AT_A_TIME_CONSTANTS: != 0 iff a has a zero byte */
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;	/* set every bit below the lowest set bit */
+	return bits >> 7;	/* if that bit was a byte's bit 7: 0xff per lower byte */
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	unsigned long ret;
+
+#if __LINUX_ARM_ARCH__ >= 5
+	/* We have clz available. */
+	ret = fls(mask) >> 3;	/* fls() / 8: bytes covered by the mask */
+#else
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	ret = (0x0ff0001 + mask) >> 23;
+	/* Fix the 1 for 00 case */
+	ret &= mask;
+#endif /* __LINUX_ARM_ARCH__ >= 5 */
+
+	return ret;	/* 0..3 for the four masks listed above */
+}
+
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#define zero_bytemask(mask) (mask)
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	ldr	%0, [%2]\n"	/* the load that may fault */
+	"2:\n"	/* fixup code branches back here */
+	"	.pushsection .fixup,\"ax\"\n"
+	"	.align 2\n"
+	"3:	and	%1, %2, #0x3\n"	/* offset = addr & 3 */
+	"	bic	%2, %2, #0x3\n"	/* NOTE(review): writes %2, declared input-only - confirm */
+	"	ldr	%0, [%2]\n"	/* reload from the word-aligned address */
+	"	lsl	%1, %1, #0x3\n"	/* byte offset -> bit offset */
+	"	lsr	%0, %0, %1\n"	/* shift down; the top bytes become zero */
+	"	b	2b\n"
+	"	.popsection\n"
+	"	.pushsection __ex_table,\"a\"\n"
+	"	.align	3\n"
+	"	.long	1b, 3b\n"	/* pairs the load at 1 with the fixup at 3 */
+	"	.popsection"
+	: "=&r" (ret), "=&r" (offset)
+	: "r" (addr), "Qo" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+
+#endif	/* CONFIG_DCACHE_WORD_ACCESS */
+
+#else	/* __ARMEB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+#endif /* __ASM_ARM_WORD_AT_A_TIME_H */