35 files changed, 2022 insertions, 399 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 636bcb81d068..2508a6f31588 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -85,6 +85,7 @@ config S390
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_XZ
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select ARCH_INLINE_SPIN_TRYLOCK
@@ -341,26 +342,16 @@ config STACK_GUARD
 	  The minimum size for the stack guard should be 256 for 31 bit and
 	  512 for 64 bit.
 
-config WARN_STACK
+config WARN_DYNAMIC_STACK
 	def_bool n
-	prompt "Emit compiler warnings for function with broken stack usage"
+	prompt "Emit compiler warnings for function with dynamic stack usage"
 	help
-	  This option enables the compiler options -mwarn-framesize and
-	  -mwarn-dynamicstack. If the compiler supports these options it
-	  will generate warnings for function which either use alloca or
-	  create a stack frame bigger than CONFIG_WARN_STACK_SIZE.
+	  This option enables the compiler option -mwarn-dynamicstack. If the
+	  compiler supports this options generates warnings for functions
+	  that dynamically allocate stack space using alloca.
 
 	  Say N if you are unsure.
 
-config WARN_STACK_SIZE
-	int "Maximum frame size considered safe (128-2048)"
-	range 128 2048
-	depends on WARN_STACK
-	default "2048"
-	help
-	  This allows you to specify the maximum frame size a function may
-	  have without the compiler complaining about it.
-
 config ARCH_POPULATES_NODE_MAP
 	def_bool y
 
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 2b380df95606..d76cef3fef37 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -31,4 +31,7 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config DEBUG_SET_MODULE_RONX
+	def_bool y
+	depends on MODULES
 endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index d5b8a6ade525..27a0b5df5ead 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -80,8 +80,7 @@ endif
 endif
 
 ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
-cflags-$(CONFIG_WARN_STACK) += -mwarn-dynamicstack
-cflags-$(CONFIG_WARN_STACK) += -mwarn-framesize=$(CONFIG_WARN_STACK_SIZE)
+cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
 endif
 
 KBUILD_CFLAGS	+= -mbackchain -msoft-float $(cflags-y)
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 8800cf090694..635d677d3281 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -6,7 +6,7 @@ COMPILE_VERSION := __linux_compile_version_id__`hostname |  \
 			tr -c '[0-9A-Za-z]' '_'`__`date | \
 			tr -c '[0-9A-Za-z]' '_'`_t
 
-EXTRA_CFLAGS  := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
+ccflags-y  := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
 
 targets := image
 targets += bzImage
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 1c999f726a58..10e22c4ec4a7 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -7,7 +7,8 @@
 BITS := $(if $(CONFIG_64BIT),64,31)
 
 targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
-	   vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o sizes.h head$(BITS).o
+	   vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \
+	   sizes.h head$(BITS).o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += $(cflags-y)
@@ -48,6 +49,7 @@ suffix-$(CONFIG_KERNEL_GZIP)  := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
 suffix-$(CONFIG_KERNEL_LZMA)  := lzma
 suffix-$(CONFIG_KERNEL_LZO)  := lzo
+suffix-$(CONFIG_KERNEL_XZ)  := xz
 
 $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
 	$(call if_changed,gzip)
@@ -57,6 +59,8 @@ $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
 	$(call if_changed,lzma)
 $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
 	$(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+	$(call if_changed,xzkern)
 
 LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
 $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 2751b3a8a66f..028f23ea81d1 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -19,6 +19,7 @@
 #undef memset
 #undef memcpy
 #undef memmove
+#define memmove memmove
 #define memzero(s, n) memset((s), 0, (n))
 
 /* Symbols defined by linker scripts */
@@ -54,6 +55,10 @@ static unsigned long free_mem_end_ptr;
 #include "../../../../lib/decompress_unlzo.c"
 #endif
 
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
 extern _sclp_print_early(const char *);
 
 int puts(const char *s)
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 2e05972c5085..e1c8f3a49884 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -742,18 +742,42 @@ static inline int sched_find_first_bit(unsigned long *b)
  *    23 22 21 20 19 18 17 16 31 30 29 28 27 26 25 24
  */
 
-#define ext2_set_bit(nr, addr)       \
-	__test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-#define ext2_set_bit_atomic(lock, nr, addr)       \
-	test_and_set_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-#define ext2_clear_bit(nr, addr)     \
-	__test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-#define ext2_clear_bit_atomic(lock, nr, addr)     \
-	test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-#define ext2_test_bit(nr, addr)      \
-	test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
-
-static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size)
+static inline void __set_bit_le(unsigned long nr, void *addr)
+{
+	__set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline void __clear_bit_le(unsigned long nr, void *addr)
+{
+	__clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int __test_and_set_bit_le(unsigned long nr, void *addr)
+{
+	return __test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int test_and_set_bit_le(unsigned long nr, void *addr)
+{
+	return test_and_set_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int __test_and_clear_bit_le(unsigned long nr, void *addr)
+{
+	return __test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int test_and_clear_bit_le(unsigned long nr, void *addr)
+{
+	return test_and_clear_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int test_bit_le(unsigned long nr, const void *addr)
+{
+	return test_bit(nr ^ (__BITOPS_WORDSIZE - 8), addr);
+}
+
+static inline int find_first_zero_bit_le(void *vaddr, unsigned int size)
 {
 	unsigned long bytes, bits;
 
@@ -764,7 +788,7 @@ static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size)
 	return (bits < size) ? bits : size;
 }
 
-static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size,
+static inline int find_next_zero_bit_le(void *vaddr, unsigned long size,
 					  unsigned long offset)
 {
         unsigned long *addr = vaddr, *p;
@@ -790,11 +814,10 @@ static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size,
 		size -= __BITOPS_WORDSIZE;
 		p++;
         }
-	return offset + ext2_find_first_zero_bit(p, size);
+	return offset + find_first_zero_bit_le(p, size);
 }
 
-static inline unsigned long ext2_find_first_bit(void *vaddr,
-						unsigned long size)
+static inline unsigned long find_first_bit_le(void *vaddr, unsigned long size)
 {
 	unsigned long bytes, bits;
 
@@ -805,7 +828,7 @@ static inline unsigned long ext2_find_first_bit(void *vaddr,
 	return (bits < size) ? bits : size;
 }
 
-static inline int ext2_find_next_bit(void *vaddr, unsigned long size,
+static inline int find_next_bit_le(void *vaddr, unsigned long size,
 				     unsigned long offset)
 {
 	unsigned long *addr = vaddr, *p;
@@ -831,10 +854,14 @@ static inline int ext2_find_next_bit(void *vaddr, unsigned long size,
 		size -= __BITOPS_WORDSIZE;
 		p++;
 	}
-	return offset + ext2_find_first_bit(p, size);
+	return offset + find_first_bit_le(p, size);
 }
 
-#include <asm-generic/bitops/minix.h>
+#define ext2_set_bit_atomic(lock, nr, addr)	\
+	test_and_set_bit_le(nr, addr)
+#define ext2_clear_bit_atomic(lock, nr, addr)	\
+	test_and_clear_bit_le(nr, addr)
+
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
index 7e1f77620624..43a5c78046db 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/cacheflush.h
@@ -8,4 +8,8 @@
 void kernel_map_pages(struct page *page, int numpages, int enable);
 #endif
 
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+
 #endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index ff6f62e0ec3e..623f2fb71774 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -112,7 +112,6 @@ enum uc_todo {
 
 /**
  * struct ccw driver - device driver for channel attached devices
- * @owner: owning module
  * @ids: ids supported by this driver
  * @probe: function called on probe
  * @remove: function called on remove
@@ -128,10 +127,8 @@ enum uc_todo {
  * @restore: callback for restoring after hibernation
  * @uc_handler: callback for unit check handler
  * @driver: embedded device driver structure
- * @name: device driver name
  */
 struct ccw_driver {
-	struct module *owner;
 	struct ccw_device_id *ids;
 	int (*probe) (struct ccw_device *);
 	void (*remove) (struct ccw_device *);
@@ -147,7 +144,6 @@ struct ccw_driver {
 	int (*restore)(struct ccw_device *);
 	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
 	struct device_driver driver;
-	char *name;
 };
 
 extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index c79c1e787b86..f2ea2c56a7e1 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -29,8 +29,6 @@ struct ccwgroup_device {
 
 /**
  * struct ccwgroup_driver - driver for ccw group devices
- * @owner: driver owner
- * @name: driver name
  * @max_slaves: maximum number of slave devices
  * @driver_id: unique id
  * @probe: function called on probe
@@ -46,8 +44,6 @@ struct ccwgroup_device {
  * @driver: embedded driver structure
  */
 struct ccwgroup_driver {
-	struct module *owner;
-	char *name;
 	int max_slaves;
 	unsigned long driver_id;
 
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..7488e52efa97
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -0,0 +1,225 @@
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/types.h>
+
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long x, void *ptr, int size)
+{
+	unsigned long addr, old;
+	int shift;
+
+	switch (size) {
+	case 1:
+		addr = (unsigned long) ptr;
+		shift = (3 ^ (addr & 3)) << 3;
+		addr ^= addr & 3;
+		asm volatile(
+			"	l	%0,%4\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,%4\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) addr)
+			: "d" (x << shift), "d" (~(255 << shift)),
+			  "Q" (*(int *) addr) : "memory", "cc", "0");
+		return old >> shift;
+	case 2:
+		addr = (unsigned long) ptr;
+		shift = (2 ^ (addr & 2)) << 3;
+		addr ^= addr & 2;
+		asm volatile(
+			"	l	%0,%4\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,%4\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) addr)
+			: "d" (x << shift), "d" (~(65535 << shift)),
+			  "Q" (*(int *) addr) : "memory", "cc", "0");
+		return old >> shift;
+	case 4:
+		asm volatile(
+			"	l	%0,%3\n"
+			"0:	cs	%0,%2,%3\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=Q" (*(int *) ptr)
+			: "d" (x), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return old;
+#ifdef CONFIG_64BIT
+	case 8:
+		asm volatile(
+			"	lg	%0,%3\n"
+			"0:	csg	%0,%2,%3\n"
+			"	jl	0b\n"
+			: "=&d" (old), "=m" (*(long *) ptr)
+			: "d" (x), "Q" (*(long *) ptr)
+			: "memory", "cc");
+		return old;
+#endif /* CONFIG_64BIT */
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+#define xchg(ptr, x)							  \
+({									  \
+	__typeof__(*(ptr)) __ret;					  \
+	__ret = (__typeof__(*(ptr)))					  \
+		__xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\
+	__ret;								  \
+})
+
+/*
+ * Atomic compare and exchange.	 Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.	Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG
+
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long __cmpxchg(void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	unsigned long addr, prev, tmp;
+	int shift;
+
+	switch (size) {
+	case 1:
+		addr = (unsigned long) ptr;
+		shift = (3 ^ (addr & 3)) << 3;
+		addr ^= addr & 3;
+		asm volatile(
+			"	l	%0,%2\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%3\n"
+			"	or	%1,%4\n"
+			"	cs	%0,%1,%2\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
+			"1:"
+			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
+			: "d" (old << shift), "d" (new << shift),
+			  "d" (~(255 << shift)), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev >> shift;
+	case 2:
+		addr = (unsigned long) ptr;
+		shift = (2 ^ (addr & 2)) << 3;
+		addr ^= addr & 2;
+		asm volatile(
+			"	l	%0,%2\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%3\n"
+			"	or	%1,%4\n"
+			"	cs	%0,%1,%2\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
+			"1:"
+			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
+			: "d" (old << shift), "d" (new << shift),
+			  "d" (~(65535 << shift)), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev >> shift;
+	case 4:
+		asm volatile(
+			"	cs	%0,%3,%1\n"
+			: "=&d" (prev), "=Q" (*(int *) ptr)
+			: "0" (old), "d" (new), "Q" (*(int *) ptr)
+			: "memory", "cc");
+		return prev;
+#ifdef CONFIG_64BIT
+	case 8:
+		asm volatile(
+			"	csg	%0,%3,%1\n"
+			: "=&d" (prev), "=Q" (*(long *) ptr)
+			: "0" (old), "d" (new), "Q" (*(long *) ptr)
+			: "memory", "cc");
+		return prev;
+#endif /* CONFIG_64BIT */
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						\
+	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	\
+				       (unsigned long)(n), sizeof(*(ptr))))
+
+#ifdef CONFIG_64BIT
+#define cmpxchg64(ptr, o, n)						\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg((ptr), (o), (n));					\
+})
+#else /* CONFIG_64BIT */
+static inline unsigned long long __cmpxchg64(void *ptr,
+					     unsigned long long old,
+					     unsigned long long new)
+{
+	register_pair rp_old = {.pair = old};
+	register_pair rp_new = {.pair = new};
+
+	asm volatile(
+		"	cds	%0,%2,%1"
+		: "+&d" (rp_old), "=Q" (ptr)
+		: "d" (rp_new), "Q" (ptr)
+		: "cc");
+	return rp_old.pair;
+}
+#define cmpxchg64(ptr, o, n)						\
+	((__typeof__(*(ptr)))__cmpxchg64((ptr),				\
+					 (unsigned long long)(o),	\
+					 (unsigned long long)(n)))
+#endif /* CONFIG_64BIT */
+
+#include <asm-generic/cmpxchg-local.h>
+
+static inline unsigned long __cmpxchg_local(void *ptr,
+					    unsigned long old,
+					    unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+	case 2:
+	case 4:
+#ifdef CONFIG_64BIT
+	case 8:
+#endif
+		return __cmpxchg(ptr, old, new, size);
+	default:
+		return __cmpxchg_local_generic(ptr, old, new, size);
+	}
+
+	return old;
+}
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n)					\
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
+			(unsigned long)(n), sizeof(*(ptr))))
+
+#define cmpxchg64_local(ptr, o, n)	cmpxchg64((ptr), (o), (n))
+
+#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e9..81cf36b691f1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
 #include <linux/uaccess.h>
 #include <asm/errno.h>
 
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	return ret;
 }
 
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr,
-						int oldval, int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+						u32 oldval, u32 newval)
 {
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;
 
-	return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval);
+	return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
 }
 
 #endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
-	signed long		count;
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	dep_map;
-#endif
-};
-
 #ifndef __s390x__
 #define RWSEM_UNLOCKED_VALUE	0x00000000
 #define RWSEM_ACTIVE_BIAS	0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 
 /*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
-   LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
-	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
-			 struct lock_class_key *key);
-
-#define init_rwsem(sem)				\
-do {						\
-	static struct lock_class_key __key;	\
-						\
-	__init_rwsem((sem), #sem, &__key);	\
-} while (0)
-
-
-/*
  * lock for reading
  */
 static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 	return new;
 }
 
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
-	return (sem->count != 0);
-}
-
 #endif /* __KERNEL__ */
 #endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 8f8d759f6a7b..d382629a0172 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -14,6 +14,7 @@
 #include <asm/setup.h>
 #include <asm/processor.h>
 #include <asm/lowcore.h>
+#include <asm/cmpxchg.h>
 
 #ifdef __KERNEL__
 
@@ -120,161 +121,6 @@ extern int memcpy_real(void *, void *, size_t);
 
 #define nop() asm volatile("nop")
 
-#define xchg(ptr,x)							  \
-({									  \
-	__typeof__(*(ptr)) __ret;					  \
-	__ret = (__typeof__(*(ptr)))					  \
-		__xchg((unsigned long)(x), (void *)(ptr),sizeof(*(ptr))); \
-	__ret;								  \
-})
-
-extern void __xchg_called_with_bad_pointer(void);
-
-static inline unsigned long __xchg(unsigned long x, void * ptr, int size)
-{
-	unsigned long addr, old;
-	int shift;
-
-        switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" (x << shift), "d" (~(255 << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%4\n"
-			"0:	lr	0,%0\n"
-			"	nr	0,%3\n"
-			"	or	0,%2\n"
-			"	cs	%0,0,%4\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) addr)
-			: "d" (x << shift), "d" (~(65535 << shift)),
-			  "Q" (*(int *) addr) : "memory", "cc", "0");
-		return old >> shift;
-	case 4:
-		asm volatile(
-			"	l	%0,%3\n"
-			"0:	cs	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=Q" (*(int *) ptr)
-			: "d" (x), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return old;
-#ifdef __s390x__
-	case 8:
-		asm volatile(
-			"	lg	%0,%3\n"
-			"0:	csg	%0,%2,%3\n"
-			"	jl	0b\n"
-			: "=&d" (old), "=m" (*(long *) ptr)
-			: "d" (x), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return old;
-#endif /* __s390x__ */
-	}
-	__xchg_called_with_bad_pointer();
-	return x;
-}
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, o, n)						\
-	((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	\
-					(unsigned long)(n), sizeof(*(ptr))))
-
-extern void __cmpxchg_called_with_bad_pointer(void);
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
-{
-	unsigned long addr, prev, tmp;
-	int shift;
-
-        switch (size) {
-	case 1:
-		addr = (unsigned long) ptr;
-		shift = (3 ^ (addr & 3)) << 3;
-		addr ^= addr & 3;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
-			: "d" (old << shift), "d" (new << shift),
-			  "d" (~(255 << shift)), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev >> shift;
-	case 2:
-		addr = (unsigned long) ptr;
-		shift = (2 ^ (addr & 2)) << 3;
-		addr ^= addr & 2;
-		asm volatile(
-			"	l	%0,%2\n"
-			"0:	nr	%0,%5\n"
-			"	lr	%1,%0\n"
-			"	or	%0,%3\n"
-			"	or	%1,%4\n"
-			"	cs	%0,%1,%2\n"
-			"	jnl	1f\n"
-			"	xr	%1,%0\n"
-			"	nr	%1,%5\n"
-			"	jnz	0b\n"
-			"1:"
-			: "=&d" (prev), "=&d" (tmp), "=Q" (*(int *) ptr)
-			: "d" (old << shift), "d" (new << shift),
-			  "d" (~(65535 << shift)), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev >> shift;
-	case 4:
-		asm volatile(
-			"	cs	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(int *) ptr)
-			: "0" (old), "d" (new), "Q" (*(int *) ptr)
-			: "memory", "cc");
-		return prev;
-#ifdef __s390x__
-	case 8:
-		asm volatile(
-			"	csg	%0,%3,%1\n"
-			: "=&d" (prev), "=Q" (*(long *) ptr)
-			: "0" (old), "d" (new), "Q" (*(long *) ptr)
-			: "memory", "cc");
-		return prev;
-#endif /* __s390x__ */
-        }
-	__cmpxchg_called_with_bad_pointer();
-	return old;
-}
-
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking
@@ -353,46 +199,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 	__ctl_load(__dummy, cr, cr);	\
 })
 
-#include <linux/irqflags.h>
-
-#include <asm-generic/cmpxchg-local.h>
-
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
-				      unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-	case 2:
-	case 4:
-#ifdef __s390x__
-	case 8:
-#endif
-		return __cmpxchg(ptr, old, new, size);
-	default:
-		return __cmpxchg_local_generic(ptr, old, new, size);
-	}
-
-	return old;
-}
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)					\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
-			(unsigned long)(n), sizeof(*(ptr))))
-#ifdef __s390x__
-#define cmpxchg64_local(ptr, o, n)					\
-  ({									\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
-  })
-#else
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-#endif
-
 /*
  * Use to set psw mask except for the first byte which
  * won't be changed by this function.
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
index 04d6b95a89c6..eeb52ccf499f 100644
--- a/arch/s390/include/asm/types.h
+++ b/arch/s390/include/asm/types.h
@@ -30,14 +30,6 @@ typedef __signed__ long saddr_t;
 
 #ifndef __ASSEMBLY__
 
-typedef u64 dma64_addr_t;
-#ifdef __s390x__
-/* DMA addresses come in 32-bit and 64-bit flavours. */
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-
 #ifndef __s390x__
 typedef union {
 	unsigned long long pair;
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52b..2d9ea11f919a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
 	size_t (*clear_user)(size_t, void __user *);
 	size_t (*strnlen_user)(size_t, const char __user *);
 	size_t (*strncpy_from_user)(size_t, const char __user *, char *);
-	int (*futex_atomic_op)(int op, int __user *, int oparg, int *old);
-	int (*futex_atomic_cmpxchg)(int __user *, int old, int new);
+	int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
+	int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
 };
 
 extern struct uaccess_ops uaccess;
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 1049ef27c15e..e82152572377 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -272,7 +272,11 @@
 #define __NR_fanotify_init	332
 #define __NR_fanotify_mark	333
 #define __NR_prlimit64		334
-#define NR_syscalls 335
+#define __NR_name_to_handle_at	335
+#define __NR_open_by_handle_at	336
+#define __NR_clock_adjtime	337
+#define __NR_syncfs		338
+#define NR_syscalls 339
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 8e60fb23b90d..1dc96ea08fa8 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1877,3 +1877,30 @@ sys_prlimit64_wrapper:
 	llgtr	%r4,%r4			# const struct rlimit64 __user *
 	llgtr	%r5,%r5			# struct rlimit64 __user *
 	jg	sys_prlimit64		# branch to system call
+
+	.globl	sys_name_to_handle_at_wrapper
+sys_name_to_handle_at_wrapper:
+	lgfr	%r2,%r2			# int
+	llgtr	%r3,%r3			# const char __user *
+	llgtr	%r4,%r4			# struct file_handle __user *
+	llgtr	%r5,%r5			# int __user *
+	lgfr	%r6,%r6			# int
+	jg	sys_name_to_handle_at
+
+	.globl	compat_sys_open_by_handle_at_wrapper
+compat_sys_open_by_handle_at_wrapper:
+	lgfr	%r2,%r2			# int
+	llgtr	%r3,%r3			# struct file_handle __user *
+	lgfr	%r4,%r4			# int
+	jg	compat_sys_open_by_handle_at
+
+	.globl	compat_sys_clock_adjtime_wrapper
+compat_sys_clock_adjtime_wrapper:
+	lgfr	%r2,%r2			# clockid_t (int)
+	llgtr	%r3,%r3			# struct compat_timex __user *
+	jg	compat_sys_clock_adjtime
+
+	.globl	sys_syncfs_wrapper
+sys_syncfs_wrapper:
+	lgfr	%r2,%r2			# int
+	jg	sys_syncfs
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3b7e7dddc324..068f8465c4ee 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -94,6 +94,7 @@ static noinline __init void create_kernel_nss(void)
 	unsigned int sinitrd_pfn, einitrd_pfn;
 #endif
 	int response;
+	int hlen;
 	size_t len;
 	char *savesys_ptr;
 	char defsys_cmd[DEFSYS_CMD_SIZE];
@@ -124,24 +125,27 @@ static noinline __init void create_kernel_nss(void)
 	end_pfn = PFN_UP(__pa(&_end));
 	min_size = end_pfn << 2;
 
-	sprintf(defsys_cmd, "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
-		kernel_nss_name, stext_pfn - 1, stext_pfn, eshared_pfn - 1,
-		eshared_pfn, end_pfn);
+	hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE,
+			"DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
+			kernel_nss_name, stext_pfn - 1, stext_pfn,
+			eshared_pfn - 1, eshared_pfn, end_pfn);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START && INITRD_SIZE) {
 		sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
 		einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
 		min_size = einitrd_pfn << 2;
-		sprintf(defsys_cmd, "%s EW %.5X-%.5X", defsys_cmd,
-		sinitrd_pfn, einitrd_pfn);
+		hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+				 " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
 	}
 #endif
 
-	sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13",
-		defsys_cmd, min_size);
-	sprintf(savesys_cmd, "SAVESYS %s \n IPL %s",
-		kernel_nss_name, kernel_nss_name);
+	snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+		 " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
+	defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
+	snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
+		 kernel_nss_name, kernel_nss_name);
+	savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
 
 	__cpcmd(defsys_cmd, NULL, 0, &response);
 
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index a922d51df6bf..b09b9c62573e 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -12,6 +12,7 @@
 #include <linux/kexec.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
+#include <linux/ftrace.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
 #include <asm/pgtable.h>
@@ -71,6 +72,7 @@ static void __machine_kexec(void *data)
 
 void machine_kexec(struct kimage *image)
 {
+	tracer_disable();
 	smp_send_stop();
 	smp_switch_to_ipl_cpu(__machine_kexec, image);
 }
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 6f6350826c81..ed183c2c6168 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -102,16 +102,6 @@ EXPORT_SYMBOL(lowcore_ptr);
 
 #include <asm/setup.h>
 
-static struct resource code_resource = {
-	.name  = "Kernel code",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
-
-static struct resource data_resource = {
-	.name = "Kernel data",
-	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
-};
-
 /*
  * condev= and conmode= setup parameter.
  */
@@ -436,21 +426,43 @@ setup_lowcore(void)
 	lowcore_ptr[0] = lc;
 }
 
-static void __init
-setup_resources(void)
+static struct resource code_resource = {
+	.name  = "Kernel code",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource data_resource = {
+	.name = "Kernel data",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource bss_resource = {
+	.name = "Kernel bss",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+	&code_resource,
+	&data_resource,
+	&bss_resource,
+};
+
+static void __init setup_resources(void)
 {
-	struct resource *res, *sub_res;
-	int i;
+	struct resource *res, *std_res, *sub_res;
+	int i, j;
 
 	code_resource.start = (unsigned long) &_text;
 	code_resource.end = (unsigned long) &_etext - 1;
 	data_resource.start = (unsigned long) &_etext;
 	data_resource.end = (unsigned long) &_edata - 1;
+	bss_resource.start = (unsigned long) &__bss_start;
+	bss_resource.end = (unsigned long) &__bss_stop - 1;
 
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
-		res = alloc_bootmem_low(sizeof(struct resource));
+		res = alloc_bootmem_low(sizeof(*res));
 		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
 		switch (memory_chunk[i].type) {
 		case CHUNK_READ_WRITE:
@@ -464,40 +476,24 @@ setup_resources(void)
 			res->name = "reserved";
 		}
 		res->start = memory_chunk[i].addr;
-		res->end = memory_chunk[i].addr +  memory_chunk[i].size - 1;
+		res->end = res->start + memory_chunk[i].size - 1;
 		request_resource(&iomem_resource, res);
 
-		if (code_resource.start >= res->start  &&
-			code_resource.start <= res->end &&
-			code_resource.end > res->end) {
-			sub_res = alloc_bootmem_low(sizeof(struct resource));
-			memcpy(sub_res, &code_resource,
-				sizeof(struct resource));
-			sub_res->end = res->end;
-			code_resource.start = res->end + 1;
-			request_resource(res, sub_res);
-		}
-
-		if (code_resource.start >= res->start &&
-			code_resource.start <= res->end &&
-			code_resource.end <= res->end)
-			request_resource(res, &code_resource);
-
-		if (data_resource.start >= res->start &&
-			data_resource.start <= res->end &&
-			data_resource.end > res->end) {
-			sub_res = alloc_bootmem_low(sizeof(struct resource));
-			memcpy(sub_res, &data_resource,
-				sizeof(struct resource));
-			sub_res->end = res->end;
-			data_resource.start = res->end + 1;
-			request_resource(res, sub_res);
+		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+			std_res = standard_resources[j];
+			if (std_res->start < res->start ||
+			    std_res->start > res->end)
+				continue;
+			if (std_res->end > res->end) {
+				sub_res = alloc_bootmem_low(sizeof(*sub_res));
+				*sub_res = *std_res;
+				sub_res->end = res->end;
+				std_res->start = res->end + 1;
+				request_resource(res, sub_res);
+			} else {
+				request_resource(res, std_res);
+			}
 		}
-
-		if (data_resource.start >= res->start &&
-			data_resource.start <= res->end &&
-			data_resource.end <= res->end)
-			request_resource(res, &data_resource);
 	}
 }
 
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index a8fee1b14395..9c65fd4ddce0 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -343,3 +343,7 @@ SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper)
 SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper)
 SYSCALL(sys_fanotify_mark,sys_fanotify_mark,sys_fanotify_mark_wrapper)
 SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper)
+SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */
+SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper)
+SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper)
+SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper)
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index f438d74dedbd..d73630b4fe1d 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -337,17 +337,17 @@ static int __init vdso_init(void)
 }
 arch_initcall(vdso_init);
 
-int in_gate_area_no_task(unsigned long addr)
+int in_gate_area_no_mm(unsigned long addr)
 {
 	return 0;
 }
 
-int in_gate_area(struct task_struct *task, unsigned long addr)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
 	return 0;
 }
 
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 	return NULL;
 }
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a68ac10213b2..1bc18cdb525b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -77,7 +77,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	INIT_DATA_SECTION(0x100)
 
-	PERCPU(PAGE_SIZE)
+	PERCPU(0x100, PAGE_SIZE)
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;		/* freed after init ends here */
 
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index e5221ec0b8e3..860d26514c08 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
 
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f1..1d2536cb630b 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
 extern size_t copy_to_user_std(size_t, void __user *, const void *);
 extern size_t strnlen_user_std(size_t, const char __user *);
 extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(int __user *, int, int);
-extern int futex_atomic_op_std(int, int __user *, int, int *);
+extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
+extern int futex_atomic_op_std(int, u32 __user *, int, int *);
 
 extern size_t copy_from_user_pt(size_t, const void __user *, void *);
 extern size_t copy_to_user_pt(size_t, void __user *, const void *);
-extern int futex_atomic_op_pt(int, int __user *, int, int *);
-extern int futex_atomic_cmpxchg_pt(int __user *, int, int);
+extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
+extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
 
 #endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296dc..74833831417f 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
 		     : "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		       "m" (*uaddr) : "cc" );
 
-static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
 {
 	int oldval = 0, newval, ret;
 
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 	return ret;
 }
 
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
 {
 	int ret;
 
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
 	return ret;
 }
 
-static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+				     u32 oldval, u32 newval)
 {
 	int ret;
 
 	asm volatile("0: cs   %1,%4,0(%5)\n"
-		     "1: lr   %0,%1\n"
+		     "1: la   %0,0\n"
 		     "2:\n"
 		     EX_TABLE(0b,2b) EX_TABLE(1b,2b)
 		     : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
 		     : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
 		     : "cc", "memory" );
+	*uval = oldval;
 	return ret;
 }
 
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+			    u32 oldval, u32 newval)
 {
 	int ret;
 
 	if (segment_eq(get_fs(), KERNEL_DS))
-		return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+		return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
 	spin_lock(&current->mm->page_table_lock);
 	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
 	if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
 	}
 	get_page(virt_to_page(uaddr));
 	spin_unlock(&current->mm->page_table_lock);
-	ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+	ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
 	put_page(virt_to_page(uaddr));
 	return ret;
 }
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a4..bb1a7eed42ce 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
 		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
 		  "m" (*uaddr) : "cc");
 
-int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
 {
 	int oldval = 0, newval, ret;
 
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
 	return ret;
 }
 
-int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
+			     u32 oldval, u32 newval)
 {
 	int ret;
 
 	asm volatile(
 		"   sacf 256\n"
 		"0: cs   %1,%4,0(%5)\n"
-		"1: lr   %0,%1\n"
+		"1: la   %0,0\n"
 		"2: sacf 0\n"
 		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
 		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
 		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
 		: "cc", "memory" );
+	*uval = oldval;
 	return ret;
 }
 
diff --git a/arch/s390/math-emu/Makefile b/arch/s390/math-emu/Makefile
index c84890341052..51d399549f60 100644
--- a/arch/s390/math-emu/Makefile
+++ b/arch/s390/math-emu/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_MATHEMU) := math.o
 
-EXTRA_CFLAGS := -I$(src) -Iinclude/math-emu -w
+ccflags-y := -I$(src) -Iinclude/math-emu -w
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 6fbc6f3fbdf2..d98fe9004a52 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -6,3 +6,4 @@ obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
 	    page-states.o gup.o
 obj-$(CONFIG_CMM) += cmm.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 000000000000..122ffbd08ce0
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+
+static void change_page_attr(unsigned long addr, int numpages,
+			     pte_t (*set) (pte_t))
+{
+	pte_t *ptep, pte;
+	pmd_t *pmdp;
+	pud_t *pudp;
+	pgd_t *pgdp;
+	int i;
+
+	for (i = 0; i < numpages; i++) {
+		pgdp = pgd_offset(&init_mm, addr);
+		pudp = pud_offset(pgdp, addr);
+		pmdp = pmd_offset(pudp, addr);
+		if (pmd_huge(*pmdp)) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+		ptep = pte_offset_kernel(pmdp, addr + i * PAGE_SIZE);
+
+		pte = *ptep;
+		pte = set(pte);
+		ptep_invalidate(&init_mm, addr + i * PAGE_SIZE, ptep);
+		*ptep = pte;
+	}
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	change_page_attr(addr, numpages, pte_wrprotect);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(set_memory_ro);
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	change_page_attr(addr, numpages, pte_mkwrite);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(set_memory_rw);
+
+/* not possible */
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(set_memory_nx);
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 537b2d840e69..524c4b615821 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,4 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o  \
 		timer_int.o )
 
-oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o
+oprofile-$(CONFIG_64BIT)	+= hwsampler.o
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
new file mode 100644
index 000000000000..3d48f4db246d
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.c
@@ -0,0 +1,1256 @@
+/**
+ * arch/s390/oprofile/hwsampler.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/semaphore.h>
+#include <linux/oom.h>
+#include <linux/oprofile.h>
+
+#include <asm/lowcore.h>
+#include <asm/s390_ext.h>
+
+#include "hwsampler.h"
+
+#define MAX_NUM_SDB 511
+#define MIN_NUM_SDB 1
+
+#define ALERT_REQ_MASK   0x4000000000000000ul
+#define BUFFER_FULL_MASK 0x8000000000000000ul
+
+#define EI_IEA      (1 << 31)	/* invalid entry address              */
+#define EI_ISE      (1 << 30)	/* incorrect SDBT entry               */
+#define EI_PRA      (1 << 29)	/* program request alert              */
+#define EI_SACA     (1 << 23)	/* sampler authorization change alert */
+#define EI_LSDA     (1 << 22)	/* loss of sample data alert          */
+
+DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+
+struct hws_execute_parms {
+	void *buffer;
+	signed int rc;
+};
+
+DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer);
+
+static DEFINE_MUTEX(hws_sem);
+static DEFINE_MUTEX(hws_sem_oom);
+
+static unsigned char hws_flush_all;
+static unsigned int hws_oom;
+static struct workqueue_struct *hws_wq;
+
+static unsigned int hws_state;
+enum {
+	HWS_INIT = 1,
+	HWS_DEALLOCATED,
+	HWS_STOPPED,
+	HWS_STARTED,
+	HWS_STOPPING };
+
+/* set to 1 if called by kernel during memory allocation */
+static unsigned char oom_killer_was_active;
+/* size of SDBT and SDB as of allocate API */
+static unsigned long num_sdbt = 100;
+static unsigned long num_sdb = 511;
+/* sampling interval (machine cycles) */
+static unsigned long interval;
+
+static unsigned long min_sampler_rate;
+static unsigned long max_sampler_rate;
+
+static int ssctl(void *buffer)
+{
+	int cc;
+
+	/* set in order to detect a program check */
+	cc = 1;
+
+	asm volatile(
+		"0: .insn s,0xB2870000,0(%1)\n"
+		"1: ipm %0\n"
+		"   srl %0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (buffer)
+		: "m" (*((struct hws_ssctl_request_block *)buffer))
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0 ;
+}
+
+static int qsi(void *buffer)
+{
+	int cc;
+	cc = 1;
+
+	asm volatile(
+		"0: .insn s,0xB2860000,0(%1)\n"
+		"1: lhi %0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (cc), "+a" (buffer)
+		: "m" (*((struct hws_qsi_info_block *)buffer))
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+static void execute_qsi(void *parms)
+{
+	struct hws_execute_parms *ep = parms;
+
+	ep->rc = qsi(ep->buffer);
+}
+
+static void execute_ssctl(void *parms)
+{
+	struct hws_execute_parms *ep = parms;
+
+	ep->rc = ssctl(ep->buffer);
+}
+
+static int smp_ctl_ssctl_stop(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.es = 0;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc) {
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+		dump_stack();
+	}
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.es || cb->qsi.cs) {
+		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
+		dump_stack();
+	}
+
+	return rc;
+}
+
+static int smp_ctl_ssctl_deactivate(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.cs)
+		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
+
+	return rc;
+}
+
+static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.h = 1;
+	cb->ssctl.tear = cb->first_sdbt;
+	cb->ssctl.dear = *(unsigned long *) cb->first_sdbt;
+	cb->ssctl.interval = interval;
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 1;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+	if (ep.rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
+
+	return rc;
+}
+
+static int smp_ctl_qsi(int cpu)
+{
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	return ep.rc;
+}
+
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *)v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) ret;
+}
+
+/* prototypes for external interrupt handler and worker */
+static void hws_ext_handler(unsigned int ext_int_code,
+				unsigned int param32, unsigned long param64);
+
+static void worker(struct work_struct *work);
+
+static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
+				unsigned long *dear);
+
+static void init_all_cpu_buffers(void)
+{
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		memset(cb, 0, sizeof(struct hws_cpu_buffer));
+	}
+}
+
+static int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1ul ? 1 : 0;
+}
+
+static unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
+
+static int prepare_cpu_buffers(void)
+{
+	int cpu;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		atomic_set(&cb->ext_params, 0);
+		cb->worker_entry = 0;
+		cb->sample_overflow = 0;
+		cb->req_alert = 0;
+		cb->incorrect_sdbt_entry = 0;
+		cb->invalid_entry_address = 0;
+		cb->loss_of_sample_data = 0;
+		cb->sample_auth_change_alert = 0;
+		cb->finish = 0;
+		cb->oom = 0;
+		cb->stop_mode = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * allocate_sdbt() - allocate sampler memory
+ * @cpu: the cpu for which sampler memory is allocated
+ *
+ * A 4K page is allocated for each requested SDBT.
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
+ * Set ALERT_REQ mask in each SDBs trailer.
+ * Returns zero if successful, <0 otherwise.
+ */
+static int allocate_sdbt(int cpu)
+{
+	int j, k, rc;
+	unsigned long *sdbt;
+	unsigned long  sdb;
+	unsigned long *tail;
+	unsigned long *trailer;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	if (cb->first_sdbt)
+		return -EINVAL;
+
+	sdbt = NULL;
+	tail = sdbt;
+
+	for (j = 0; j < num_sdbt; j++) {
+		sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+
+		mutex_lock(&hws_sem_oom);
+		/* OOM killer might have been activated */
+		barrier();
+		if (oom_killer_was_active || !sdbt) {
+			if (sdbt)
+				free_page((unsigned long)sdbt);
+
+			goto allocate_sdbt_error;
+		}
+		if (cb->first_sdbt == 0)
+			cb->first_sdbt = (unsigned long)sdbt;
+
+		/* link current page to tail of chain */
+		if (tail)
+			*tail = (unsigned long)(void *)sdbt + 1;
+
+		mutex_unlock(&hws_sem_oom);
+
+		for (k = 0; k < num_sdb; k++) {
+			/* get and set SDB page */
+			sdb = get_zeroed_page(GFP_KERNEL);
+
+			mutex_lock(&hws_sem_oom);
+			/* OOM killer might have been activated */
+			barrier();
+			if (oom_killer_was_active || !sdb) {
+				if (sdb)
+					free_page(sdb);
+
+				goto allocate_sdbt_error;
+			}
+			*sdbt = sdb;
+			trailer = trailer_entry_ptr(*sdbt);
+			*trailer = ALERT_REQ_MASK;
+			sdbt++;
+			mutex_unlock(&hws_sem_oom);
+		}
+		tail = sdbt;
+	}
+	mutex_lock(&hws_sem_oom);
+	if (oom_killer_was_active)
+		goto allocate_sdbt_error;
+
+	rc = 0;
+	if (tail)
+		*tail = (unsigned long)
+			((void *)cb->first_sdbt) + 1;
+
+allocate_sdbt_exit:
+	mutex_unlock(&hws_sem_oom);
+	return rc;
+
+allocate_sdbt_error:
+	rc = -ENOMEM;
+	goto allocate_sdbt_exit;
+}
+
+/*
+ * deallocate_sdbt() - deallocate all sampler memory
+ *
+ * For each online CPU all SDBT trees are deallocated.
+ * Returns the number of freed pages.
+ */
+static int deallocate_sdbt(void)
+{
+	int cpu;
+	int counter;
+
+	counter = 0;
+
+	for_each_online_cpu(cpu) {
+		unsigned long start;
+		unsigned long sdbt;
+		unsigned long *curr;
+		struct hws_cpu_buffer *cb;
+
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		if (!cb->first_sdbt)
+			continue;
+
+		sdbt = cb->first_sdbt;
+		curr = (unsigned long *) sdbt;
+		start = sdbt;
+
+		/* we'll free the SDBT after all SDBs are processed... */
+		while (1) {
+			if (!*curr || !sdbt)
+				break;
+
+			/* watch for link entry reset if found */
+			if (is_link_entry(curr)) {
+				curr = get_next_sdbt(curr);
+				if (sdbt)
+					free_page(sdbt);
+
+				/* we are done if we reach the start */
+				if ((unsigned long) curr == start)
+					break;
+				else
+					sdbt = (unsigned long) curr;
+			} else {
+				/* process SDB pointer */
+				if (*curr) {
+					free_page(*curr);
+					curr++;
+				}
+			}
+			counter++;
+		}
+		cb->first_sdbt = 0;
+	}
+	return counter;
+}
+
+static int start_sampling(int cpu)
+{
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
+		goto start_exit;
+	}
+
+	rc = -EINVAL;
+	if (!cb->qsi.es) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
+		goto start_exit;
+	}
+
+	if (!cb->qsi.cs) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
+		goto start_exit;
+	}
+
+	printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
+		cpu, interval);
+
+	rc = 0;
+
+start_exit:
+	return rc;
+}
+
+static int stop_sampling(int cpu)
+{
+	unsigned long v;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (!rc && !cb->qsi.es)
+		printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
+
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
+				cpu, rc);
+		goto stop_exit;
+	}
+
+	printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu);
+
+stop_exit:
+	v = cb->req_alert;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->loss_of_sample_data;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->invalid_entry_address;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->incorrect_sdbt_entry;
+	if (v)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Incorrect SDBT address,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->sample_auth_change_alert;
+	if (v)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Sample authorization change,"
+				" count=%lu.\n", cpu, v);
+
+	return rc;
+}
+
+static int check_hardware_prerequisites(void)
+{
+	unsigned long long facility_bits[2];
+
+	memcpy(facility_bits, S390_lowcore.stfle_fac_list, 32);
+	if (!(facility_bits[1] & (1ULL << 59)))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+/*
+ * hws_oom_callback() - the OOM callback function
+ *
+ * In case the callback is invoked during memory allocation for the
+ *  hw sampler, all obtained memory is deallocated and a flag is set
+ *  so main sampler memory allocation can exit with a failure code.
+ * In case the callback is invoked during sampling the hw sampler
+ *  is deactivated for all CPUs.
+ */
+static int hws_oom_callback(struct notifier_block *nfb,
+	unsigned long dummy, void *parm)
+{
+	unsigned long *freed;
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	freed = parm;
+
+	mutex_lock(&hws_sem_oom);
+
+	if (hws_state == HWS_DEALLOCATED) {
+		/* during memory allocation */
+		if (oom_killer_was_active == 0) {
+			oom_killer_was_active = 1;
+			*freed += deallocate_sdbt();
+		}
+	} else {
+		int i;
+		cpu = get_cpu();
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		if (!cb->oom) {
+			for_each_online_cpu(i) {
+				smp_ctl_ssctl_deactivate(i);
+				cb->oom = 1;
+			}
+			cb->finish = 1;
+
+			printk(KERN_INFO
+				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
+				cpu);
+		}
+	}
+
+	mutex_unlock(&hws_sem_oom);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hws_oom_notifier = {
+	.notifier_call = hws_oom_callback
+};
+
+static int hws_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	/* We do not have sampler space available for all possible CPUs.
+	   All CPUs should be online when hw sampling is activated. */
+	return NOTIFY_BAD;
+}
+
+static struct notifier_block hws_cpu_notifier = {
+	.notifier_call = hws_cpu_callback
+};
+
+/**
+ * hwsampler_deactivate() - set hardware sampling temporarily inactive
+ * @cpu:  specifies the CPU to be set inactive.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deactivate(unsigned int cpu)
+{
+	/*
+	 * Deactivate hw sampling temporarily and flush the buffer
+	 * by pushing all the pending samples to oprofile buffer.
+	 *
+	 * This function can be called under one of the following conditions:
+	 *     Memory unmap, task is exiting.
+	 */
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	mutex_lock(&hws_sem);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state == HWS_STARTED) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (cb->qsi.cs) {
+			rc = smp_ctl_ssctl_deactivate(cpu);
+			if (rc) {
+				printk(KERN_INFO
+				"hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
+				cb->finish = 1;
+				hws_state = HWS_STOPPING;
+			} else  {
+				hws_flush_all = 1;
+				/* Add work to queue to read pending samples.*/
+				queue_work_on(cpu, hws_wq, &cb->worker);
+			}
+		}
+	}
+	mutex_unlock(&hws_sem);
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	return rc;
+}
+
+/**
+ * hwsampler_activate() - activate/resume hardware sampling which was deactivated
+ * @cpu:  specifies the CPU to be set active.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_activate(unsigned int cpu)
+{
+	/*
+	 * Re-activate hw sampling. This should be called in pair with
+	 * hwsampler_deactivate().
+	 */
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	mutex_lock(&hws_sem);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state == HWS_STARTED) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (!cb->qsi.cs) {
+			hws_flush_all = 0;
+			rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+			if (rc) {
+				printk(KERN_ERR
+				"CPU %d, CPUMF activate sampling failed.\n",
+					 cpu);
+			}
+		}
+	}
+
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+static void hws_ext_handler(unsigned int ext_int_code,
+			    unsigned int param32, unsigned long param64)
+{
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	cpu = smp_processor_id();
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	atomic_xchg(
+			&cb->ext_params,
+			atomic_read(&cb->ext_params)
+				| S390_lowcore.ext_params);
+
+	if (hws_wq)
+		queue_work(hws_wq, &cb->worker);
+}
+
+static int check_qsi_on_setup(void)
+{
+	int rc;
+	unsigned int cpu;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (rc)
+			return -EOPNOTSUPP;
+
+		if (!cb->qsi.as) {
+			printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
+			return -EINVAL;
+		}
+
+		if (cb->qsi.es) {
+			printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
+			rc = smp_ctl_ssctl_stop(cpu);
+			if (rc)
+				return -EINVAL;
+
+			printk(KERN_INFO
+				"CPU %d, CPUMF Sampling stopped now.\n", cpu);
+		}
+	}
+	return 0;
+}
+
+static int check_qsi_on_start(void)
+{
+	unsigned int cpu;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+
+		if (!cb->qsi.as)
+			return -EINVAL;
+
+		if (cb->qsi.es)
+			return -EINVAL;
+
+		if (cb->qsi.cs)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static void worker_on_start(unsigned int cpu)
+{
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	cb->worker_entry = cb->first_sdbt;
+}
+
+static int worker_check_error(unsigned int cpu, int ext_params)
+{
+	int rc;
+	unsigned long *sdbt;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	if (!sdbt || !*sdbt)
+		return -EINVAL;
+
+	if (ext_params & EI_IEA)
+		cb->req_alert++;
+
+	if (ext_params & EI_LSDA)
+		cb->loss_of_sample_data++;
+
+	if (ext_params & EI_IEA) {
+		cb->invalid_entry_address++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_ISE) {
+		cb->incorrect_sdbt_entry++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_SACA) {
+		cb->sample_auth_change_alert++;
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+static void worker_on_finish(unsigned int cpu)
+{
+	int rc, i;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	if (cb->finish) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (cb->qsi.es) {
+			printk(KERN_INFO
+				"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
+				cpu);
+			rc = smp_ctl_ssctl_stop(cpu);
+			if (rc)
+				printk(KERN_INFO
+					"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
+					cpu);
+
+			for_each_online_cpu(i) {
+				if (i == cpu)
+					continue;
+				if (!cb->finish) {
+					cb->finish = 1;
+					queue_work_on(i, hws_wq,
+						&cb->worker);
+				}
+			}
+		}
+	}
+}
+
+static void worker_on_interrupt(unsigned int cpu)
+{
+	unsigned long *sdbt;
+	unsigned char done;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	done = 0;
+	/* do not proceed if stop was entered,
+	 * forget the buffers not yet processed */
+	while (!done && !cb->stop_mode) {
+		unsigned long *trailer;
+		struct hws_trailer_entry *te;
+		unsigned long *dear = 0;
+
+		trailer = trailer_entry_ptr(*sdbt);
+		/* leave loop if no more work to do */
+		if (!(*trailer & BUFFER_FULL_MASK)) {
+			done = 1;
+			if (!hws_flush_all)
+				continue;
+		}
+
+		te = (struct hws_trailer_entry *)trailer;
+		cb->sample_overflow += te->overflow;
+
+		add_samples_to_oprofile(cpu, sdbt, dear);
+
+		/* reset trailer */
+		xchg((unsigned char *) te, 0x40);
+
+		/* advance to next sdb slot in current sdbt */
+		sdbt++;
+		/* in case link bit is set use address w/o link bit */
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		cb->worker_entry = (unsigned long)sdbt;
+	}
+}
+
+static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
+		unsigned long *dear)
+{
+	struct hws_data_entry *sample_data_ptr;
+	unsigned long *trailer;
+
+	trailer = trailer_entry_ptr(*sdbt);
+	if (dear) {
+		if (dear > trailer)
+			return;
+		trailer = dear;
+	}
+
+	sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+
+	while ((unsigned long *)sample_data_ptr < trailer) {
+		struct pt_regs *regs = NULL;
+		struct task_struct *tsk = NULL;
+
+		/*
+		 * Check sampling mode, 1 indicates basic (=customer) sampling
+		 * mode.
+		 */
+		if (sample_data_ptr->def != 1) {
+			/* sample slot is not yet written */
+			break;
+		} else {
+			/* make sure we don't use it twice,
+			 * the next time the sampler will set it again */
+			sample_data_ptr->def = 0;
+		}
+
+		/* Get pt_regs. */
+		if (sample_data_ptr->P == 1) {
+			/* userspace sample */
+			unsigned int pid = sample_data_ptr->prim_asn;
+			rcu_read_lock();
+			tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
+			if (tsk)
+				regs = task_pt_regs(tsk);
+			rcu_read_unlock();
+		} else {
+			/* kernelspace sample */
+			regs = task_pt_regs(current);
+		}
+
+		mutex_lock(&hws_sem);
+		oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
+				!sample_data_ptr->P, tsk);
+		mutex_unlock(&hws_sem);
+
+		sample_data_ptr++;
+	}
+}
+
+static void worker(struct work_struct *work)
+{
+	unsigned int cpu;
+	int ext_params;
+	struct hws_cpu_buffer *cb;
+
+	cb = container_of(work, struct hws_cpu_buffer, worker);
+	cpu = smp_processor_id();
+	ext_params = atomic_xchg(&cb->ext_params, 0);
+
+	if (!cb->worker_entry)
+		worker_on_start(cpu);
+
+	if (worker_check_error(cpu, ext_params))
+		return;
+
+	if (!cb->finish)
+		worker_on_interrupt(cpu);
+
+	if (cb->finish)
+		worker_on_finish(cpu);
+}
+
+/**
+ * hwsampler_allocate() - allocate memory for the hardware sampler
+ * @sdbt:  number of SDBTs per online CPU (must be > 0)
+ * @sdb:   number of SDBs per SDBT (minimum 1, maximum 511)
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
+{
+	int cpu, rc;
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_DEALLOCATED)
+		goto allocate_exit;
+
+	if (sdbt < 1)
+		goto allocate_exit;
+
+	if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
+		goto allocate_exit;
+
+	num_sdbt = sdbt;
+	num_sdb = sdb;
+
+	oom_killer_was_active = 0;
+	register_oom_notifier(&hws_oom_notifier);
+
+	for_each_online_cpu(cpu) {
+		if (allocate_sdbt(cpu)) {
+			unregister_oom_notifier(&hws_oom_notifier);
+			goto allocate_error;
+		}
+	}
+	unregister_oom_notifier(&hws_oom_notifier);
+	if (oom_killer_was_active)
+		goto allocate_error;
+
+	hws_state = HWS_STOPPED;
+	rc = 0;
+
+allocate_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+
+allocate_error:
+	rc = -ENOMEM;
+	printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
+	goto allocate_exit;
+}
+
+/**
+ * hwsampler_deallocate() - deallocate hardware sampler memory
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deallocate()
+{
+	int rc;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_STOPPED)
+		goto deallocate_exit;
+
+	smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+	deallocate_sdbt();
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+deallocate_exit:
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+long hwsampler_query_min_interval(void)
+{
+	if (min_sampler_rate)
+		return min_sampler_rate;
+	else
+		return -EINVAL;
+}
+
+long hwsampler_query_max_interval(void)
+{
+	if (max_sampler_rate)
+		return max_sampler_rate;
+	else
+		return -EINVAL;
+}
+
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
+{
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	return cb->sample_overflow;
+}
+
+int hwsampler_setup()
+{
+	int rc;
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state)
+		goto setup_exit;
+
+	hws_state = HWS_INIT;
+
+	init_all_cpu_buffers();
+
+	rc = check_hardware_prerequisites();
+	if (rc)
+		goto setup_exit;
+
+	rc = check_qsi_on_setup();
+	if (rc)
+		goto setup_exit;
+
+	rc = -EINVAL;
+	hws_wq = create_workqueue("hwsampler");
+	if (!hws_wq)
+		goto setup_exit;
+
+	register_cpu_notifier(&hws_cpu_notifier);
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		INIT_WORK(&cb->worker, worker);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (min_sampler_rate != cb->qsi.min_sampl_rate) {
+			if (min_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different min sampler rate values.\n");
+				if (min_sampler_rate < cb->qsi.min_sampl_rate)
+					min_sampler_rate =
+						cb->qsi.min_sampl_rate;
+			} else
+				min_sampler_rate = cb->qsi.min_sampl_rate;
+		}
+		if (max_sampler_rate != cb->qsi.max_sampl_rate) {
+			if (max_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different max sampler rate values.\n");
+				if (max_sampler_rate > cb->qsi.max_sampl_rate)
+					max_sampler_rate =
+						cb->qsi.max_sampl_rate;
+			} else
+				max_sampler_rate = cb->qsi.max_sampl_rate;
+		}
+	}
+	register_external_interrupt(0x1407, hws_ext_handler);
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+setup_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+}
+
+int hwsampler_shutdown()
+{
+	int rc;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) {
+		mutex_unlock(&hws_sem);
+
+		if (hws_wq)
+			flush_workqueue(hws_wq);
+
+		mutex_lock(&hws_sem);
+
+		if (hws_state == HWS_STOPPED) {
+			smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+			deallocate_sdbt();
+		}
+		if (hws_wq) {
+			destroy_workqueue(hws_wq);
+			hws_wq = NULL;
+		}
+
+		unregister_external_interrupt(0x1407, hws_ext_handler);
+		hws_state = HWS_INIT;
+		rc = 0;
+	}
+	mutex_unlock(&hws_sem);
+
+	unregister_cpu_notifier(&hws_cpu_notifier);
+
+	return rc;
+}
+
+/**
+ * hwsampler_start_all() - start hardware sampling on all online CPUs
+ * @rate:  specifies the used interval when samples are taken
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_start_all(unsigned long rate)
+{
+	int rc, cpu;
+
+	mutex_lock(&hws_sem);
+
+	hws_oom = 0;
+
+	rc = -EINVAL;
+	if (hws_state != HWS_STOPPED)
+		goto start_all_exit;
+
+	interval = rate;
+
+	/* fail if rate is not valid */
+	if (interval < min_sampler_rate || interval > max_sampler_rate)
+		goto start_all_exit;
+
+	rc = check_qsi_on_start();
+	if (rc)
+		goto start_all_exit;
+
+	rc = prepare_cpu_buffers();
+	if (rc)
+		goto start_all_exit;
+
+	for_each_online_cpu(cpu) {
+		rc = start_sampling(cpu);
+		if (rc)
+			break;
+	}
+	if (rc) {
+		for_each_online_cpu(cpu) {
+			stop_sampling(cpu);
+		}
+		goto start_all_exit;
+	}
+	hws_state = HWS_STARTED;
+	rc = 0;
+
+start_all_exit:
+	mutex_unlock(&hws_sem);
+
+	if (rc)
+		return rc;
+
+	register_oom_notifier(&hws_oom_notifier);
+	hws_oom = 1;
+	hws_flush_all = 0;
+	/* now let them in, 1407 CPUMF external interrupts */
+	smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */
+
+	return 0;
+}
+
+/**
+ * hwsampler_stop_all() - stop hardware sampling on all online CPUs
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_stop_all()
+{
+	int tmp_rc, rc, cpu;
+	struct hws_cpu_buffer *cb;
+
+	mutex_lock(&hws_sem);
+
+	rc = 0;
+	if (hws_state == HWS_INIT) {
+		mutex_unlock(&hws_sem);
+		return rc;
+	}
+	hws_state = HWS_STOPPING;
+	mutex_unlock(&hws_sem);
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		cb->stop_mode = 1;
+		tmp_rc = stop_sampling(cpu);
+		if (tmp_rc)
+			rc = tmp_rc;
+	}
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	mutex_lock(&hws_sem);
+	if (hws_oom) {
+		unregister_oom_notifier(&hws_oom_notifier);
+		hws_oom = 0;
+	}
+	hws_state = HWS_STOPPED;
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
new file mode 100644
index 000000000000..8c72b59316b5
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.h
@@ -0,0 +1,113 @@
+/*
+ * CPUMF HW sampler functions and internal structures
+ *
+ *    Copyright IBM Corp. 2010
+ *    Author(s): Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+
+struct hws_qsi_info_block          /* QUERY SAMPLING information block  */
+{ /* Bit(s) */
+	unsigned int b0_13:14;      /* 0-13: zeros                       */
+	unsigned int as:1;          /* 14: sampling authorisation control*/
+	unsigned int b15_21:7;      /* 15-21: zeros                      */
+	unsigned int es:1;          /* 22: sampling enable control       */
+	unsigned int b23_29:7;      /* 23-29: zeros                      */
+	unsigned int cs:1;          /* 30: sampling activation control   */
+	unsigned int:1;             /* 31: reserved                      */
+	unsigned int bsdes:16;      /* 4-5: size of sampling entry       */
+	unsigned int:16;            /* 6-7: reserved                     */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;         /* 24-31: TEAR contents              */
+	unsigned long dear;         /* 32-39: DEAR contents              */
+	unsigned int rsvrd0;        /* 40-43: reserved                   */
+	unsigned int cpu_speed;     /* 44-47: CPU speed                  */
+	unsigned long long rsvrd1;  /* 48-55: reserved                   */
+	unsigned long long rsvrd2;  /* 56-63: reserved                   */
+};
+
+struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
+{ /* bytes 0 - 7  Bit(s) */
+	unsigned int s:1;           /* 0: maximum buffer indicator       */
+	unsigned int h:1;           /* 1: part. level reserved for VM use*/
+	unsigned long b2_53:52;     /* 2-53: zeros                       */
+	unsigned int es:1;          /* 54: sampling enable control       */
+	unsigned int b55_61:7;      /* 55-61: - zeros                    */
+	unsigned int cs:1;          /* 62: sampling activation control   */
+	unsigned int b63:1;         /* 63: zero                          */
+	unsigned long interval;     /* 8-15: sampling interval           */
+	unsigned long tear;         /* 16-23: TEAR contents              */
+	unsigned long dear;         /* 24-31: DEAR contents              */
+	/* 32-63:                                                        */
+	unsigned long rsvrd1;       /* reserved                          */
+	unsigned long rsvrd2;       /* reserved                          */
+	unsigned long rsvrd3;       /* reserved                          */
+	unsigned long rsvrd4;       /* reserved                          */
+};
+
+struct hws_cpu_buffer {
+	unsigned long first_sdbt;       /* @ of 1st SDB-Table for this CP*/
+	unsigned long worker_entry;
+	unsigned long sample_overflow;  /* taken from SDB ...            */
+	struct hws_qsi_info_block qsi;
+	struct hws_ssctl_request_block ssctl;
+	struct work_struct worker;
+	atomic_t ext_params;
+	unsigned long req_alert;
+	unsigned long loss_of_sample_data;
+	unsigned long invalid_entry_address;
+	unsigned long incorrect_sdbt_entry;
+	unsigned long sample_auth_change_alert;
+	unsigned int finish:1;
+	unsigned int oom:1;
+	unsigned int stop_mode:1;
+};
+
+struct hws_data_entry {
+	unsigned int def:16;        /* 0-15  Data Entry Format           */
+	unsigned int R:4;           /* 16-19 reserved                    */
+	unsigned int U:4;           /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;           /* zeros                             */
+	unsigned int T:1;           /* 26 PSW DAT mode                   */
+	unsigned int W:1;           /* 27 PSW wait state                 */
+	unsigned int P:1;           /* 28 PSW Problem state              */
+	unsigned int AS:2;          /* 29-30 PSW address-space control   */
+	unsigned int I:1;           /* 31 entry valid or invalid         */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN                       */
+	unsigned long long ia;      /* Instruction Address               */
+	unsigned long long lpp;     /* Logical-Partition Program Param.  */
+	unsigned long long vpp;     /* Virtual-Machine Program Param.    */
+};
+
+struct hws_trailer_entry {
+	unsigned int f:1;           /* 0 - Block Full Indicator          */
+	unsigned int a:1;           /* 1 - Alert request control         */
+	unsigned long:62;           /* 2 - 63: Reserved                  */
+	unsigned long overflow;     /* 64 - sample Overflow count        */
+	unsigned long timestamp;    /* 16 - time-stamp                   */
+	unsigned long timestamp1;   /*                                   */
+	unsigned long reserved1;    /* 32 -Reserved                      */
+	unsigned long reserved2;    /*                                   */
+	unsigned long progusage1;   /* 48 - reserved for programming use */
+	unsigned long progusage2;   /*                                   */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+#endif /*HWSAMPLER_H_*/
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 7a995113b918..c63d7e58352b 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -4,23 +4,193 @@
  * S390 Version
  *   Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
+ *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
+ *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2011 OProfile authors
  */
 
 #include <linux/oprofile.h>
 #include <linux/init.h>
 #include <linux/errno.h>
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
 
+#include "../../../drivers/oprofile/oprof.h"
 
 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 
-int __init oprofile_arch_init(struct oprofile_operations* ops)
+#ifdef CONFIG_64BIT
+
+#include "hwsampler.h"
+
+#define DEFAULT_INTERVAL	4096
+
+#define DEFAULT_SDBT_BLOCKS	1
+#define DEFAULT_SDB_BLOCKS	511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+static unsigned long oprofile_min_interval;
+static unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static int hwsampler_file;
+static int hwsampler_running;	/* start_mutex must be held to change */
+
+static struct oprofile_operations timer_ops;
+
+static int oprofile_hwsampler_start(void)
+{
+	int retval;
+
+	hwsampler_running = hwsampler_file;
+
+	if (!hwsampler_running)
+		return timer_ops.start();
+
+	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (retval)
+		return retval;
+
+	retval = hwsampler_start_all(oprofile_hw_interval);
+	if (retval)
+		hwsampler_deallocate();
+
+	return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+	if (!hwsampler_running) {
+		timer_ops.stop();
+		return;
+	}
+
+	hwsampler_stop_all();
+	hwsampler_deallocate();
+	return;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+		size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+		size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (oprofile_started)
+		/*
+		 * save to do without locking as we set
+		 * hwsampler_running in start() when start_mutex is
+		 * held
+		 */
+		return -EBUSY;
+
+	hwsampler_file = val;
+
+	return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+	.read		= hwsampler_read,
+	.write		= hwsampler_write,
+};
+
+static int oprofile_create_hwsampling_files(struct super_block *sb,
+						struct dentry *root)
+{
+	struct dentry *hw_dir;
+
+	/* reinitialize default values */
+	hwsampler_file = 1;
+
+	hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+	if (!hw_dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+	oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+				&oprofile_hw_interval);
+	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+				&oprofile_min_interval);
+	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+				&oprofile_max_interval);
+	oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+				&oprofile_sdbt_blocks);
+
+	return 0;
+}
+
+static int oprofile_hwsampler_init(struct oprofile_operations *ops)
+{
+	if (hwsampler_setup())
+		return -ENODEV;
+
+	/*
+	 * create hwsampler files only if hwsampler_setup() succeeds.
+	 */
+	oprofile_min_interval = hwsampler_query_min_interval();
+	if (oprofile_min_interval < 0) {
+		oprofile_min_interval = 0;
+		return -ENODEV;
+	}
+	oprofile_max_interval = hwsampler_query_max_interval();
+	if (oprofile_max_interval < 0) {
+		oprofile_max_interval = 0;
+		return -ENODEV;
+	}
+
+	if (oprofile_timer_init(ops))
+		return -ENODEV;
+
+	printk(KERN_INFO "oprofile: using hardware sampling\n");
+
+	memcpy(&timer_ops, ops, sizeof(timer_ops));
+
+	ops->start = oprofile_hwsampler_start;
+	ops->stop = oprofile_hwsampler_stop;
+	ops->create_files = oprofile_create_hwsampling_files;
+
+	return 0;
+}
+
+static void oprofile_hwsampler_exit(void)
+{
+	oprofile_timer_exit();
+	hwsampler_shutdown();
+}
+
+#endif /* CONFIG_64BIT */
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
+
+#ifdef CONFIG_64BIT
+	return oprofile_hwsampler_init(ops);
+#else
 	return -ENODEV;
+#endif
 }
 
 void oprofile_arch_exit(void)
 {
+#ifdef CONFIG_64BIT
+	oprofile_hwsampler_exit();
+#endif
 }