author      Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 21:22:51 +0200
committer   Linus Torvalds <torvalds@linux-foundation.org>   2016-07-26 21:22:51 +0200
commit      015cd867e566e3a27b5e8062eb24eeaa4d77297f (patch)
tree        d96c90119b3c454b5c5cfb07f2409a87bbd29754
parent      Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff)
parent      s390/pci: Delete an unnecessary check before the function call "pci_dev_put" (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Martin Schwidefsky:
"There are a couple of new things for s390 with this merge request:
- a new scheduling domain "drawer" is added to reflect the unusual
topology found on z13 machines. Performance tests showed up to 8
percent gain with the additional domain.
- the new crc-32 checksum crypto module uses the vector-galois-field
multiply and sum SIMD instruction to speed up crc-32 and crc-32c.
- proper __ro_after_init support, this requires RO_AFTER_INIT_DATA in
the generic vmlinux.lds linker script definitions.
- kcov instrumentation support. A prerequisite for that is the
inline assembly basic block cleanup, which is the reason for the
net/iucv/iucv.c change.
- support for 2GB pages is added to the hugetlbfs backend.
Then there are two removals:
- the oprofile hardware sampling support is dead code and is removed.
The oprofile user space uses the perf interface nowadays.
- the ETR clock synchronization is removed, this has been superseded
  by the STP clock synchronization. And it always has been
  "interesting" code...
And the usual bug fixes and cleanups"
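
The new drawer level is exposed through the same sysfs topology interface as
the existing core and book levels, so it can be inspected with ordinary file
reads. A minimal sketch (a hypothetical user-space reader, not part of this
merge; the attribute paths come from the Documentation/cputopology.txt change
further below):

#include <stdio.h>

static void show(const char *attr)
{
        char path[128], buf[64];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu0/topology/%s", attr);
        f = fopen(path, "r");
        if (!f)
                return;         /* attribute absent: kernel or arch has no drawers */
        if (fgets(buf, sizeof(buf), f))
                printf("%s: %s", attr, buf);    /* sysfs value ends in '\n' */
        fclose(f);
}

int main(void)
{
        show("drawer_id");
        show("drawer_siblings");
        show("drawer_siblings_list");
        return 0;
}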
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (82 commits)
s390/pci: Delete an unnecessary check before the function call "pci_dev_put"
s390/smp: clean up a condition
s390/cio/chp : Remove deprecated create_singlethread_workqueue
s390/chsc: improve channel path descriptor determination
s390/chsc: sanitize fmt check for chp_desc determination
s390/cio: make fmt1 channel path descriptor optional
s390/chsc: fix ioctl CHSC_INFO_CU command
s390/cio/device_ops: fix kernel doc
s390/cio: allow to reset channel measurement block
s390/console: Make preferred console handling more consistent
s390/mm: fix gmap tlb flush issues
s390/mm: add support for 2GB hugepages
s390: have unique symbol for __switch_to address
s390/cpuinfo: show maximum thread id
s390/ptrace: clarify bits in the per_struct
s390: stack address vs thread_info
s390: remove pointless load within __switch_to
s390: enable kcov support
s390/cpumf: use basic block for ecctr inline assembly
s390/hypfs: use basic block for diag inline assembly
...
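
A note on the two "use basic block" commits above: a register asm variable is
only guaranteed to be live in its hard register across the asm statement
itself. With kcov enabled, the compiler inserts a __sanitizer_cov_trace_pc()
call at the start of every basic block, and such a call may clobber those
registers if a branch sits between the variable setup and the asm. The fix
pattern, sketched in simplified form (s390-only; the empty asm stands in for
the real diagnose/counter instruction, and all identifiers here are
illustrative; see the hypfs_diag.c hunk later in the diff for the real
instance):

/*
 * The helper containing the register asm is reduced to a single basic
 * block; all branching moves to the caller, which only sees ordinary
 * C values.
 */
static inline int __hw_op(unsigned long *code, unsigned long size)
{
        register unsigned long _code asm("0") = *code;  /* bound to %r0 */
        register unsigned long _size asm("1") = size;   /* bound to %r1 */

        asm volatile("" : "+d" (_code), "+d" (_size));  /* placeholder insn */
        *code = _code;          /* hand results back as plain values ... */
        return _size;
}

static int hw_op(unsigned long code, unsigned long size)
{
        size = __hw_op(&code, size);
        if (code)               /* ... so the error check (a separate   */
                return -1;      /* basic block) lives outside the asm   */
        return size;            /* helper                               */
}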
111 files changed, 2861 insertions, 3961 deletions
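
One reading aid for the largest addition below: arch/s390/crypto/crc32-vx.c
generates its three CRC functions with the DEFINE_CRC32_VX() macro.
Hand-expanding the macro for the little-endian IEEE CRC-32 case (the code is
identical to the macro body in the diff, shown here purely for readability)
also illustrates the kernel_fpu_begin()/kernel_fpu_end() protocol introduced
in asm/fpu/api.h:

static u32 __pure crc32_le_vx(u32 crc, unsigned char const *data,
                              size_t datalen)
{
        struct kernel_fpu vxstate;
        unsigned long prealign, aligned, remaining;

        /* Software CRC until the buffer reaches 16-byte alignment. */
        if ((unsigned long)data & VX_ALIGN_MASK) {
                prealign = VX_ALIGNMENT -
                           ((unsigned long)data & VX_ALIGN_MASK);
                datalen -= prealign;
                crc = crc32_le(crc, data, prealign);
                data = (void *)((unsigned long)data + prealign);
        }

        /* Too short to amortize the vector setup: stay in software. */
        if (datalen < VX_MIN_LEN)
                return crc32_le(crc, data, datalen);

        aligned = datalen & ~VX_ALIGN_MASK;
        remaining = datalen & VX_ALIGN_MASK;

        /* Only V0..V15 are used, so only KERNEL_VXR_LOW is saved. */
        kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
        crc = crc32_le_vgfm_16(crc, data, aligned);
        kernel_fpu_end(&vxstate);

        /* Software CRC for the sub-16-byte tail. */
        if (remaining)
                crc = crc32_le(crc, data + aligned, remaining);

        return crc;
}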
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index 12b1b25b4da9..f722f227a73b 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -20,48 +20,70 @@ to /proc/cpuinfo output of some architectures:
 	identifier (rather than the kernel's). The actual value is
 	architecture and platform dependent.
 
-4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
+4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
+
+	the drawer ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's). The actual value is
+	architecture and platform dependent.
+
+5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	core as cpuX.
 
-5) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
+6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	core as cpuX.
 
-6) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	physical_package_id.
 
-7) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
+8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	physical_package_id.
 
-8) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
 
 	internal kernel map of cpuX's hardware threads within the same
 	book_id.
 
-9) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
+10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
 
 	human-readable list of cpuX's hardware threads within the same
 	book_id.
 
+11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
+
+	internal kernel map of cpuX's hardware threads within the same
+	drawer_id.
+
+12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
+
+	human-readable list of cpuX's hardware threads within the same
+	drawer_id.
+
 To implement it in an architecture-neutral way, a new source file,
-drivers/base/topology.c, is to export the 6 or 9 attributes. The three book
-related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
+drivers/base/topology.c, is to export the 6 to 12 attributes. The book
+and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
+and CONFIG_SCHED_DRAWER are selected.
+
+CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where
+they reflect the cpu and cache hierarchy.
 
 For an architecture to support this feature, it must define some of
 these macros in include/asm-XXX/topology.h:
 #define topology_physical_package_id(cpu)
 #define topology_core_id(cpu)
 #define topology_book_id(cpu)
+#define topology_drawer_id(cpu)
 #define topology_sibling_cpumask(cpu)
 #define topology_core_cpumask(cpu)
 #define topology_book_cpumask(cpu)
+#define topology_drawer_cpumask(cpu)
 
 The type of **_id macros is int.
 The type of **_cpumask macros is (const) struct cpumask *. The latter
@@ -78,6 +100,8 @@ not defined by include/asm-XXX/topology.h:
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are
 no default definitions for topology_book_id() and topology_book_cpumask().
+For architectures that don't support drawes (CONFIG_SCHED_DRAWER) there are
+no default definitions for topology_drawer_id() and topology_drawer_cpumask().
 
 Additionally, CPU topology information is provided under
 /sys/devices/system/cpu and includes these files. The internal
 source for the output is in brackets ("[]").
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 17e33dbbf226..f1fac4d3dc51 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2794,8 +2794,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			timer: [X86] Force use of architectural NMI
 				timer mode (see also oprofile.timer
 				for generic hr timer mode)
-			[s390] Force legacy basic mode sampling
-				(report cpu_type "timer")
 
 	oops=panic	Always panic on oopses. Default is to just kill the
 			process, but there is a small probability of
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index 3da163383c93..61329fd62e89 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -405,7 +405,7 @@ Example:
 
 > ls /sys/kernel/debug/s390dbf/dasd
 flush  hex_ascii  level pages raw
-> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1
+> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a8c259059adf..9e607bf2d640 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -72,6 +72,7 @@ config S390
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_KCOV
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
@@ -163,6 +164,7 @@ config S390
 	select NO_BOOTMEM
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
+	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select TTY
 	select VIRT_CPU_ACCOUNTING
@@ -477,6 +479,9 @@ config SCHED_MC
 config SCHED_BOOK
 	def_bool n
 
+config SCHED_DRAWER
+	def_bool n
+
 config SCHED_TOPOLOGY
 	def_bool y
 	prompt "Topology scheduler support"
@@ -484,6 +489,7 @@ config SCHED_TOPOLOGY
 	select SCHED_SMT
 	select SCHED_MC
 	select SCHED_BOOK
+	select SCHED_DRAWER
 	help
 	  Topology scheduler support improves the CPU scheduler's decision
 	  making when dealing with machines that have multi-threading,
@@ -605,16 +611,6 @@ config PCI_NR_FUNCTIONS
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
 
-config PCI_NR_MSI
-	int "Maximum number of MSI interrupts (64-32768)"
-	range 64 32768
-	default "256"
-	help
-	  This defines the number of virtual interrupts the kernel will
-	  provide for MSI interrupts. If you configure your system to have
-	  too few drivers will fail to allocate MSI interrupts for all
-	  PCI devices.
- source "drivers/pci/Kconfig" endif # PCI diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 1dd210347e12..98ec652cc332 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -4,6 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # +KCOV_INSTRUMENT := n + targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 targets += misc.o piggy.o sizes.h head.o diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index d5ec71b2ed02..889ea3450210 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,6 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f46a35115d2d..1bcfd764910a 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,6 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index ba0f2a58b8cd..13ff090139c8 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,6 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 7f0b7cda6259..d1033de4c4ee 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -9,3 +9,6 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o +obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o + +crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c new file mode 100644 index 000000000000..577ae1d4ae89 --- /dev/null +++ b/arch/s390/crypto/crc32-vx.c @@ -0,0 +1,310 @@ +/* + * Crypto-API module for CRC-32 algorithms implemented with the + * z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 
2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <crypto/internal/hash.h> +#include <asm/fpu/api.h> + + +#define CRC32_BLOCK_SIZE 1 +#define CRC32_DIGEST_SIZE 4 + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +struct crc_ctx { + u32 key; +}; + +struct crc_desc_ctx { + u32 crc; +}; + +/* Prototypes for functions in assembly files */ +u32 crc32_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32_be_vgfm_16(u32 crc, unsigned char const *buf, size_t size); +u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementation + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. + * + */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + static u32 __pure ___fname(u32 crc, \ + unsigned char const *data, size_t datalen) \ + { \ + struct kernel_fpu vxstate; \ + unsigned long prealign, aligned, remaining; \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign = VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -= prealign; \ + crc = ___crc32_sw(crc, data, prealign); \ + data = (void *)((unsigned long)data + prealign); \ + } \ + \ + if (datalen < VX_MIN_LEN) \ + return ___crc32_sw(crc, data, datalen); \ + \ + aligned = datalen & ~VX_ALIGN_MASK; \ + remaining = datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc = ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate); \ + \ + if (remaining) \ + crc = ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } + +DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) +DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) +DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) + + +static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) +{ + struct crc_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static int crc32_vx_init(struct shash_desc *desc) +{ + struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + return 0; +} + +static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = le32_to_cpu(*(__le32 *)newkey); + return 0; +} + +static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, + unsigned int newkeylen) +{ + struct crc_ctx *mctx = crypto_shash_ctx(tfm); + + if (newkeylen != sizeof(mctx->key)) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + mctx->key = be32_to_cpu(*(__be32 *)newkey); + return 0; +} + +static int crc32le_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__le32 *)out = 
cpu_to_le32p(&ctx->crc); + return 0; +} + +static int crc32be_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + *(__be32 *)out = cpu_to_be32p(&ctx->crc); + return 0; +} + +static int crc32c_vx_final(struct shash_desc *desc, u8 *out) +{ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); + + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32p(&ctx->crc); + return 0; +} + +static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len)); + return 0; +} + +static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len)); + return 0; +} + +static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, + u8 *out) +{ + /* + * Perform a final XOR with 0xFFFFFFFF to be in sync + * with the generic crc32c shash implementation. + */ + *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); + return 0; +} + + +#define CRC32_VX_FINUP(alg, func) \ + static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ + data, datalen, out); \ + } + +CRC32_VX_FINUP(crc32le, crc32_le_vx) +CRC32_VX_FINUP(crc32be, crc32_be_vx) +CRC32_VX_FINUP(crc32c, crc32c_le_vx) + +#define CRC32_VX_DIGEST(alg, func) \ + static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ + unsigned int len, u8 *out) \ + { \ + return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ + data, len, out); \ + } + +CRC32_VX_DIGEST(crc32le, crc32_le_vx) +CRC32_VX_DIGEST(crc32be, crc32_be_vx) +CRC32_VX_DIGEST(crc32c, crc32c_le_vx) + +#define CRC32_VX_UPDATE(alg, func) \ + static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ + unsigned int datalen) \ + { \ + struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \ + ctx->crc = func(ctx->crc, data, datalen); \ + return 0; \ + } + +CRC32_VX_UPDATE(crc32le, crc32_le_vx) +CRC32_VX_UPDATE(crc32be, crc32_be_vx) +CRC32_VX_UPDATE(crc32c, crc32c_le_vx) + + +static struct shash_alg crc32_vx_algs[] = { + /* CRC-32 LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32le_vx_update, + .final = crc32le_vx_final, + .finup = crc32le_vx_finup, + .digest = crc32le_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32 BE */ + { + .init = crc32_vx_init, + .setkey = crc32be_vx_setkey, + .update = crc32be_vx_update, + .final = crc32be_vx_final, + .finup = crc32be_vx_finup, + .digest = crc32be_vx_digest, + .descsize = sizeof(struct crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32be", + .cra_driver_name = "crc32be-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_zero, + }, + }, + /* CRC-32C LE */ + { + .init = crc32_vx_init, + .setkey = crc32_vx_setkey, + .update = crc32c_vx_update, + .final = crc32c_vx_final, + .finup = crc32c_vx_finup, + .digest = crc32c_vx_digest, + .descsize = sizeof(struct 
crc_desc_ctx), + .digestsize = CRC32_DIGEST_SIZE, + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-vx", + .cra_priority = 200, + .cra_blocksize = CRC32_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crc_ctx), + .cra_module = THIS_MODULE, + .cra_init = crc32_vx_cra_init_invert, + }, + }, +}; + + +static int __init crc_vx_mod_init(void) +{ + return crypto_register_shashes(crc32_vx_algs, + ARRAY_SIZE(crc32_vx_algs)); +} + +static void __exit crc_vx_mod_exit(void) +{ + crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); +} + +module_cpu_feature_match(VXRS, crc_vx_mod_init); +module_exit(crc_vx_mod_exit); + +MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-vx"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S new file mode 100644 index 000000000000..8013989cd2e5 --- /dev/null +++ b/arch/s390/crypto/crc32be-vx.S @@ -0,0 +1,207 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of CRC-32 checksums. + * + * This CRC-32 implementation algorithm processes the most-significant + * bit first (BE). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/vx-insn.h> + +/* Vector register range containing CRC-32 constants */ +#define CONST_R1R2 %v9 +#define CONST_R3R4 %v10 +#define CONST_R5 %v11 +#define CONST_R6 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these defintions: + * + * R1 = x4*128+64 mod P(x) + * R2 = x4*128 mod P(x) + * R3 = x128+64 mod P(x) + * R4 = x128 mod P(x) + * R5 = x96 mod P(x) + * R6 = x64 mod P(x) + * + * Barret reduction constant, u, is defined as floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * Note that the constant definitions below are extended in order to compute + * intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. + * The righmost doubleword can be 0 to prevent contribution to the result or + * can be multiplied by 1 to perform an XOR without the need for a separate + * VECTOR EXCLUSIVE OR instruction. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + +.Lconstants_CRC_32_BE: + .quad 0x08833794c, 0x0e6228b11 # R1, R2 + .quad 0x0c5b9cd4c, 0x0e8a45605 # R3, R4 + .quad 0x0f200aa66, 1 << 32 # R5, x32 + .quad 0x0490d678d, 1 # R6, 1 + .quad 0x104d101df, 0 # u + .quad 0x104C11DB7, 0 # P(x) + +.previous + +.text +/* + * The CRC-32 function(s) use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. 
+ * V5..V8: Next data chunks that are fetched from the input buffer. + * + * V9..V14: CRC-32 constants. + */ +ENTRY(crc32_be_vgfm_16) + /* Load CRC-32 constants */ + larl %r5,.Lconstants_CRC_32_BE + VLM CONST_R1R2,CONST_CRC_POLY,0,%r5 + + /* Load the initial CRC value into the leftmost word of V0. */ + VZERO %v0 + VLVGF %v0,%r2,0 + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + /* Check remaining buffer size and jump to proper folding method */ + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the reduction constants in V0. The intermediate result is + * then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R1R2,%v1,%v5 + VGFMAG %v2,CONST_R1R2,%v2,%v6 + VGFMAG %v3,CONST_R1R2,%v3,%v7 + VGFMAG %v4,CONST_R1R2,%v4,%v8 + + /* Adjust buffer pointer and length for next loop */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* Fold V1 to V4 into a single 128-bit value in V1 */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 + VGFMAG %v1,CONST_R3R4,%v1,%v3 + VGFMAG %v1,CONST_R3R4,%v1,%v4 + + /* Check whether to continue with 64-bit folding */ + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VGFMAG %v1,CONST_R3R4,%v1,%v2 /* Fold next data chunk */ + + /* Adjust buffer pointer and size for folding next data chunk */ + aghi %r3,16 + aghi %r4,-16 + + /* Process remaining data chunks */ + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * The R5 constant is used to fold a 128-bit value into an 96-bit value + * that is XORed with the next 96-bit input data chunk. To use a single + * VGFMG instruction, multiply the rightmost 64-bit with x^32 (1<<32) to + * form an intermediate 96-bit value (with appended zeros) which is then + * XORed with the intermediate reduction result. + */ + VGFMG %v1,CONST_R5,%v1 + + /* + * Further reduce the remaining 96-bit value to a 64-bit value using a + * single VGFMG, the rightmost doubleword is multiplied with 0x1. The + * intermediate result is then XORed with the product of the leftmost + * doubleword with R6. The result is a 64-bit value and is subject to + * the Barret reduction. + */ + VGFMG %v1,CONST_R6,%v1 + + /* + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: To compensate the division by x^32, use the vector unpack + * instruction to move the leftmost word into the leftmost doubleword + * of the vector register. The rightmost doubleword is multiplied + * with zero to not contribute to the intermedate results. 
+ */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial in VO with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is in the rightmost word of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,3 + br %r14 + +.previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S new file mode 100644 index 000000000000..17f2504c2633 --- /dev/null +++ b/arch/s390/crypto/crc32le-vx.S @@ -0,0 +1,268 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet + * and Castagnoli. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/vx-insn.h> + +/* Vector register range containing CRC-32 constants */ +#define CONST_PERM_LE2BE %v9 +#define CONST_R2R1 %v10 +#define CONST_R4R3 %v11 +#define CONST_R5 %v12 +#define CONST_RU_POLY %v13 +#define CONST_CRC_POLY %v14 + +.data +.align 8 + +/* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + * + * CRC-32C (Castagnoli) polynomials: + * + * P(x) = 0x1EDC6F41 + * P'(x) = 0x82F63B78 + */ + +.Lconstants_CRC_32_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x1c6e41596, 0x154442bd4 # R2, R1 + .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 + .octa 0x163cd6124 # R5 + .octa 0x1F7011641 # u' + .octa 0x1DB710641 # P'(x) << 1 + +.Lconstants_CRC_32C_LE: + .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE mask + .quad 0x09e4addf8, 0x740eef02 # R2, R1 + .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 + .octa 0x0dd45aab8 # R5 + .octa 0x0dea713f1 # u' + .octa 0x105ec76f0 # P'(x) << 1 + +.previous + + +.text + +/* + * The CRC-32 functions use these calling conventions: + * + * Parameters: + * + * %r2: Initial CRC value, typically ~0; and final CRC (return) value. + * %r3: Input buffer pointer, performance might be improved if the + * buffer is on a doubleword boundary. + * %r4: Length of the buffer, must be 64 bytes or greater. + * + * Register usage: + * + * %r5: CRC-32 constant pool base pointer. + * V0: Initial CRC value and intermediate constants and results. + * V1..V4: Data for CRC computation. + * V5..V8: Next data chunks that are fetched from the input buffer. + * V9: Constant for BE->LE conversion and shift operations + * + * V10..V14: CRC-32 constants. 
+ */ + +ENTRY(crc32_le_vgfm_16) + larl %r5,.Lconstants_CRC_32_LE + j crc32_le_vgfm_generic + +ENTRY(crc32c_le_vgfm_16) + larl %r5,.Lconstants_CRC_32C_LE + j crc32_le_vgfm_generic + + +crc32_le_vgfm_generic: + /* Load CRC-32 constants */ + VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5 + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. + */ + VZERO %v0 /* Clear V0 */ + VLVGF %v0,%r2,3 /* Load CRC into rightmost word */ + + /* Load a 64-byte data chunk and XOR with CRC */ + VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */ + VPERM %v1,%v1,%v1,CONST_PERM_LE2BE + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VPERM %v3,%v3,%v3,CONST_PERM_LE2BE + VPERM %v4,%v4,%v4,CONST_PERM_LE2BE + + VX %v1,%v0,%v1 /* V1 ^= CRC */ + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jl .Lless_than_64bytes + +.Lfold_64bytes_loop: + /* Load the next 64-byte data chunk into V5 to V8 */ + VLM %v5,%v8,0,%r3 + VPERM %v5,%v5,%v5,CONST_PERM_LE2BE + VPERM %v6,%v6,%v6,CONST_PERM_LE2BE + VPERM %v7,%v7,%v7,CONST_PERM_LE2BE + VPERM %v8,%v8,%v8,CONST_PERM_LE2BE + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate result + * is then folded (accumulated) with the next data chunk in V5 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + VGFMAG %v1,CONST_R2R1,%v1,%v5 + VGFMAG %v2,CONST_R2R1,%v2,%v6 + VGFMAG %v3,CONST_R2R1,%v3,%v7 + VGFMAG %v4,CONST_R2R1,%v4,%v8 + + aghi %r3,64 /* BUF = BUF + 64 */ + aghi %r4,-64 /* LEN = LEN - 64 */ + + cghi %r4,64 + jnl .Lfold_64bytes_loop + +.Lless_than_64bytes: + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. + */ + VGFMAG %v1,CONST_R4R3,%v1,%v2 + VGFMAG %v1,CONST_R4R3,%v1,%v3 + VGFMAG %v1,CONST_R4R3,%v1,%v4 + + cghi %r4,16 + jl .Lfinal_fold + +.Lfold_16bytes_loop: + + VL %v2,0,,%r3 /* Load next data chunk */ + VPERM %v2,%v2,%v2,CONST_PERM_LE2BE + VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */ + + aghi %r3,16 + aghi %r4,-16 + + cghi %r4,16 + jnl .Lfold_16bytes_loop + +.Lfinal_fold: + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + VLEIB %v9,0x40,7 + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + VSRLB %v0,CONST_R4R3,%v9 + VLEIG %v0,1,0 + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + VGFMG %v1,%v0,%v1 + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. 
+ * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. + */ + VLEIB %v9,0x20,7 /* Shift by words */ + VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */ + VUPLLF %v1,%v1 /* Split rightmost doubleword */ + VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */ + + /* + * Apply a Barret reduction to compute the final 32-bit CRC value. + * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + VUPLLF %v2,%v1 + VGFMG %v2,CONST_RU_POLY,%v2 + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + VUPLLF %v2,%v2 + VGFMAG %v2,CONST_CRC_POLY,%v2,%v1 + +.Ldone: + VLGVF %r2,%v2,2 + br %r14 + +.previous diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 3f571ea89509..ccccebeeaaf6 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -225,12 +225,16 @@ CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m CONFIG_ZCRYPT=m CONFIG_CRYPTO_SHA1_S390=m CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_CRC32_S390=m CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 045035796ca7..67d43a0eabb4 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -337,25 +337,27 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) /* Diagnose 204 functions */ -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) { - register unsigned long _subcode asm("0") = subcode; + register unsigned long _subcode asm("0") = *subcode; register unsigned long _size asm("1") = size; asm volatile( " diag %2,%0,0x204\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; + *subcode = _subcode; return _size; } static int diag204(unsigned long subcode, unsigned long size, void *addr) { diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; } /* diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 44feac38ccfc..012919d9833b 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -70,7 +70,7 @@ static int diag2fc(int size, char* query, void *addr) diag_stat_inc(DIAG_STAT_X2FC); asm volatile( " diag %0,%1,0x2fc\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "=d" (residual_cnt), "+d" 
(rc) : "0" (&parm_list) : "memory"); diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 22da3b34c655..05219a5e0b2f 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -13,9 +13,6 @@ #define L1_CACHE_SHIFT 8 #define NET_SKB_PAD 32 -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) - -/* Read-only memory is marked before mark_rodata_ro() is called. */ -#define __ro_after_init __read_mostly +#define __read_mostly __section(.data..read_mostly) #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index d1e7b0a0feeb..f7ed88cc066e 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -320,7 +320,7 @@ struct cio_iplinfo { extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo); /* Function from drivers/s390/cio/chsc.c */ -int chsc_sstpc(void *page, unsigned int op, u16 ctrl); +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); #endif diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 9dd04b9e9782..03516476127b 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -169,16 +169,27 @@ static inline int lcctl(u64 ctl) } /* Extract CPU counter */ -static inline int ecctr(u64 ctr, u64 *val) +static inline int __ecctr(u64 ctr, u64 *content) { - register u64 content asm("4") = 0; + register u64 _content asm("4") = 0; int cc; asm volatile ( " .insn rre,0xb2e40000,%0,%2\n" " ipm %1\n" " srl %1,28\n" - : "=d" (content), "=d" (cc) : "d" (ctr) : "cc"); + : "=d" (_content), "=d" (cc) : "d" (ctr) : "cc"); + *content = _content; + return cc; +} + +/* Extract CPU counter */ +static inline int ecctr(u64 ctr, u64 *val) +{ + u64 content; + int cc; + + cc = __ecctr(ctr, &content); if (!cc) *val = content; return cc; diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921c1c42..86cae09e076a 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -49,7 +49,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) diag_stat_inc(DIAG_STAT_X010); asm volatile( "0: diag %0,%1,0x10\n" - "1:\n" + "1: nopr %%r7\n" EX_TABLE(0b, 1b) EX_TABLE(1b, 1b) : : "a" (start_addr), "a" (end_addr)); diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h deleted file mode 100644 index 105f90e63a0e..000000000000 --- a/arch/s390/include/asm/etr.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright IBM Corp. 
2006 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#ifndef __S390_ETR_H -#define __S390_ETR_H - -/* ETR attachment control register */ -struct etr_eacr { - unsigned int e0 : 1; /* port 0 stepping control */ - unsigned int e1 : 1; /* port 1 stepping control */ - unsigned int _pad0 : 5; /* must be 00100 */ - unsigned int dp : 1; /* data port control */ - unsigned int p0 : 1; /* port 0 change recognition control */ - unsigned int p1 : 1; /* port 1 change recognition control */ - unsigned int _pad1 : 3; /* must be 000 */ - unsigned int ea : 1; /* ETR alert control */ - unsigned int es : 1; /* ETR sync check control */ - unsigned int sl : 1; /* switch to local control */ -} __attribute__ ((packed)); - -/* Port state returned by steai */ -enum etr_psc { - etr_psc_operational = 0, - etr_psc_semi_operational = 1, - etr_psc_protocol_error = 4, - etr_psc_no_symbols = 8, - etr_psc_no_signal = 12, - etr_psc_pps_mode = 13 -}; - -/* Logical port state returned by stetr */ -enum etr_lpsc { - etr_lpsc_operational_step = 0, - etr_lpsc_operational_alt = 1, - etr_lpsc_semi_operational = 2, - etr_lpsc_protocol_error = 4, - etr_lpsc_no_symbol_sync = 8, - etr_lpsc_no_signal = 12, - etr_lpsc_pps_mode = 13 -}; - -/* ETR status words */ -struct etr_esw { - struct etr_eacr eacr; /* attachment control register */ - unsigned int y : 1; /* stepping mode */ - unsigned int _pad0 : 5; /* must be 00000 */ - unsigned int p : 1; /* stepping port number */ - unsigned int q : 1; /* data port number */ - unsigned int psc0 : 4; /* port 0 state code */ - unsigned int psc1 : 4; /* port 1 state code */ -} __attribute__ ((packed)); - -/* Second level data register status word */ -struct etr_slsw { - unsigned int vv1 : 1; /* copy of validity bit data frame 1 */ - unsigned int vv2 : 1; /* copy of validity bit data frame 2 */ - unsigned int vv3 : 1; /* copy of validity bit data frame 3 */ - unsigned int vv4 : 1; /* copy of validity bit data frame 4 */ - unsigned int _pad0 : 19; /* must by all zeroes */ - unsigned int n : 1; /* EAF port number */ - unsigned int v1 : 1; /* validity bit ETR data frame 1 */ - unsigned int v2 : 1; /* validity bit ETR data frame 2 */ - unsigned int v3 : 1; /* validity bit ETR data frame 3 */ - unsigned int v4 : 1; /* validity bit ETR data frame 4 */ - unsigned int _pad1 : 4; /* must be 0000 */ -} __attribute__ ((packed)); - -/* ETR data frames */ -struct etr_edf1 { - unsigned int u : 1; /* untuned bit */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int r : 1; /* service request bit */ - unsigned int _pad1 : 4; /* must be 0000 */ - unsigned int a : 1; /* time adjustment bit */ - unsigned int net_id : 8; /* ETR network id */ - unsigned int etr_id : 8; /* id of ETR which sends data frames */ - unsigned int etr_pn : 8; /* port number of ETR output port */ -} __attribute__ ((packed)); - -struct etr_edf2 { - unsigned int etv : 32; /* Upper 32 bits of TOD. 
*/ -} __attribute__ ((packed)); - -struct etr_edf3 { - unsigned int rc : 8; /* failure reason code */ - unsigned int _pad0 : 3; /* must be 000 */ - unsigned int c : 1; /* ETR coupled bit */ - unsigned int tc : 4; /* ETR type code */ - unsigned int blto : 8; /* biased local time offset */ - /* (blto - 128) * 15 = minutes */ - unsigned int buo : 8; /* biased utc offset */ - /* (buo - 128) = leap seconds */ -} __attribute__ ((packed)); - -struct etr_edf4 { - unsigned int ed : 8; /* ETS device dependent data */ - unsigned int _pad0 : 1; /* must be 0 */ - unsigned int buc : 5; /* biased ut1 correction */ - /* (buc - 16) * 0.1 seconds */ - unsigned int em : 6; /* ETS error magnitude */ - unsigned int dc : 6; /* ETS drift code */ - unsigned int sc : 6; /* ETS steering code */ -} __attribute__ ((packed)); - -/* - * ETR attachment information block, two formats - * format 1 has 4 reserved words with a size of 64 bytes - * format 2 has 16 reserved words with a size of 96 bytes - */ -struct etr_aib { - struct etr_esw esw; - struct etr_slsw slsw; - unsigned long long tsp; - struct etr_edf1 edf1; - struct etr_edf2 edf2; - struct etr_edf3 edf3; - struct etr_edf4 edf4; - unsigned int reserved[16]; -} __attribute__ ((packed,aligned(8))); - -/* ETR interruption parameter */ -struct etr_irq_parm { - unsigned int _pad0 : 8; - unsigned int pc0 : 1; /* port 0 state change */ - unsigned int pc1 : 1; /* port 1 state change */ - unsigned int _pad1 : 3; - unsigned int eai : 1; /* ETR alert indication */ - unsigned int _pad2 : 18; -} __attribute__ ((packed)); - -/* Query TOD offset result */ -struct etr_ptff_qto { - unsigned long long physical_clock; - unsigned long long tod_offset; - unsigned long long logical_tod_offset; - unsigned long long tod_epoch_difference; -} __attribute__ ((packed)); - -/* Inline assembly helper functions */ -static inline int etr_setr(struct etr_eacr *ctrl) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2160000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*ctrl)); - return rc; -} - -/* Stores a format 1 aib with 64 bytes */ -static inline int etr_stetr(struct etr_aib *aib) -{ - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2170000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib)); - return rc; -} - -/* Stores a format 2 aib with 96 bytes for specified port */ -static inline int etr_steai(struct etr_aib *aib, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - int rc = -EOPNOTSUPP; - - asm volatile( - " .insn s,0xb2b30000,%1\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) : "Q" (*aib), "d" (reg0)); - return rc; -} - -/* Function codes for the steai instruction. */ -#define ETR_STEAI_STEPPING_PORT 0x10 -#define ETR_STEAI_ALTERNATE_PORT 0x11 -#define ETR_STEAI_PORT_0 0x12 -#define ETR_STEAI_PORT_1 0x13 - -static inline int etr_ptff(void *ptff_block, unsigned int func) -{ - register unsigned int reg0 asm("0") = func; - register unsigned long reg1 asm("1") = (unsigned long) ptff_block; - int rc = -EOPNOTSUPP; - - asm volatile( - " .word 0x0104\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (rc), "=m" (ptff_block) - : "d" (reg0), "d" (reg1), "m" (ptff_block) : "cc"); - return rc; -} - -/* Function codes for the ptff instruction. 
*/ -#define ETR_PTFF_QAF 0x00 /* query available functions */ -#define ETR_PTFF_QTO 0x01 /* query tod offset */ -#define ETR_PTFF_QSI 0x02 /* query steering information */ -#define ETR_PTFF_ATO 0x40 /* adjust tod offset */ -#define ETR_PTFF_STO 0x41 /* set tod offset */ -#define ETR_PTFF_SFS 0x42 /* set fine steering rate */ -#define ETR_PTFF_SGS 0x43 /* set gross steering rate */ - -/* Functions needed by the machine check handler */ -int etr_switch_to_local(void); -int etr_sync_check(void); -void etr_queue_work(void); - -/* notifier for syncs */ -extern struct atomic_notifier_head s390_epoch_delta_notifier; - -/* STP interruption parameter */ -struct stp_irq_parm { - unsigned int _pad0 : 14; - unsigned int tsc : 1; /* Timing status change */ - unsigned int lac : 1; /* Link availability change */ - unsigned int tcpc : 1; /* Time control parameter change */ - unsigned int _pad2 : 15; -} __attribute__ ((packed)); - -#define STP_OP_SYNC 1 -#define STP_OP_CTRL 3 - -struct stp_sstpi { - unsigned int rsvd0; - unsigned int rsvd1 : 8; - unsigned int stratum : 8; - unsigned int vbits : 16; - unsigned int leaps : 16; - unsigned int tmd : 4; - unsigned int ctn : 4; - unsigned int rsvd2 : 3; - unsigned int c : 1; - unsigned int tst : 4; - unsigned int tzo : 16; - unsigned int dsto : 16; - unsigned int ctrl : 16; - unsigned int rsvd3 : 16; - unsigned int tto; - unsigned int rsvd4; - unsigned int ctnid[3]; - unsigned int rsvd5; - unsigned int todoff[4]; - unsigned int rsvd6[48]; -} __attribute__ ((packed)); - -/* Functions needed by the machine check handler */ -int stp_sync_check(void); -int stp_island_check(void); -void stp_queue_work(void); - -#endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h index 7ecb92b469b6..04cb4b4bcc5f 100644 --- a/arch/s390/include/asm/fcx.h +++ b/arch/s390/include/asm/fcx.h @@ -6,7 +6,7 @@ */ #ifndef _ASM_S390_FCX_H -#define _ASM_S390_FCX_H _ASM_S390_FCX_H +#define _ASM_S390_FCX_H #include <linux/types.h> diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 8ae236b0f80b..6aba6fc406ad 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -1,6 +1,41 @@ /* * In-kernel FPU support functions * + * + * Consider these guidelines before using in-kernel FPU functions: + * + * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel + * use of floating-point or vector registers and instructions. + * + * 2. For kernel_fpu_begin(), specify the vector register range you want to + * use with the KERNEL_VXR_* constants. Consider these usage guidelines: + * + * a) If your function typically runs in process-context, use the lower + * half of the vector registers, for example, specify KERNEL_VXR_LOW. + * b) If your function typically runs in soft-irq or hard-irq context, + * prefer using the upper half of the vector registers, for example, + * specify KERNEL_VXR_HIGH. + * + * If you adhere to these guidelines, an interrupted process context + * does not require to save and restore vector registers because of + * disjoint register ranges. + * + * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions + * includes logic to save and restore up to 16 vector registers at once. + * + * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different + * struct kernel_fpu states. Vector registers that are in use by outer + * levels are saved and restored. You can minimize the save and restore + * effort by choosing disjoint vector register ranges. 
+ * + * 5. To use vector floating-point instructions, specify the KERNEL_FPC + * flag to save and restore floating-point controls in addition to any + * vector register range. + * + * 6. To use floating-point registers and instructions only, specify the + * KERNEL_FPR flag. This flag triggers a save and restore of vector + * registers V0 to V15 and floating-point controls. + * * Copyright IBM Corp. 2015 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ @@ -8,6 +43,8 @@ #ifndef _ASM_S390_FPU_API_H #define _ASM_S390_FPU_API_H +#include <linux/preempt.h> + void save_fpu_regs(void); static inline int test_fp_ctl(u32 fpc) @@ -27,4 +64,42 @@ static inline int test_fp_ctl(u32 fpc) return rc; } +#define KERNEL_VXR_V0V7 1 +#define KERNEL_VXR_V8V15 2 +#define KERNEL_VXR_V16V23 4 +#define KERNEL_VXR_V24V31 8 +#define KERNEL_FPR 16 +#define KERNEL_FPC 256 + +#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7|KERNEL_VXR_V8V15) +#define KERNEL_VXR_MID (KERNEL_VXR_V8V15|KERNEL_VXR_V16V23) +#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23|KERNEL_VXR_V24V31) + +#define KERNEL_FPU_MASK (KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR) + +struct kernel_fpu; + +/* + * Note the functions below must be called with preemption disabled. + * Do not enable preemption before calling __kernel_fpu_end() to prevent + * an corruption of an existing kernel FPU state. + * + * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. + */ +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags); +void __kernel_fpu_end(struct kernel_fpu *state); + + +static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + preempt_disable(); + __kernel_fpu_begin(state, flags); +} + +static inline void kernel_fpu_end(struct kernel_fpu *state) +{ + __kernel_fpu_end(state); + preempt_enable(); +} + #endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h index fe937c9b6471..bce255ead72b 100644 --- a/arch/s390/include/asm/fpu/types.h +++ b/arch/s390/include/asm/fpu/types.h @@ -24,4 +24,14 @@ struct fpu { /* VX array structure for address operand constraints in inline assemblies */ struct vx_array { __vector128 _[__NUM_VXRS]; }; +/* In-kernel FPU state structure */ +struct kernel_fpu { + u32 mask; + u32 fpc; + union { + freg_t fprs[__NUM_FPRS]; + __vector128 vxrs[__NUM_VXRS]; + }; +}; + #endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index d9be7c0c1291..4c7fac75090e 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -41,7 +41,10 @@ static inline int prepare_hugepage_range(struct file *file, static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pte_val(*ptep) = _REGION3_ENTRY_EMPTY; + else + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 6fc44dca193e..4da22b2f0521 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -141,11 +141,11 @@ extern void setup_ipl(void); * DIAG 308 support */ enum diag308_subcode { - DIAG308_REL_HSA = 2, - DIAG308_IPL = 3, - DIAG308_DUMP = 4, - DIAG308_SET = 5, - DIAG308_STORE = 6, + DIAG308_REL_HSA = 2, + DIAG308_LOAD_CLEAR = 3, + DIAG308_LOAD_NORMAL_DUMP = 4, + DIAG308_SET = 5, + DIAG308_STORE = 6, }; enum 
diag308_ipl_type { diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index f97b055de76a..70c9bce766f5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -7,11 +7,8 @@ #define NR_IRQS_BASE 3 -#ifdef CONFIG_PCI_NR_MSI -# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) -#else -# define NR_IRQS NR_IRQS_BASE -#endif +#define NR_IRQS NR_IRQS_BASE +#define NR_IRQS_LEGACY NR_IRQS_BASE /* External interruption codes */ #define EXT_IRQ_INTERRUPT_KEY 0x0040 diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 7f9fd5e3f1bf..9be198f5ee79 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <linux/stringify.h> #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index b47ad3b642cc..591e5a5279b0 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -43,9 +43,9 @@ typedef u16 kprobe_opcode_t; #define MAX_INSN_SIZE 0x0003 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \ - (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \ + (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) \ ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) + : (((unsigned long)task_stack_page(current)) + THREAD_SIZE - (ADDR))) #define kretprobe_blacklist_size 0 diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h deleted file mode 100644 index 614dfaf47f71..000000000000 --- a/arch/s390/include/asm/mathemu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * IEEE floating point emulation. - * - * S390 version - * Copyright IBM Corp. 
1999 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ - -#ifndef __MATHEMU__ -#define __MATHEMU__ - -extern int math_emu_b3(__u8 *, struct pt_regs *); -extern int math_emu_ed(__u8 *, struct pt_regs *); -extern int math_emu_ldr(__u8 *); -extern int math_emu_ler(__u8 *); -extern int math_emu_std(__u8 *, struct pt_regs *); -extern int math_emu_ld(__u8 *, struct pt_regs *); -extern int math_emu_ste(__u8 *, struct pt_regs *); -extern int math_emu_le(__u8 *, struct pt_regs *); -extern int math_emu_lfpc(__u8 *, struct pt_regs *); -extern int math_emu_stfpc(__u8 *, struct pt_regs *); -extern int math_emu_srnm(__u8 *, struct pt_regs *); - -#endif /* __MATHEMU__ */ - - - - diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 081b2ad99d73..18226437a832 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -6,7 +6,7 @@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; spinlock_t list_lock; struct list_head pgtable_list; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index c837b79b455d..f77c638bf397 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -19,7 +19,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; @@ -90,15 +90,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, S390_lowcore.user_asce = next->context.asce; if (prev == next) return; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. 
*/ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); - atomic_inc(&next->context.attach_count); - atomic_dec(&prev->context.attach_count); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch @@ -110,10 +107,9 @@ static inline void finish_arch_post_lock_switch(void) load_kernel_asce(); if (mm) { preempt_disable(); - while (atomic_read(&mm->context.attach_count) >> 16) + while (atomic_read(&mm->context.flush_count)) cpu_relax(); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -128,7 +124,6 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd4f09b..b2146c4119b2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -21,6 +21,7 @@ #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE 2 #define ARCH_HAS_SETCLEAR_HUGE_PTE #define ARCH_HAS_HUGE_PTE_TYPE @@ -30,11 +31,12 @@ #include <asm/setup.h> #ifndef __ASSEMBLY__ +void __storage_key_init_range(unsigned long start, unsigned long end); + static inline void storage_key_init_range(unsigned long start, unsigned long end) { -#if PAGE_DEFAULT_KEY - __storage_key_init_range(start, end); -#endif + if (PAGE_DEFAULT_KEY) + __storage_key_init_range(start, end); } #define clear_page(page) memset((page), 0, PAGE_SIZE) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1f7ff85c5e4c..c64c0befd3f3 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -86,16 +86,4 @@ struct sf_raw_sample { u8 padding[]; /* Padding to next multiple of 8 */ } __packed; -/* Perf hardware reserve and release functions */ -#ifdef CONFIG_PERF_EVENTS -int perf_reserve_sampling(void); -void perf_release_sampling(void); -#else /* CONFIG_PERF_EVENTS */ -static inline int perf_reserve_sampling(void) -{ - return 0; -} -static inline void perf_release_sampling(void) {} -#endif /* CONFIG_PERF_EVENTS */ - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb89340..ea1533e07271 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -28,12 +28,33 @@ #include <linux/mm_types.h> #include <linux/page-flags.h> #include <linux/radix-tree.h> +#include <linux/atomic.h> #include <asm/bug.h> #include <asm/page.h> -extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); +extern pgd_t swapper_pg_dir[]; extern void paging_init(void); extern void vmem_map_init(void); +pmd_t *vmem_pmd_alloc(void); +pte_t *vmem_pte_alloc(void); + +enum { + PG_DIRECT_MAP_4K = 0, + PG_DIRECT_MAP_1M, + PG_DIRECT_MAP_2G, + PG_DIRECT_MAP_MAX +}; + +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +static inline void update_page_count(int level, long count) +{ + if (IS_ENABLED(CONFIG_PROC_FS)) + atomic_long_add(count, &direct_pages_count[level]); +} + +struct seq_file; +void arch_report_meminfo(struct seq_file *m); /* * The S390 doesn't have any external MMU info: the kernel page @@ -270,8 +291,23 @@ static inline 
int is_module_addr(void *addr) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ +#define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ +#define _REGION3_ENTRY_ORIGIN ~0x7ffUL/* region third table origin */ + +#define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ +#define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ +#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */ +#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */ + +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#else +#define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ +#endif + +#define _REGION_ENTRY_BITS 0xfffffffffffff227UL +#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe27UL /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -297,7 +333,8 @@ static inline int is_module_addr(void *addr) #endif /* - * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * Segment table and region3 table entry encoding + * (R = read-only, I = invalid, y = young bit): * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 @@ -391,6 +428,33 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_READ) #define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) +#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_WRITE | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_DIRTY) +#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ + _SEGMENT_ENTRY_LARGE | \ + _SEGMENT_ENTRY_READ | \ + _SEGMENT_ENTRY_YOUNG | \ + _SEGMENT_ENTRY_PROTECT) + +/* + * Region3 entry (large page) protection definitions. 
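
These region-third-table definitions extend the software dirty/young/read/write scheme already used for segment entries: the hardware provides only the invalid and protect bits, so access state is tracked in software bits and folded into the hardware ones. A small illustrative sketch (not part of the patch; function names are hypothetical) combining these masks with the pre-existing _REGION_ENTRY_PROTECT and _REGION_ENTRY_INVALID bits:

	/* illustrative only: classify a region-third-table entry */
	static int region3_entry_writable(unsigned long entry)
	{
		/* software write bit set and hardware protect bit clear */
		return (entry & _REGION3_ENTRY_WRITE) &&
		       !(entry & _REGION_ENTRY_PROTECT);
	}

	static int region3_entry_valid_large(unsigned long entry)
	{
		return (entry & _REGION3_ENTRY_LARGE) &&
		       !(entry & _REGION_ENTRY_INVALID);
	}
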
+ */ + +#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_DIRTY) +#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -424,6 +488,53 @@ static inline int mm_use_skey(struct mm_struct *mm) return 0; } +static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " csp %0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + unsigned long address = (unsigned long)ptr | 1; + + asm volatile( + " .insn rre,0xb98a0000,%0,%3" + : "+d" (reg2), "+m" (*ptr) + : "d" (reg3), "d" (address) + : "cc"); +} + +#define CRDTE_DTT_PAGE 0x00UL +#define CRDTE_DTT_SEGMENT 0x10UL +#define CRDTE_DTT_REGION3 0x14UL +#define CRDTE_DTT_REGION2 0x18UL +#define CRDTE_DTT_REGION1 0x1cUL + +static inline void crdte(unsigned long old, unsigned long new, + unsigned long table, unsigned long dtt, + unsigned long address, unsigned long asce) +{ + register unsigned long reg2 asm("2") = old; + register unsigned long reg3 asm("3") = new; + register unsigned long reg4 asm("4") = table | dtt; + register unsigned long reg5 asm("5") = address; + + asm volatile(".insn rrf,0xb98f0000,%0,%2,%4,0" + : "+d" (reg2) + : "d" (reg3), "d" (reg4), "d" (reg5), "a" (asce) + : "memory", "cc"); +} + /* * pgd/pmd/pte query functions */ @@ -465,7 +576,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; + return pud_val(pud) == _REGION3_ENTRY_EMPTY; } static inline int pud_large(pud_t pud) @@ -475,17 +586,35 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } +static inline unsigned long pud_pfn(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION3_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; +} + +static inline int pmd_large(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; +} + +static inline int pmd_bad(pmd_t pmd) +{ + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; +} + static inline int pud_bad(pud_t pud) { - /* - * With dynamic page table levels the pud can be a region table - * entry or a segment table entry. Check for the bit that are - * invalid for either table entry. 
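
The csp/cspg/crdte wrappers above encapsulate the compare-and-swap-and-purge instruction family: each atomically replaces a DAT table entry and purges the matching TLB entries, with crdte additionally taking the table type and ASCE. A hedged sketch of a caller (pmd_replace is hypothetical), assuming the usual convention that the table origin is derived by masking the entry address down to the naturally aligned table:

	/* sketch only: atomically replace one segment-table entry */
	static void pmd_replace(pmd_t *pmdp, pmd_t new,
				unsigned long addr, unsigned long asce)
	{
		unsigned long table;

		/* segment tables hold PTRS_PER_PMD entries, naturally aligned */
		table = (unsigned long) pmdp &
			~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
		crdte(pmd_val(*pmdp), pmd_val(new), table,
		      CRDTE_DTT_SEGMENT, addr, asce);
	}
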
- */ - unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & - ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; - return (pud_val(pud) & mask) != 0; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) + return pmd_bad(__pmd(pud_val(pud))); + if (pud_large(pud)) + return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0; + return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0; } static inline int pmd_present(pmd_t pmd) @@ -498,11 +627,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } -static inline int pmd_large(pmd_t pmd) -{ - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; -} - static inline unsigned long pmd_pfn(pmd_t pmd) { unsigned long origin_mask; @@ -513,13 +637,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -static inline int pmd_bad(pmd_t pmd) -{ - if (pmd_large(pmd)) - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; - return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; -} - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -963,6 +1080,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) +#define pud_page(pud) pfn_to_page(pud_pfn(pud)) /* Find an entry in the lowest level page table.. */ #define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr)) @@ -970,20 +1088,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) -static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) -{ - /* - * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) - * Convert to segment table entry format. - */ - if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) - return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) - return pgprot_val(SEGMENT_READ); - return pgprot_val(SEGMENT_WRITE); -} - static inline pmd_t pmd_wrprotect(pmd_t pmd) { pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE; @@ -1020,6 +1124,56 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +static inline pud_t pud_wrprotect(pud_t pud) +{ + pud_val(pud) &= ~_REGION3_ENTRY_WRITE; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + pud_val(pud) |= _REGION3_ENTRY_WRITE; + if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY)) + return pud; + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + return pud; +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) &= ~_REGION3_ENTRY_DIRTY; + pud_val(pud) |= _REGION_ENTRY_PROTECT; + } + return pud; +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + if (pud_large(pud)) { + pud_val(pud) |= _REGION3_ENTRY_DIRTY | + _REGION3_ENTRY_SOFT_DIRTY; + if (pud_val(pud) & _REGION3_ENTRY_WRITE) + pud_val(pud) &= ~_REGION_ENTRY_PROTECT; + } + return pud; +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) +static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) +{ + /* + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) + * Convert to segment table entry format. 
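
The pud_mk* helpers above implement the same dirty-tracking convention as their pmd counterparts: a large pud only becomes hardware-writable once it is both software-writable and dirty, so the first store to a clean mapping still faults and dirtiness can be recorded. An illustrative sequence:

	pud = pud_mkclean(pud);	/* clear DIRTY, set PROTECT */
	pud = pud_mkwrite(pud);	/* set WRITE; PROTECT stays while clean */
	pud = pud_mkdirty(pud);	/* set DIRTY, drop PROTECT: now writable */
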
+ */ + if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) + return pgprot_val(SEGMENT_NONE); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { if (pmd_large(pmd)) { @@ -1068,15 +1222,8 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) static inline void __pmdp_csp(pmd_t *pmdp) { - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INVALID; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); + csp((unsigned int *)pmdp + 1, pmd_val(*pmdp), + pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); } static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) @@ -1091,6 +1238,19 @@ static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) { unsigned long sto; @@ -1103,8 +1263,22 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } +static inline void __pudp_idte_local(unsigned long address, pud_t *pudp) +{ + unsigned long r3o; + + r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); + r3o |= _ASCE_TYPE_REGION3; + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pudp) + : "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK)) + : "cc"); +} + pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9d4d311d7e52..09529202ea77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -77,7 +77,10 @@ static inline void get_cpu_id(struct cpuid *ptr) asm volatile("stidp %0" : "=Q" (*ptr)); } -extern void s390_adjust_jiffies(void); +void s390_adjust_jiffies(void); +void s390_update_cpu_mhz(void); +void cpu_detect_mhz_feature(void); + extern const struct seq_operations cpuinfo_op; extern int sysctl_ieee_emulation_warnings; extern void execve_tail(void); @@ -233,6 +236,18 @@ void cpu_relax(void); #define cpu_relax_lowlatency() barrier() +#define ECAG_CACHE_ATTRIBUTE 0 +#define ECAG_CPU_ATTRIBUTE 1 + +static inline unsigned long __ecag(unsigned int asi, unsigned char parm) +{ + unsigned long val; + + asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ + : "=d" (val) : "a" (asi << 8 | parm)); + return val; +} + static inline void psw_set_key(unsigned int key) { asm volatile("spka 0(%0)" : : "d" (key)); diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index fbd9116eb17b..5ce29fe100ba 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,5 +4,6 @@ #include <asm-generic/sections.h> extern char _eshared[], _ehead[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git 
a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index c0f0efbb6ab5..5e8d57e1cc5e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -86,9 +86,13 @@ extern char vmpoff_cmd[]; #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) #define CONSOLE_IS_3270 (console_mode == 3) +#define CONSOLE_IS_VT220 (console_mode == 4) +#define CONSOLE_IS_HVC (console_mode == 5) #define SET_CONSOLE_SCLP do { console_mode = 1; } while (0) #define SET_CONSOLE_3215 do { console_mode = 2; } while (0) #define SET_CONSOLE_3270 do { console_mode = 3; } while (0) +#define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) +#define SET_CONSOLE_HVC do { console_mode = 5; } while (0) #define NSS_NAME_SIZE 8 extern char kernel_nss_name[]; diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h deleted file mode 100644 index 4e16aede4b06..000000000000 --- a/arch/s390/include/asm/sfp-machine.h +++ /dev/null @@ -1,142 +0,0 @@ -/* Machine-dependent software floating-point definitions. - S/390 kernel version. - Copyright (C) 1997,1998,1999 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Richard Henderson (rth@cygnus.com), - Jakub Jelinek (jj@ultra.linux.cz), - David S. Miller (davem@redhat.com) and - Peter Maydell (pmaydell@chiark.greenend.org.uk). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, write to the Free Software Foundation, Inc., - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifndef _SFP_MACHINE_H -#define _SFP_MACHINE_H - - -#define _FP_W_TYPE_SIZE 32 -#define _FP_W_TYPE unsigned int -#define _FP_WS_TYPE signed int -#define _FP_I_TYPE int - -#define _FP_MUL_MEAT_S(R,X,Y) \ - _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_D(R,X,Y) \ - _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) -#define _FP_MUL_MEAT_Q(R,X,Y) \ - _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) - -#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) -#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) -#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) - -#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) -#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 -#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 -#define _FP_NANSIGN_S 0 -#define _FP_NANSIGN_D 0 -#define _FP_NANSIGN_Q 0 - -#define _FP_KEEPNANFRACP 1 - -/* - * If one NaN is signaling and the other is not, - * we choose that one, otherwise we choose X. - */ -#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ - do { \ - if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ - && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ - { \ - R##_s = Y##_s; \ - _FP_FRAC_COPY_##wc(R,Y); \ - } \ - else \ - { \ - R##_s = X##_s; \ - _FP_FRAC_COPY_##wc(R,X); \ - } \ - R##_c = FP_CLS_NAN; \ - } while (0) - -/* Some assembly to speed things up. 
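
For reference, the deleted __FP_FRAC_ADD_3 helper below implemented a 96-bit addition of three 32-bit words with explicit carry propagation. A portable C equivalent (word 0 least significant; fp_frac_add_3 is a name chosen here for illustration) would be:

	static void fp_frac_add_3(unsigned int r[3],
				  const unsigned int x[3],
				  const unsigned int y[3])
	{
		unsigned int c0, c1;

		r[0] = x[0] + y[0];
		c0 = r[0] < x[0];		/* carry out of word 0 */
		r[1] = x[1] + y[1];
		c1 = r[1] < x[1];		/* carry out of word 1 */
		r[1] += c0;
		c1 |= r[1] < c0;		/* carry from adding c0 */
		r[2] = x[2] + y[2] + c1;
	}
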
*/ -#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) + (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " alr %2,%3\n" \ - " brc 12,0f\n" \ - " lhi 0,1\n" \ - " alr %1,0\n" \ - " brc 12,0f\n" \ - " alr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0), "i" (1) : "cc", "0" ); \ - asm volatile( \ - " alr %1,%2\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({ \ - unsigned int __r2 = (x2) - (y2); \ - unsigned int __r1 = (x1); \ - unsigned int __r0 = (x0); \ - asm volatile( \ - " slr %2,%3\n" \ - " brc 3,0f\n" \ - " lhi 0,1\n" \ - " slr %1,0\n" \ - " brc 3,0f\n" \ - " slr %0,0\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0) \ - : "d" (y0) : "cc", "0"); \ - asm volatile( \ - " slr %1,%2\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0:" \ - : "+&d" (__r2), "+&d" (__r1) \ - : "d" (y1) : "cc"); \ - (r2) = __r2; \ - (r1) = __r1; \ - (r0) = __r0; \ -}) - -#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0) - -/* Obtain the current rounding mode. */ -#define FP_ROUNDMODE mode - -/* Exception flags. */ -#define FP_EX_INVALID 0x800000 -#define FP_EX_DIVZERO 0x400000 -#define FP_EX_OVERFLOW 0x200000 -#define FP_EX_UNDERFLOW 0x100000 -#define FP_EX_INEXACT 0x080000 - -/* We write the results always */ -#define FP_INHIBIT_RESULTS 0 - -#endif diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h deleted file mode 100644 index c8b7cf9d6279..000000000000 --- a/arch/s390/include/asm/sfp-util.h +++ /dev/null @@ -1,67 +0,0 @@ -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <asm/byteorder.h> - -#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " alr %1,%3\n" \ - " brc 12,0f\n" \ - " ahi %0,1\n" \ - "0: alr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({ \ - unsigned int __sh = (ah); \ - unsigned int __sl = (al); \ - asm volatile( \ - " slr %1,%3\n" \ - " brc 3,0f\n" \ - " ahi %0,-1\n" \ - "0: slr %0,%2" \ - : "+&d" (__sh), "+d" (__sl) \ - : "d" (bh), "d" (bl) : "cc"); \ - (sh) = __sh; \ - (sl) = __sl; \ -}) - -/* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? 
a<<32:0 */ -#define umul_ppmm(wh, wl, u, v) ({ \ - unsigned int __wh = u; \ - unsigned int __wl = v; \ - asm volatile( \ - " ltr 1,%0\n" \ - " mr 0,%1\n" \ - " jnm 0f\n" \ - " alr 0,%1\n" \ - "0: ltr %1,%1\n" \ - " jnm 1f\n" \ - " alr 0,%0\n" \ - "1: lr %0,0\n" \ - " lr %1,1\n" \ - : "+d" (__wh), "+d" (__wl) \ - : : "0", "1", "cc"); \ - wh = __wh; \ - wl = __wl; \ -}) - -#define udiv_qrnnd(q, r, n1, n0, d) \ - do { unsigned long __n; \ - unsigned int __r, __d; \ - __n = ((unsigned long)(n1) << 32) + n0; \ - __d = (d); \ - (q) = __n / __d; \ - (r) = __n % __d; \ - } while (0) - -#define UDIV_NEEDS_NORMALIZATION 0 - -#define abort() BUG() - -#define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 1c8f33fca356..72df5f2de6b0 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -37,8 +37,8 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, - u32 *status) +static inline int ____pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { register unsigned long reg1 asm ("1") = parm; int cc; @@ -48,8 +48,19 @@ static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, " ipm %0\n" " srl %0,28\n" : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc"); + *status = reg1; + return cc; +} + +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) +{ + u32 _status; + int cc; + + cc = ____pcpu_sigp(addr, order, parm, &_status); if (status && cc == 1) - *status = reg1; + *status = _status; return cc; } diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h new file mode 100644 index 000000000000..7689727585b2 --- /dev/null +++ b/arch/s390/include/asm/stp.h @@ -0,0 +1,51 @@ +/* + * Copyright IBM Corp. 
2006 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ +#ifndef __S390_STP_H +#define __S390_STP_H + +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + +/* STP interruption parameter */ +struct stp_irq_parm { + unsigned int _pad0 : 14; + unsigned int tsc : 1; /* Timing status change */ + unsigned int lac : 1; /* Link availability change */ + unsigned int tcpc : 1; /* Time control parameter change */ + unsigned int _pad2 : 15; +} __attribute__ ((packed)); + +#define STP_OP_SYNC 1 +#define STP_OP_CTRL 3 + +struct stp_sstpi { + unsigned int rsvd0; + unsigned int rsvd1 : 8; + unsigned int stratum : 8; + unsigned int vbits : 16; + unsigned int leaps : 16; + unsigned int tmd : 4; + unsigned int ctn : 4; + unsigned int rsvd2 : 3; + unsigned int c : 1; + unsigned int tst : 4; + unsigned int tzo : 16; + unsigned int dsto : 16; + unsigned int ctrl : 16; + unsigned int rsvd3 : 16; + unsigned int tto; + unsigned int rsvd4; + unsigned int ctnid[3]; + unsigned int rsvd5; + unsigned int todoff[4]; + unsigned int rsvd6[48]; +} __attribute__ ((packed)); + +/* Functions needed by the machine check handler */ +int stp_sync_check(void); +int stp_island_check(void); +void stp_queue_work(void); + +#endif /* __S390_STP_H */ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index dcb6312a0b91..0bb08f341c09 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -52,6 +52,70 @@ static inline void store_clock_comparator(__u64 *time) void clock_comparator_work(void); +void __init ptff_init(void); + +extern unsigned char ptff_function_mask[16]; +extern unsigned long lpar_offset; +extern unsigned long initial_leap_seconds; + +/* Function codes for the ptff instruction. */ +#define PTFF_QAF 0x00 /* query available functions */ +#define PTFF_QTO 0x01 /* query tod offset */ +#define PTFF_QSI 0x02 /* query steering information */ +#define PTFF_QUI 0x04 /* query UTC information */ +#define PTFF_ATO 0x40 /* adjust tod offset */ +#define PTFF_STO 0x41 /* set tod offset */ +#define PTFF_SFS 0x42 /* set fine steering rate */ +#define PTFF_SGS 0x43 /* set gross steering rate */ + +/* Query TOD offset result */ +struct ptff_qto { + unsigned long long physical_clock; + unsigned long long tod_offset; + unsigned long long logical_tod_offset; + unsigned long long tod_epoch_difference; +} __packed; + +static inline int ptff_query(unsigned int nr) +{ + unsigned char *ptr; + + ptr = ptff_function_mask + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; +} + +/* Query UTC information result */ +struct ptff_qui { + unsigned int tm : 2; + unsigned int ts : 2; + unsigned int : 28; + unsigned int pad_0x04; + unsigned long leap_event; + short old_leap; + short new_leap; + unsigned int pad_0x14; + unsigned long prt[5]; + unsigned long cst[3]; + unsigned int skew; + unsigned int pad_0x5c[41]; +} __packed; + +static inline int ptff(void *ptff_block, size_t len, unsigned int func) +{ + typedef struct { char _[len]; } addrtype; + register unsigned int reg0 asm("0") = func; + register unsigned long reg1 asm("1") = (unsigned long) ptff_block; + int rc; + + asm volatile( + " .word 0x0104\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc), "+m" (*(addrtype *) ptff_block) + : "d" (reg0), "d" (reg1) : "cc"); + return rc; +} + static inline unsigned long long local_tick_disable(void) { unsigned long long old; @@ -105,7 +169,7 @@ static inline cycles_t get_cycles(void) return (cycles_t) get_tod_clock() >> 2; } -int get_sync_clock(unsigned long long 
*clock); +int get_phys_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index a2e6ef32e054..1a691ef740cf 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -5,6 +5,7 @@ #include <linux/sched.h> #include <asm/processor.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> /* * Flush all TLB entries on the local CPU. @@ -44,17 +45,9 @@ void smp_ptlb_all(void); */ static inline void __tlb_flush_global(void) { - register unsigned long reg2 asm("2"); - register unsigned long reg3 asm("3"); - register unsigned long reg4 asm("4"); - long dummy; - - dummy = 0; - reg2 = reg3 = 0; - reg4 = ((unsigned long) &dummy) + 1; - asm volatile( - " csp %0,%2" - : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); + unsigned int dummy = 0; + + csp(&dummy, 0, 0); } /* @@ -64,7 +57,7 @@ static inline void __tlb_flush_global(void) static inline void __tlb_flush_full(struct mm_struct *mm) { preempt_disable(); - atomic_add(0x10000, &mm->context.attach_count); + atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { /* Local TLB flush */ __tlb_flush_local(); @@ -76,21 +69,19 @@ static inline void __tlb_flush_full(struct mm_struct *mm) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } /* - * Flush TLB entries for a specific ASCE on all CPUs. + * Flush TLB entries for a specific ASCE on all CPUs. Should never be used + * when more than one asce (e.g. gmap) ran on this mm. */ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - int active, count; - preempt_disable(); - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { __tlb_flush_idte_local(asce); } else { @@ -103,7 +94,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); } - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); preempt_enable(); } diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 6b53962e807e..f15f5571ca2b 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -14,10 +14,12 @@ struct cpu_topology_s390 { unsigned short core_id; unsigned short socket_id; unsigned short book_id; + unsigned short drawer_id; unsigned short node_id; cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; + cpumask_t drawer_mask; }; DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); @@ -30,6 +32,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) #define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask) +#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id) +#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask) #define mc_capable() 1 diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index e0900ddf91dd..9b49cf1daa8f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -151,8 +151,65 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, __rc; \ }) -#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL) -#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL) +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x810000UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size, spec); + break; + }; + return rc; +} + +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +{ + unsigned long spec = 0x81UL; + int rc; + + switch (size) { + case 1: + rc = __put_get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size, spec); + break; + case 2: + rc = __put_get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size, spec); + break; + case 4: + rc = __put_get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size, spec); + break; + case 8: + rc = __put_get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size, spec); + break; + }; + return rc; +} #else /* CONFIG_HAVE_MARCH_Z10_FEATURES */ @@ -191,7 +248,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __put_user_bad(); \ break; \ } \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define put_user(x, ptr) \ @@ -240,7 +297,7 @@ int __put_user_bad(void) 
__attribute__((noreturn)); __get_user_bad(); \ break; \ } \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) #define get_user(x, ptr) \ diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index a150f4fabe43..77630c74f13b 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -359,9 +359,9 @@ typedef struct per_cr_bits bits; } control_regs; /* - * Use these flags instead of setting em_instruction_fetch - * directly they are used so that single stepping can be - * switched on & off while not affecting other tracing + * The single_step and instruction_fetch bits are obsolete, + * the kernel always sets them to zero. To enable single + * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ unsigned single_step : 1; unsigned instruction_fetch : 1; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2f5586ab8a6a..f37be37edd3a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux kernel. # +KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_sclp.o := n + ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) @@ -45,7 +48,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 77a84bd78be2..c8a83276a4dc 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level) static inline unsigned long ecag(int ai, int li, int ti) { - unsigned long cmd, val; - - cmd = ai << 4 | li << 1 | ti; - asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */ - : "=d" (val) : "a" (cmd)); - return val; + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); } static void ci_leaf_init(struct cacheinfo *this_leaf, int private, diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 8cb9bfdd3ea8..43446fa2a4e5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -26,7 +26,6 @@ #include <asm/dis.h> #include <asm/io.h> #include <linux/atomic.h> -#include <asm/mathemu.h> #include <asm/cpcmd.h> #include <asm/lowcore.h> #include <asm/debug.h> diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 69f9908ac44c..6693383bc01b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); - if (task) - __dump_trace(func, data, sp, - (unsigned long)task_stack_page(task), - (unsigned long)task_stack_page(task) + THREAD_SIZE); - else - __dump_trace(func, data, sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + task = task ?: current; + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); } EXPORT_SYMBOL_GPL(dump_trace); diff --git a/arch/s390/kernel/early.c 
b/arch/s390/kernel/early.c index a0684de5a93b..717b03aa16b5 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } +static noinline __init void setup_arch_string(void) +{ + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + + if (stsi(mach, 1, 1, 1)) + return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)", + mach->manufacturer, + mach->type, + mach->model, + mach->model_capacity, + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); +} + static __init void setup_topology(void) { int max_mnest; @@ -447,11 +467,13 @@ void __init startup_init(void) ipl_save_parameters(); rescue_initrd(); clear_bss_section(); + ptff_init(); init_kernel_storage_key(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); detect_machine_type(); + setup_arch_string(); ipl_update_parameters(); setup_boot_command_line(); create_kernel_nss(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2d47f9cfcb36..c51650a1ed16 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -163,6 +163,16 @@ _PIF_WORK = (_PIF_PER_TRAP) .endm .section .kprobes.text, "ax" +.Ldummy: + /* + * This nop exists only in order to avoid that __switch_to starts at + * the beginning of the kprobes text section. In that case we would + * have several symbols at the same address. E.g. objdump would take + * an arbitrary symbol name when disassembling this code. + * With the added nop in between the __switch_to symbol is unique + * again. + */ + nop 0 /* * Scheduler resume function, called by switch_to @@ -175,7 +185,6 @@ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task lgr %r1,%r2 aghi %r1,__TASK_thread # thread_struct of prev task - lg %r4,__TASK_thread_info(%r2) # get thread_info of prev lg %r5,__TASK_thread_info(%r3) # get thread_info of next stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev lgr %r1,%r3 diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 000000000000..81d1d1887507 --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,249 @@ +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <asm/fpu/types.h> +#include <asm/fpu/api.h> + +/* + * Per-CPU variable to maintain FPU register ranges that are in use + * by the kernel. + */ +static DEFINE_PER_CPU(u32, kernel_fpu_state); + +#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC) + + +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + if (!__this_cpu_read(kernel_fpu_state)) { + /* + * Save user space FPU state and register contents. Multiple + * calls because of interruptions do not matter and return + * immediately. This also sets CIF_FPU to lazy restore FP/VX + * register contents when returning to user space. 
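
The new fpu.c supplies the low-level half of the in-kernel FPU API; kernel users go through the kernel_fpu_begin()/kernel_fpu_end() wrappers and declare which register ranges they need, as the s390_adjust_jiffies() change later in this diff does with KERNEL_FPR. A hypothetical user, sketched under the assumption that only V0..V15 and the floating-point control register are touched:

	static void vx_helper(void)
	{
		struct kernel_fpu vxstate;

		/* claim V0..V15 and the floating-point control register */
		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW | KERNEL_FPC);
		/* ... vector instructions clobbering V0..V15 only ... */
		kernel_fpu_end(&vxstate);
	}
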
+ */ + save_fpu_regs(); + } + + /* Update flags to use the vector facility for KERNEL_FPR */ + if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { + flags |= KERNEL_VXR_LOW | KERNEL_FPC; + flags &= ~KERNEL_FPR; + } + + /* Save and update current kernel VX state */ + state->mask = __this_cpu_read(kernel_fpu_state); + __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); + + /* + * If this is the first call to __kernel_fpu_begin(), no additional + * work is required. + */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + return; + + /* + * If KERNEL_FPR is still set, the vector facility is not available + * and, thus, save floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("stfpc %0" : "=Q" (state->fpc)); + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + return; + } + + /* + * If this is a nested call to __kernel_fpu_begin(), check the saved + * state mask to save and later restore the vector registers that + * are already in use. Let's start with checking floating-point + * controls. + */ + if (state->mask & KERNEL_FPC) + asm volatile("stfpc %0" : "=m" (state->fpc)); + + /* Test and save vector registers */ + asm volatile ( + /* + * Test if any vector register must be saved and, if so, + * test if all register can be saved. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load save area */ + " jo 18f\n" /* -> save V0..V31 */ + + /* + * Test if V8..V23 can be saved at once... this speeds up + * for KERNEL_fpu_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> save V8..V23 */ + + /* Test and save the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> save V0..V15 */ + " brc 4,3f\n" /* 01 -> save V0..V7 */ + " brc 2,4f\n" /* 10 -> save V8..V15 */ + + /* Test and save the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> save V16..V31 */ + " brc 4,11f\n" /* 01 -> save V16..V23 */ + " brc 2,12f\n" /* 10 -> save V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to save multiple vector + * registers at once. 
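
The tmll/brc chains above are a branch-table decoding of the four-bit register mask (bit 0 = V0..V7, bit 1 = V8..V15, bit 2 = V16..V23, bit 3 = V24..V31). The same dispatch in plain C, with save_vx_range() as a hypothetical stand-in for the vstm sequences at labels 2 through 19:

	static void save_vx_range(int first, int last);	/* assumed helper */

	static void save_vxrs_model(unsigned int mask)
	{
		if (!(mask & 15))
			return;			/* no vector registers in use */
		if ((mask & 15) == 15) {
			save_vx_range(0, 31);	/* label 18: save everything */
			return;
		}
		if ((mask & 6) == 6) {		/* KERNEL_VXR_MID */
			save_vx_range(8, 23);	/* label 17 */
			mask &= ~6;
		}
		switch (mask & 3) {		/* KERNEL_VXR_LOW */
		case 3: save_vx_range(0, 15); break;
		case 1: save_vx_range(0, 7); break;
		case 2: save_vx_range(8, 15); break;
		}
		switch (mask & 12) {		/* KERNEL_VXR_HIGH */
		case 12: save_vx_range(16, 31); break;
		case 4: save_vx_range(16, 23); break;
		case 8: save_vx_range(24, 31); break;
		}
	}
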
+ */ + "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + "20:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (state->mask) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state) +{ + /* Just update the per-CPU state if there is nothing to restore */ + if (!(state->mask & KERNEL_FPU_STATE_MASK)) + goto update_fpu_state; + + /* + * If KERNEL_FPR is specified, the vector facility is not available + * and, thus, restore floating-point control and registers only. + */ + if (state->mask & KERNEL_FPR) { + asm volatile("lfpc %0" : : "Q" (state->fpc)); + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + goto update_fpu_state; + } + + /* Test and restore floating-point controls */ + if (state->mask & KERNEL_FPC) + asm volatile("lfpc %0" : : "Q" (state->fpc)); + + /* Test and restore (load) vector registers */ + asm volatile ( + /* + * Test if any vector registers must be loaded and, if so, + * test if all registers can be loaded at once. + */ + " tmll %[m],15\n" /* KERNEL_VXR_MASK */ + " jz 20f\n" /* no work -> done */ + " la 1,%[vxrs]\n" /* load load area */ + " jo 18f\n" /* -> load V0..V31 */ + + /* + * Test if V8..V23 can be restored at once... this speeds up + * for KERNEL_VXR_MID only. Otherwise continue to split the + * range of vector registers into two halves and test them + * separately. + */ + " tmll %[m],6\n" /* KERNEL_VXR_MID */ + " jo 17f\n" /* -> load V8..V23 */ + + /* Test and load the first half of 16 vector registers */ + "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ + " jz 10f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> load V0..V15 */ + " brc 4,3f\n" /* 01 -> load V0..V7 */ + " brc 2,4f\n" /* 10 -> load V8..V15 */ + + /* Test and load the second half of 16 vector registers */ + "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ + " jo 19f\n" /* 11 -> load V16..V31 */ + " brc 4,11f\n" /* 01 -> load V16..V23 */ + " brc 2,12f\n" /* 10 -> load V24..V31 */ + " j 20f\n" /* 00 -> done */ + + /* + * Below are the vstm combinations to load multiple vector + * registers at once. 
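
Handing the previous per-CPU mask back through state->mask is what makes the API nestable: each __kernel_fpu_begin() records which ranges the previous level had in use and saves only those, and the matching __kernel_fpu_end() puts them back. Schematically (the KERNEL_FPR flag adjustment is omitted for brevity):

	struct kernel_fpu outer, inner;

	kernel_fpu_begin(&outer, KERNEL_VXR_LOW);
	/* outer.mask == 0: only the user context needed saving */

	kernel_fpu_begin(&inner, KERNEL_VXR_HIGH);
	/* inner.mask records KERNEL_VXR_LOW, so only the outer
	 * section's V0..V15 are saved here */

	kernel_fpu_end(&inner);		/* brings back the outer V0..V15 */
	kernel_fpu_end(&outer);		/* mask was 0: nothing to restore */
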
+ */ + "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ + " j 10b\n" /* -> VXR_HIGH */ + "\n" + "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ + " j 20f\n" /* -> done */ + "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ + " j 20f\n" /* -> done */ + "\n" + "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ + " nill %[m],249\n" /* m &= ~VXR_MID */ + " j 1b\n" /* -> VXR_LOW */ + "\n" + "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + "20:" + : + : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), + [m] "d" (state->mask) + : "1", "cc"); + +update_fpu_state: + /* Update current kernel VX state */ + __this_cpu_write(kernel_fpu_state, state->mask); +} +EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d14069d4b88d..295bfb7124bc 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type) * Must be in data section since the bss section * is not cleared when these are accessed. */ -static u8 ipl_ssid __attribute__((__section__(".data"))) = 0; -static u16 ipl_devno __attribute__((__section__(".data"))) = 0; -u32 ipl_flags __attribute__((__section__(".data"))) = 0; +static u8 ipl_ssid __section(.data) = 0; +static u16 ipl_devno __section(.data) = 0; +u32 ipl_flags __section(.data) = 0; enum ipl_method { REIPL_METHOD_CCW_CIO, @@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr) asm volatile( " diag %0,%2,0x308\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : "+d" (_addr), "+d" (_rc) : "d" (subcode) : "cc", "memory"); @@ -563,7 +563,7 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); else if (ipl_info.type == IPL_TYPE_CCW) @@ -1085,21 +1085,24 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - diag308(DIAG308_IPL, NULL); + if (MACHINE_IS_LPAR) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); + else + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_DIAG: - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RO_VM: __cpcmd("IPL", NULL, 0, NULL); break; case REIPL_METHOD_NSS_DIAG: diag308(DIAG308_SET, reipl_block_nss); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_NSS: get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS); @@ -1108,7 +1111,7 @@ static void __reipl_run(void *unused) case REIPL_METHOD_DEFAULT: if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_DUMP: break; @@ -1423,7 +1426,7 @@ static void diag308_dump(void *dump_block) { diag308(DIAG308_SET, dump_block); while (1) { - if (diag308(DIAG308_DUMP, NULL) != 0x302) + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) break; udelay_simple(USEC_PER_SEC); } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c373a1d41d10..285d6561076d 100644 --- a/arch/s390/kernel/irq.c +++ 
b/arch/s390/kernel/irq.c @@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); } - if (index < NR_IRQS) { - if (index >= NR_IRQS_BASE) - goto out; + if (index < NR_IRQS_BASE) { seq_printf(p, "%s: ", irqclass_main_desc[index].name); irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) @@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); goto out; } + if (index > NR_IRQS_BASE) + goto out; + for (index = 0; index < NR_ARCH_IRQS; index++) { seq_printf(p, "%s: ", irqclass_sub_desc[index].name); irq = irqclass_sub_desc[index].irq; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0e64f08d3d69..3074c1d83829 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include <asm/diag.h> #include <asm/elf.h> #include <asm/asm-offsets.h> +#include <asm/cacheflush.h> #include <asm/os_info.h> #include <asm/switch_to.h> @@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); - /* Create initial mapping for crashkernel memory */ - arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -150,42 +149,40 @@ static int kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP -/* - * Map or unmap crashkernel memory - */ -static void crash_map_pages(int enable) +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { - unsigned long size = resource_size(&crashk_res); - - BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || - size % KEXEC_CRASH_MEM_ALIGN); - if (enable) - vmem_add_mapping(crashk_res.start, size); - else { - vmem_remove_mapping(crashk_res.start, size); - if (size) - os_info_crashkernel_add(crashk_res.start, size); - else - os_info_crashkernel_add(0, 0); - } + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); +} + +static void crash_protect_pages(int protect) +{ + unsigned long size; + + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); + else + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); } -/* - * Unmap crashkernel memory - */ void arch_kexec_protect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(0); + crash_protect_pages(1); } -/* - * Map crashkernel memory - */ void arch_kexec_unprotect_crashkres(void) { - if (crashk_res.end) - crash_map_pages(1); + crash_protect_pages(0); } #endif diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 07302ce37648..29376f0e725c 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <asm/lowcore.h> #include <asm/smp.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/cputime.h> #include <asm/nmi.h> #include <asm/crw.h> @@ -27,7 +27,6 @@ struct mcck_struct { unsigned int kill_task : 1; unsigned int channel_report : 1; unsigned int warning : 1; - unsigned int etr_queue : 1; unsigned int stp_queue : 1; unsigned long mcck_code; }; @@ -82,8 +81,6 @@ void s390_handle_mcck(void) if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } - if (mcck.etr_queue) - etr_queue_work(); if 
(mcck.stp_queue) stp_queue_work(); if (mcck.kill_task) { @@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci) #define ED_STP_ISLAND 6 /* External damage STP island check */ #define ED_STP_SYNC 7 /* External damage STP sync check */ -#define ED_ETR_SYNC 12 /* External damage ETR sync check */ -#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */ /* * machine check handler. @@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) } if (mci.ed && mci.ec) { /* External damage */ - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - mcck->etr_queue |= etr_sync_check(); - if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - mcck->etr_queue |= etr_switch_to_local(); if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->etr_queue || mcck->stp_queue) + if (mcck->stp_queue) set_cpu_flag(CIF_MCCK_PENDING); } if (mci.se) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index a8e832166417..9ea26dface38 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -601,17 +601,12 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); - perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; - int err; - err = perf_reserve_sampling(); - if (err) - return err; on_each_cpu(setup_pmc_cpu, &flags, 1); if (flags & PMC_FAILURE) { release_pmc_hardware(); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 87035fa58bbe..17431f63de00 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(perf_hw_owner_lock); -static void *perf_sampling_owner; - -int perf_reserve_sampling(void) -{ - int err; - - err = 0; - spin_lock(&perf_hw_owner_lock); - if (perf_sampling_owner) { - pr_warn("The sampling facility is already reserved by %p\n", - perf_sampling_owner); - err = -EBUSY; - } else - perf_sampling_owner = __builtin_return_address(0); - spin_unlock(&perf_hw_owner_lock); - return err; -} -EXPORT_SYMBOL(perf_reserve_sampling); - -void perf_release_sampling(void) -{ - spin_lock(&perf_hw_owner_lock); - WARN_ON(!perf_sampling_owner); - perf_sampling_owner = NULL; - spin_unlock(&perf_hw_owner_lock); -} -EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index de7451065c34..81d0808085e6 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -13,12 +13,45 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <asm/diag.h> +#include <asm/facility.h> #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> #include <asm/smp.h> -static DEFINE_PER_CPU(struct cpuid, cpu_id); +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = 1; +} + +static void 
update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} void notrace cpu_relax(void) { @@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax); */ void cpu_init(void) { - struct cpuid *id = this_cpu_ptr(&cpu_id); + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); get_cpu_id(id); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; BUG_ON(current->mm); @@ -53,10 +88,7 @@ int cpu_have_feature(unsigned int num) } EXPORT_SYMBOL(cpu_have_feature); -/* - * show_cpuinfo - Get information on one CPU for use by procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", @@ -65,34 +97,55 @@ static int show_cpuinfo(struct seq_file *m, void *v) static const char * const int_hwcap_str[] = { "sie" }; - unsigned long n = (unsigned long) v - 1; - int i; - - if (!n) { - s390_adjust_jiffies(); - seq_printf(m, "vendor_id : IBM/S390\n" - "# processors : %i\n" - "bogomips per cpu: %lu.%02lu\n", - num_online_cpus(), loops_per_jiffy/(500000/HZ), - (loops_per_jiffy/(5000/HZ))%100); - seq_puts(m, "features\t: "); - for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) - if (hwcap_str[i] && (elf_hwcap & (1UL << i))) - seq_printf(m, "%s ", hwcap_str[i]); - for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) - if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) - seq_printf(m, "%s ", int_hwcap_str[i]); - seq_puts(m, "\n"); - show_cacheinfo(m); - } - if (cpu_online(n)) { - struct cpuid *id = &per_cpu(cpu_id, n); - seq_printf(m, "processor %li: " + int i, cpu; + + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); + seq_puts(m, "\n"); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); + + seq_printf(m, "processor %d: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, id->version, id->ident, id->machine); + cpu, id->version, id->ident, id->machine); } +} + +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. 
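
With the summary split out, the first record of /proc/cpuinfo now yields the machine-wide block, followed by a per-cpu stanza for each CPU when the MHz feature is available. An illustrative excerpt with made-up values (cacheinfo lines omitted):

	vendor_id : IBM/S390
	# processors : 2
	bogomips per cpu: 3033.00
	max thread id : 1
	features	: esan3 zarch stfle msa ldisp eimm dfp edat etf3eh highgprs te vx sie
	processor 0: version = FF, identification = 0A1234, machine = 2964
	processor 1: version = FF, identification = 0A1234, machine = 2964

	cpu number : 0
	cpu MHz dynamic : 5000
	cpu MHz static : 5000
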
+ */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + + if (!n) + show_cpu_summary(m, v); + if (!machine_has_cpu_mhz) + return 0; + seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_mhz(m, n); return 0; } @@ -126,4 +179,3 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; - diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f31939147ccd..ba5f456edaa9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -130,17 +130,14 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { - if (sclp.has_vt220) - add_preferred_console("ttyS", 1, NULL); - else if (sclp.has_linemode) - add_preferred_console("ttyS", 0, NULL); - else - add_preferred_console("hvc", 0, NULL); - } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttyS", 1, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); } static int __init conmode_setup(char *str) @@ -206,6 +203,15 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } + } else if (MACHINE_IS_KVM) { + if (sclp.has_vt220 && + config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && + config_enabled(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; } else { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; @@ -289,7 +295,7 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -void *restart_stack __attribute__((__section__(".data"))); +void *restart_stack __section(.data); static void __init setup_lowcore(void) { @@ -432,6 +438,20 @@ static void __init setup_resources(void) } } } +#ifdef CONFIG_CRASH_DUMP + /* + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. 
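
The console rework above replaces the MACHINE_IS_KVM special case in set_preferred_console() with two new console modes, so conmode_default() now makes the policy decision and set_preferred_console() reduces to a straight mapping. The net effect, written out as a sketch:

	/* sketch: preferred console by console_mode after this patch */
	switch (console_mode) {
	case 1:	/* SCLP line mode */
	case 2:	/* 3215 */
		add_preferred_console("ttyS", 0, NULL);
		break;
	case 3:	/* 3270 */
		add_preferred_console("tty3270", 0, NULL);
		break;
	case 4:	/* SCLP VT220 */
		add_preferred_console("ttyS", 1, NULL);
		break;
	case 5:	/* KVM virtio console fallback */
		add_preferred_console("hvc", 0, NULL);
		break;
	}
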
+ */ + if (crashk_res.end) { + memblock_add(crashk_res.start, resource_size(&crashk_res)); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); + } +#endif } static void __init setup_memory_end(void) @@ -602,7 +622,6 @@ static void __init reserve_crashkernel(void) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - insert_resource(&iomem_resource, &crashk_res); memblock_remove(crash_base, crash_size); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", @@ -901,6 +920,7 @@ void __init setup_arch(char **cmdline_p) setup_vmcoreinfo(); setup_lowcore(); smp_fill_possible_mask(); + cpu_detect_mhz_feature(); cpu_init(); numa_setup(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 7b89a7572100..35531fe1c5ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); - atomic_inc(&init_mm.context.attach_count); lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->percpu_offset = __per_cpu_offset[cpu]; @@ -320,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), */ static int pcpu_set_smt(unsigned int mtid) { - register unsigned long reg1 asm ("1") = (unsigned long) mtid; int cc; if (smp_cpu_mtid == mtid) return 0; - asm volatile( - " sigp %1,0,%2 # sigp set multi-threading\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) - : "cc"); + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); if (cc == 0) { smp_cpu_mtid = mtid; smp_cpu_mt_shift = 0; @@ -876,10 +868,8 @@ void __cpu_die(unsigned int cpu) while (!pcpu_stopped(pcpu)) cpu_relax(); pcpu_free_lowcore(pcpu); - atomic_dec(&init_mm.context.attach_count); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); - if (MACHINE_HAS_TLB_LC) - cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } void __noreturn cpu_die(void) @@ -897,7 +887,7 @@ void __init smp_fill_possible_mask(void) sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; sclp_max = min(smp_max_threads, sclp_max); - sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids; + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp_max); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index f7dba3887a54..050b8d067d3b 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -16,21 +16,11 @@ #include <asm/sysinfo.h> #include <asm/cpcmd.h> #include <asm/topology.h> - -/* Sigh, math-emu. Don't ask. */ -#include <asm/sfp-util.h> -#include <math-emu/soft-fp.h> -#include <math-emu/single.h> +#include <asm/fpu/api.h> int topology_max_mnest; -/* - * stsi - store system information - * - * Returns the current configuration level if function code 0 was specified. - * Otherwise returns 0 on success or a negative value on error. 
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
+static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
 {
         register int r0 asm("0") = (fc << 28) | sel1;
         register int r1 asm("1") = sel2;
@@ -45,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2)
                 : "+d" (r0), "+d" (rc)
                 : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
                 : "cc", "memory");
+        *lvl = ((unsigned int) r0) >> 28;
+        return rc;
+}
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+        int lvl, rc;
+
+        rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
         if (rc)
                 return rc;
-        return fc ? 0 : ((unsigned int) r0) >> 28;
+        return fc ? 0 : lvl;
 }
 EXPORT_SYMBOL(stsi);
@@ -414,10 +419,8 @@ subsys_initcall(create_proc_service_level);
 void s390_adjust_jiffies(void)
 {
         struct sysinfo_1_2_2 *info;
-        const unsigned int fmil = 0x4b189680;   /* 1e7 as 32-bit float. */
-        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-        FP_DECL_EX;
-        unsigned int capability;
+        unsigned long capability;
+        struct kernel_fpu fpu;
 
         info = (void *) get_zeroed_page(GFP_KERNEL);
         if (!info)
@@ -433,15 +436,25 @@ void s390_adjust_jiffies(void)
          * higher cpu capacity. Bogomips are the other way round.
          * To get to a halfway suitable number we divide 1e7
          * by the cpu capability number. Yes, that means a floating
-         * point division .. math-emu here we come :-)
+         * point division ...
          */
-        FP_UNPACK_SP(SA, &fmil);
-        if ((info->capability >> 23) == 0)
-                FP_FROM_INT_S(SB, (long) info->capability, 64, long);
-        else
-                FP_UNPACK_SP(SB, &info->capability);
-        FP_DIV_S(SR, SA, SB);
-        FP_TO_INT_S(capability, SR, 32, 0);
+        kernel_fpu_begin(&fpu, KERNEL_FPR);
+        asm volatile(
+                "       sfpc    %3\n"
+                "       l       %0,%1\n"
+                "       tmlh    %0,0xff80\n"
+                "       jnz     0f\n"
+                "       cefbr   %%f2,%0\n"
+                "       j       1f\n"
+                "0:     le      %%f2,%1\n"
+                "1:     cefbr   %%f0,%2\n"
+                "       debr    %%f0,%%f2\n"
+                "       cgebr   %0,5,%%f0\n"
+                : "=&d" (capability)
+                : "Q" (info->capability), "d" (10000000), "d" (0)
+                : "cc"
+                );
+        kernel_fpu_end(&fpu);
         } else
                 /*
                  * Really old machine without stsi block for basic
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9409d32f285e..4e9949800562 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -39,13 +39,14 @@
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
 #include <asm/uaccess.h>
+#include <asm/facility.h>
 #include <asm/delay.h>
 #include <asm/div64.h>
 #include <asm/vdso.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/vtimer.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/cio.h>
 #include "entry.h"
@@ -61,6 +62,32 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
 ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
 EXPORT_SYMBOL(s390_epoch_delta_notifier);
 
+unsigned char ptff_function_mask[16];
+unsigned long lpar_offset;
+unsigned long initial_leap_seconds;
+
+/*
+ * Get time offsets with PTFF
+ */
+void __init ptff_init(void)
+{
+        struct ptff_qto qto;
+        struct ptff_qui qui;
+
+        if (!test_facility(28))
+                return;
+        ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+        /* get LPAR offset */
+        if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+                lpar_offset = qto.tod_epoch_difference;
+
+        /* get initial leap seconds */
+        if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+                initial_leap_seconds = (unsigned long)
+                        ((long) qui.old_leap * 4096000000L);
+}
+
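For illustration, the PTFF helpers added above compose into a simple on-demand query. The following sketch is hypothetical (query_lpar_offset() is not part of the patch); it only reuses the ptff_query()/ptff() calls and the ptff_qto block shown in ptff_init():

static unsigned long query_lpar_offset(void)
{
        struct ptff_qto qto;

        /* PTFF_QTO: query TOD offset; only meaningful if the facility offers it */
        if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
                return qto.tod_epoch_difference;
        return 0;
}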
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -162,30 +189,32 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
         set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-static void etr_timing_alert(struct etr_irq_parm *);
 static void stp_timing_alert(struct stp_irq_parm *);
 
 static void timing_alert_interrupt(struct ext_code ext_code,
                                    unsigned int param32, unsigned long param64)
 {
         inc_irq_stat(IRQEXT_TLA);
-        if (param32 & 0x00c40000)
-                etr_timing_alert((struct etr_irq_parm *) &param32);
         if (param32 & 0x00038000)
                 stp_timing_alert((struct stp_irq_parm *) &param32);
 }
 
-static void etr_reset(void);
 static void stp_reset(void);
 
 void read_persistent_clock64(struct timespec64 *ts)
 {
-        tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
+        __u64 clock;
+
+        clock = get_tod_clock() - initial_leap_seconds;
+        tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 void read_boot_clock64(struct timespec64 *ts)
 {
-        tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+        __u64 clock;
+
+        clock = sched_clock_base_cc - initial_leap_seconds;
+        tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 static cycle_t read_tod_clock(struct clocksource *cs)
@@ -269,7 +298,6 @@ void update_vsyscall_tz(void)
 void __init time_init(void)
 {
         /* Reset time synchronization interfaces. */
-        etr_reset();
         stp_reset();
 
         /* request the clock comparator external interrupt */
@@ -337,20 +365,20 @@ static unsigned long clock_sync_flags;
 #define CLOCK_SYNC_STP          3
 
 /*
- * The synchronous get_clock function. It will write the current clock
- * value to the clock pointer and return 0 if the clock is in sync with
- * the external time source. If the clock mode is local it will return
- * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
- * reference.
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. If the clock mode is local it will return -EOPNOTSUPP and
+ * -EAGAIN if the clock is not in sync with the external reference.
  */
-int get_sync_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long long *clock)
 {
         atomic_t *sw_ptr;
         unsigned int sw0, sw1;
 
         sw_ptr = &get_cpu_var(clock_sync_word);
         sw0 = atomic_read(sw_ptr);
-        *clock = get_tod_clock();
+        *clock = get_tod_clock() - lpar_offset;
         sw1 = atomic_read(sw_ptr);
         put_cpu_var(clock_sync_word);
         if (sw0 == sw1 && (sw0 & 0x80000000U))
@@ -364,7 +392,7 @@ int get_sync_clock(unsigned long long *clock)
                 return -EACCES;
         return -EAGAIN;
 }
-EXPORT_SYMBOL(get_sync_clock);
+EXPORT_SYMBOL(get_phys_clock);
 
 /*
  * Make get_sync_clock return -EAGAIN.
  */
@@ -416,301 +444,6 @@ static void __init time_init_wq(void)
         time_sync_wq = create_singlethread_workqueue("timesync");
 }
 
-/*
- * External Time Reference (ETR) code.
- */ -static int etr_port0_online; -static int etr_port1_online; -static int etr_steai_available; - -static int __init early_parse_etr(char *p) -{ - if (strncmp(p, "off", 3) == 0) - etr_port0_online = etr_port1_online = 0; - else if (strncmp(p, "port0", 5) == 0) - etr_port0_online = 1; - else if (strncmp(p, "port1", 5) == 0) - etr_port1_online = 1; - else if (strncmp(p, "on", 2) == 0) - etr_port0_online = etr_port1_online = 1; - return 0; -} -early_param("etr", early_parse_etr); - -enum etr_event { - ETR_EVENT_PORT0_CHANGE, - ETR_EVENT_PORT1_CHANGE, - ETR_EVENT_PORT_ALERT, - ETR_EVENT_SYNC_CHECK, - ETR_EVENT_SWITCH_LOCAL, - ETR_EVENT_UPDATE, -}; - -/* - * Valid bit combinations of the eacr register are (x = don't care): - * e0 e1 dp p0 p1 ea es sl - * 0 0 x 0 0 0 0 0 initial, disabled state - * 0 0 x 0 1 1 0 0 port 1 online - * 0 0 x 1 0 1 0 0 port 0 online - * 0 0 x 1 1 1 0 0 both ports online - * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode - * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode - * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync - * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync - * 0 1 x 1 1 1 0 0 both ports online, port 1 usable - * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync - * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync - * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode - * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode - * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync - * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync - * 1 0 x 1 1 1 0 0 both ports online, port 0 usable - * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync - * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync - * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync - * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync - */ -static struct etr_eacr etr_eacr; -static u64 etr_tolec; /* time of last eacr update */ -static struct etr_aib etr_port0; -static int etr_port0_uptodate; -static struct etr_aib etr_port1; -static int etr_port1_uptodate; -static unsigned long etr_events; -static struct timer_list etr_timer; - -static void etr_timeout(unsigned long dummy); -static void etr_work_fn(struct work_struct *work); -static DEFINE_MUTEX(etr_work_mutex); -static DECLARE_WORK(etr_work, etr_work_fn); - -/* - * Reset ETR attachment. - */ -static void etr_reset(void) -{ - etr_eacr = (struct etr_eacr) { - .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0, - .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0, - .es = 0, .sl = 0 }; - if (etr_setr(&etr_eacr) == 0) { - etr_tolec = get_tod_clock(); - set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags); - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - } else if (etr_port0_online || etr_port1_online) { - pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); - etr_port0_online = etr_port1_online = 0; - } -} - -static int __init etr_init(void) -{ - struct etr_aib aib; - - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return 0; - time_init_wq(); - /* Check if this machine has the steai instruction. 
 */
-        if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
-                etr_steai_available = 1;
-        setup_timer(&etr_timer, etr_timeout, 0UL);
-        if (etr_port0_online) {
-                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-                queue_work(time_sync_wq, &etr_work);
-        }
-        if (etr_port1_online) {
-                set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-                queue_work(time_sync_wq, &etr_work);
-        }
-        return 0;
-}
-
-arch_initcall(etr_init);
-
-/*
- * Two sorts of ETR machine checks. The architecture reads:
- * "When a machine-check interruption occurs and if a switch-to-local or
- * ETR-sync-check interrupt request is pending but disabled, this pending
- * disabled interruption request is indicated and is cleared".
- * Which means that we can get etr_switch_to_local events from the machine
- * check handler although the interruption condition is disabled. Lovely..
- */
-
-/*
- * Switch to local machine check. This is called when the last usable
- * ETR port goes inactive. After switch to local the clock is not in sync.
- */
-int etr_switch_to_local(void)
-{
-        if (!etr_eacr.sl)
-                return 0;
-        disable_sync_clock(NULL);
-        if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
-                etr_eacr.es = etr_eacr.sl = 0;
-                etr_setr(&etr_eacr);
-                return 1;
-        }
-        return 0;
-}
-
-/*
- * ETR sync check machine check. This is called when the ETR OTE and the
- * local clock OTE are farther apart than the ETR sync check tolerance.
- * After an ETR sync check the clock is not in sync. The machine check
- * is broadcast to all cpus at the same time.
- */
-int etr_sync_check(void)
-{
-        if (!etr_eacr.es)
-                return 0;
-        disable_sync_clock(NULL);
-        if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
-                etr_eacr.es = 0;
-                etr_setr(&etr_eacr);
-                return 1;
-        }
-        return 0;
-}
-
-void etr_queue_work(void)
-{
-        queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * ETR timing alert. There are two causes:
- * 1) port state change, check the usability of the port
- * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
- *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
- *    or ETR-data word 4 (edf4) has changed.
- */
-static void etr_timing_alert(struct etr_irq_parm *intparm)
-{
-        if (intparm->pc0)
-                /* ETR port 0 state change. */
-                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-        if (intparm->pc1)
-                /* ETR port 1 state change. */
-                set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-        if (intparm->eai)
-                /*
-                 * ETR port alert on either port 0, 1 or both.
-                 * Both ports are not up-to-date now.
-                 */
-                set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
-        queue_work(time_sync_wq, &etr_work);
-}
-
-static void etr_timeout(unsigned long dummy)
-{
-        set_bit(ETR_EVENT_UPDATE, &etr_events);
-        queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * Check if the etr mode is pps.
- */
-static inline int etr_mode_is_pps(struct etr_eacr eacr)
-{
-        return eacr.es && !eacr.sl;
-}
-
-/*
- * Check if the etr mode is etr.
- */
-static inline int etr_mode_is_etr(struct etr_eacr eacr)
-{
-        return eacr.es && eacr.sl;
-}
-
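The two predicates above decode the eacr.es/eacr.sl pair exactly the way the ETR sysfs stepping_mode attribute further down does. As a compact illustration (etr_mode_string() is a hypothetical helper, not part of the patch):

static const char *etr_mode_string(struct etr_eacr eacr)
{
        if (etr_mode_is_pps(eacr))
                return "pps";   /* es=1, sl=0: pulse-per-second mode */
        if (etr_mode_is_etr(eacr))
                return "etr";   /* es=1, sl=1: ETR stepping mode */
        return "local";         /* es=0: clock not in sync */
}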
-/*
- * Check if the port can be used for TOD synchronization.
- * For PPS mode the port has to receive OTEs. For ETR mode
- * the port has to receive OTEs, the ETR stepping bit has to
- * be zero and the validity bits for data frame 1, 2, and 3
- * have to be 1.
- */
-static int etr_port_valid(struct etr_aib *aib, int port)
-{
-        unsigned int psc;
-
-        /* Check that this port is receiving OTEs. */
-        if (aib->tsp == 0)
-                return 0;
-
-        psc = port ? aib->esw.psc1 : aib->esw.psc0;
-        if (psc == etr_lpsc_pps_mode)
-                return 1;
-        if (psc == etr_lpsc_operational_step)
-                return !aib->esw.y && aib->slsw.v1 &&
-                        aib->slsw.v2 && aib->slsw.v3;
-        return 0;
-}
-
-/*
- * Check if two ports are on the same network.
- */
-static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
-{
-        // FIXME: any other fields we have to compare?
-        return aib1->edf1.net_id == aib2->edf1.net_id;
-}
-
-/*
- * Wrapper for etr_steai that converts physical port states
- * to logical port states to be consistent with the output
- * of stetr (see etr_psc vs. etr_lpsc).
- */
-static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
-{
-        BUG_ON(etr_steai(aib, func) != 0);
-        /* Convert port state to logical port state. */
-        if (aib->esw.psc0 == 1)
-                aib->esw.psc0 = 2;
-        else if (aib->esw.psc0 == 0 && aib->esw.p == 0)
-                aib->esw.psc0 = 1;
-        if (aib->esw.psc1 == 1)
-                aib->esw.psc1 = 2;
-        else if (aib->esw.psc1 == 0 && aib->esw.p == 1)
-                aib->esw.psc1 = 1;
-}
-
-/*
- * Check if the aib a2 is still connected to the same attachment as
- * aib a1, the etv values differ by one and a2 is valid.
- */
-static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
-{
-        int state_a1, state_a2;
-
-        /* Paranoia check: e0/e1 had better be the same. */
-        if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
-            a1->esw.eacr.e1 != a2->esw.eacr.e1)
-                return 0;
-
-        /* Still connected to the same etr? */
-        state_a1 = p ? a1->esw.psc1 : a1->esw.psc0;
-        state_a2 = p ? a2->esw.psc1 : a2->esw.psc0;
-        if (state_a1 == etr_lpsc_operational_step) {
-                if (state_a2 != etr_lpsc_operational_step ||
-                    a1->edf1.net_id != a2->edf1.net_id ||
-                    a1->edf1.etr_id != a2->edf1.etr_id ||
-                    a1->edf1.etr_pn != a2->edf1.etr_pn)
-                        return 0;
-        } else if (state_a2 != etr_lpsc_pps_mode)
-                return 0;
-
-        /* The ETV value of a2 needs to be ETV of a1 + 1. */
-        if (a1->edf2.etv + 1 != a2->edf2.etv)
-                return 0;
-
-        if (!etr_port_valid(a2, p))
-                return 0;
-
-        return 1;
-}
-
 struct clock_sync_data {
         atomic_t cpus;
         int in_sync;
@@ -748,688 +481,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 }
 
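The deleted etr_sync_clock() just below and the surviving stp_sync_clock() later in this patch both run under stop_machine() and share one rendezvous pattern. A distilled sketch (hypothetical, using only the clock_sync_data fields shown above):

static int sync_clock_rendezvous(void *data)
{
        static int first;
        struct clock_sync_data *sync = data;

        if (xchg(&first, 1) == 1) {
                /* Slave: spin in clock_sync_cpu() until the master sets in_sync */
                clock_sync_cpu(sync);
                return 0;
        }
        /* Master: wait until every other cpu has entered the callback */
        while (atomic_read(&sync->cpus) != 0)
                cpu_relax();
        /* ... master steps the TOD clock and fixes up timing state here ... */
        sync->in_sync = 1;
        xchg(&first, 0);
        return 0;
}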
 /*
- * Sync the TOD clock using the port referred to by aibp. This port
- * has to be enabled and the other port has to be disabled. The
- * last eacr update has to be more than 1.6 seconds in the past.
- */
-static int etr_sync_clock(void *data)
-{
-        static int first;
-        unsigned long long clock, old_clock, clock_delta, delay, delta;
-        struct clock_sync_data *etr_sync;
-        struct etr_aib *sync_port, *aib;
-        int port;
-        int rc;
-
-        etr_sync = data;
-
-        if (xchg(&first, 1) == 1) {
-                /* Slave */
-                clock_sync_cpu(etr_sync);
-                return 0;
-        }
-
-        /* Wait until all other cpus entered the sync function. */
-        while (atomic_read(&etr_sync->cpus) != 0)
-                cpu_relax();
-
-        port = etr_sync->etr_port;
-        aib = etr_sync->etr_aib;
-        sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-        enable_sync_clock();
-
-        /* Set clock to next OTE. */
-        __ctl_set_bit(14, 21);
-        __ctl_set_bit(0, 29);
-        clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
-        old_clock = get_tod_clock();
-        if (set_tod_clock(clock) == 0) {
-                __udelay(1);    /* Wait for the clock to start. */
-                __ctl_clear_bit(0, 29);
-                __ctl_clear_bit(14, 21);
-                etr_stetr(aib);
-                /* Adjust Linux timing variables. */
-                delay = (unsigned long long)
-                        (aib->edf2.etv - sync_port->edf2.etv) << 32;
-                delta = adjust_time(old_clock, clock, delay);
-                clock_delta = clock - old_clock;
-                atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
-                                           &clock_delta);
-                etr_sync->fixup_cc = delta;
-                fixup_clock_comparator(delta);
-                /* Verify that the clock is properly set. */
-                if (!etr_aib_follows(sync_port, aib, port)) {
-                        /* Didn't work. */
-                        disable_sync_clock(NULL);
-                        etr_sync->in_sync = -EAGAIN;
-                        rc = -EAGAIN;
-                } else {
-                        etr_sync->in_sync = 1;
-                        rc = 0;
-                }
-        } else {
-                /* Could not set the clock ?!? */
-                __ctl_clear_bit(0, 29);
-                __ctl_clear_bit(14, 21);
-                disable_sync_clock(NULL);
-                etr_sync->in_sync = -EAGAIN;
-                rc = -EAGAIN;
-        }
-        xchg(&first, 0);
-        return rc;
-}
-
-static int etr_sync_clock_stop(struct etr_aib *aib, int port)
-{
-        struct clock_sync_data etr_sync;
-        struct etr_aib *sync_port;
-        int follows;
-        int rc;
-
-        /* Check if the current aib is adjacent to the sync port aib. */
-        sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-        follows = etr_aib_follows(sync_port, aib, port);
-        memcpy(sync_port, aib, sizeof(*aib));
-        if (!follows)
-                return -EAGAIN;
-        memset(&etr_sync, 0, sizeof(etr_sync));
-        etr_sync.etr_aib = aib;
-        etr_sync.etr_port = port;
-        get_online_cpus();
-        atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
-        rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
-        put_online_cpus();
-        return rc;
-}
-
-/*
- * Handle the immediate effects of the different events.
- * The port change event is used for online/offline changes.
- */
-static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
-{
-        if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
-                eacr.es = 0;
-        if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
-                eacr.es = eacr.sl = 0;
-        if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
-                etr_port0_uptodate = etr_port1_uptodate = 0;
-
-        if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
-                if (eacr.e0)
-                        /*
-                         * Port change of an enabled port. We have to
-                         * assume that this can have caused a stepping
-                         * port switch.
-                         */
-                        etr_tolec = get_tod_clock();
-                eacr.p0 = etr_port0_online;
-                if (!eacr.p0)
-                        eacr.e0 = 0;
-                etr_port0_uptodate = 0;
-        }
-        if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
-                if (eacr.e1)
-                        /*
-                         * Port change of an enabled port. We have to
-                         * assume that this can have caused a stepping
-                         * port switch.
-                         */
-                        etr_tolec = get_tod_clock();
-                eacr.p1 = etr_port1_online;
-                if (!eacr.p1)
-                        eacr.e1 = 0;
-                etr_port1_uptodate = 0;
-        }
-        clear_bit(ETR_EVENT_UPDATE, &etr_events);
-        return eacr;
-}
-
-/*
- * Set up a timer that expires after etr_tolec + 1.6 seconds if
- * one of the ports needs an update.
- */
-static void etr_set_tolec_timeout(unsigned long long now)
-{
-        unsigned long micros;
-
-        if ((!etr_eacr.p0 || etr_port0_uptodate) &&
-            (!etr_eacr.p1 || etr_port1_uptodate))
-                return;
-        micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
-        micros = (micros > 1600000) ? 0 : 1600000 - micros;
-        mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1);
-}
-
-/*
- * Set up a timer that expires after 1/2 second.
- */
-static void etr_set_sync_timeout(void)
-{
-        mod_timer(&etr_timer, jiffies + HZ/2);
-}
-
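The shift in etr_set_tolec_timeout() relies on the s390 TOD clock format, where bit 51 ticks once per microsecond, so a TOD delta shifted right by 12 yields microseconds. Assuming that format, the conversion and the 1.6 second guard interval that etr_work_fn() below compares against look like this (hypothetical macros, for illustration only):

/* TOD delta -> microseconds: the TOD clock ticks 4096 times per microsecond */
#define TOD_TO_USEC(delta)      ((delta) >> 12)
#define USEC_TO_TOD(usec)       ((unsigned long long)(usec) << 12)
/* The guard interval used below: USEC_TO_TOD(1600000) == 1600000 << 12, i.e. 1.6s */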
-/*
- * Update the aib information for one or both ports.
- */
-static struct etr_eacr etr_handle_update(struct etr_aib *aib,
-                                         struct etr_eacr eacr)
-{
-        /* With both ports disabled the aib information is useless. */
-        if (!eacr.e0 && !eacr.e1)
-                return eacr;
-
-        /* Update port0 or port1 with aib stored in etr_work_fn. */
-        if (aib->esw.q == 0) {
-                /* Information for port 0 stored. */
-                if (eacr.p0 && !etr_port0_uptodate) {
-                        etr_port0 = *aib;
-                        if (etr_port0_online)
-                                etr_port0_uptodate = 1;
-                }
-        } else {
-                /* Information for port 1 stored. */
-                if (eacr.p1 && !etr_port1_uptodate) {
-                        etr_port1 = *aib;
-                        if (etr_port0_online)
-                                etr_port1_uptodate = 1;
-                }
-        }
-
-        /*
-         * Do not try to get the alternate port aib if the clock
-         * is not in sync yet.
-         */
-        if (!eacr.es || !check_sync_clock())
-                return eacr;
-
-        /*
-         * If steai is available we can get the information about
-         * the other port immediately. If only stetr is available the
-         * data-port bit toggle has to be used.
-         */
-        if (etr_steai_available) {
-                if (eacr.p0 && !etr_port0_uptodate) {
-                        etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
-                        etr_port0_uptodate = 1;
-                }
-                if (eacr.p1 && !etr_port1_uptodate) {
-                        etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
-                        etr_port1_uptodate = 1;
-                }
-        } else {
-                /*
-                 * One port was updated above; if the other
-                 * port is not up to date, toggle the dp bit.
-                 */
-                if ((eacr.p0 && !etr_port0_uptodate) ||
-                    (eacr.p1 && !etr_port1_uptodate))
-                        eacr.dp ^= 1;
-                else
-                        eacr.dp = 0;
-        }
-        return eacr;
-}
-
-/*
- * Write the new etr control register if it differs from the current one.
- * etr_tolec is refreshed if the update may have changed the data port.
- */
-static void etr_update_eacr(struct etr_eacr eacr)
-{
-        int dp_changed;
-
-        if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
-                /* No change, return. */
-                return;
-        /*
-         * The disable of an active port or the change of the data port
-         * bit can/will cause a change in the data port.
-         */
-        dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
-                (etr_eacr.dp ^ eacr.dp) != 0;
-        etr_eacr = eacr;
-        etr_setr(&etr_eacr);
-        if (dp_changed)
-                etr_tolec = get_tod_clock();
-}
-
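etr_work_fn() below encodes its port-selection rules in one long if/else chain. The same decision, distilled into a hypothetical helper for readability (PPS mode is preferred over ETR stepping mode, and only an up-to-date, valid port is reported; not part of the patch):

static int etr_pick_sync_port(struct etr_eacr eacr, struct etr_aib *aib)
{
        /* Prefer a port in PPS mode, then one in ETR stepping mode */
        if (eacr.p0 && aib->esw.psc0 == etr_lpsc_pps_mode)
                return (etr_port0_uptodate &&
                        etr_port_valid(&etr_port0, 0)) ? 0 : -1;
        if (eacr.p1 && aib->esw.psc1 == etr_lpsc_pps_mode)
                return (etr_port1_uptodate &&
                        etr_port_valid(&etr_port1, 1)) ? 1 : -1;
        if (eacr.p0 && aib->esw.psc0 == etr_lpsc_operational_step)
                return (etr_port0_uptodate &&
                        etr_port_valid(&etr_port0, 0)) ? 0 : -1;
        if (eacr.p1 && aib->esw.psc1 == etr_lpsc_operational_step)
                return (etr_port1_uptodate &&
                        etr_port_valid(&etr_port1, 1)) ? 1 : -1;
        return -1;      /* no usable port, stay in local mode */
}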
-/*
- * ETR work. In this function you'll find the main logic. In
- * particular this is the only function that calls etr_update_eacr();
- * it "controls" the etr control register.
- */
-static void etr_work_fn(struct work_struct *work)
-{
-        unsigned long long now;
-        struct etr_eacr eacr;
-        struct etr_aib aib;
-        int sync_port;
-
-        /* prevent multiple execution. */
-        mutex_lock(&etr_work_mutex);
-
-        /* Create working copy of etr_eacr. */
-        eacr = etr_eacr;
-
-        /* Check for the different events and their immediate effects. */
-        eacr = etr_handle_events(eacr);
-
-        /* Check if ETR is supposed to be active. */
-        eacr.ea = eacr.p0 || eacr.p1;
-        if (!eacr.ea) {
-                /* Both ports offline. Reset everything. */
-                eacr.dp = eacr.es = eacr.sl = 0;
-                on_each_cpu(disable_sync_clock, NULL, 1);
-                del_timer_sync(&etr_timer);
-                etr_update_eacr(eacr);
-                goto out_unlock;
-        }
-
-        /* Store aib to get the current ETR status word. */
-        BUG_ON(etr_stetr(&aib) != 0);
-        etr_port0.esw = etr_port1.esw = aib.esw;        /* Copy status word. */
-        now = get_tod_clock();
-
-        /*
-         * Update the port information if the last stepping port change
-         * or data port change is older than 1.6 seconds.
-         */
-        if (now >= etr_tolec + (1600000 << 12))
-                eacr = etr_handle_update(&aib, eacr);
-
-        /*
-         * Select ports to enable. The preferred synchronization mode is PPS.
-         * Whether a port can be enabled depends on a number of things:
-         * 1) The port needs to be online and uptodate. A port is not
-         *    disabled just because it is not uptodate, but it is only
-         *    enabled if it is uptodate.
-         * 2) The port needs to have the same mode (pps / etr).
-         * 3) The port needs to be usable -> etr_port_valid() == 1
-         * 4) To enable the second port the clock needs to be in sync.
-         * 5) If both ports are usable and are ETR ports, the network id
-         *    has to be the same.
-         * The eacr.sl bit is used to indicate etr mode vs. pps mode.
-         */
-        if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
-                eacr.sl = 0;
-                eacr.e0 = 1;
-                if (!etr_mode_is_pps(etr_eacr))
-                        eacr.es = 0;
-                if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
-                        eacr.e1 = 0;
-                // FIXME: uptodate checks ?
-                else if (etr_port0_uptodate && etr_port1_uptodate)
-                        eacr.e1 = 1;
-                sync_port = (etr_port0_uptodate &&
-                             etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-        } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
-                eacr.sl = 0;
-                eacr.e0 = 0;
-                eacr.e1 = 1;
-                if (!etr_mode_is_pps(etr_eacr))
-                        eacr.es = 0;
-                sync_port = (etr_port1_uptodate &&
-                             etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-        } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
-                eacr.sl = 1;
-                eacr.e0 = 1;
-                if (!etr_mode_is_etr(etr_eacr))
-                        eacr.es = 0;
-                if (!eacr.es || !eacr.p1 ||
-                    aib.esw.psc1 != etr_lpsc_operational_alt)
-                        eacr.e1 = 0;
-                else if (etr_port0_uptodate && etr_port1_uptodate &&
-                         etr_compare_network(&etr_port0, &etr_port1))
-                        eacr.e1 = 1;
-                sync_port = (etr_port0_uptodate &&
-                             etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-        } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
-                eacr.sl = 1;
-                eacr.e0 = 0;
-                eacr.e1 = 1;
-                if (!etr_mode_is_etr(etr_eacr))
-                        eacr.es = 0;
-                sync_port = (etr_port1_uptodate &&
-                             etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-        } else {
-                /* Both ports not usable. */
-                eacr.es = eacr.sl = 0;
-                sync_port = -1;
-        }
-
-        /*
-         * If the clock is in sync just update the eacr and return.
-         * If there is no valid sync port wait for a port update.
-         */
-        if ((eacr.es && check_sync_clock()) || sync_port < 0) {
-                etr_update_eacr(eacr);
-                etr_set_tolec_timeout(now);
-                goto out_unlock;
-        }
-
-        /*
-         * Prepare control register for clock syncing
-         * (reset data port bit, set sync check control).
-         */
-        eacr.dp = 0;
-        eacr.es = 1;
-
-        /*
-         * Update eacr and try to synchronize the clock. If the update
-         * of eacr caused a stepping port switch (or if we have to
-         * assume that a stepping port switch has occurred) or the
-         * clock syncing failed, reset the sync check control bit
-         * and set up a timer to try again after 0.5 seconds.
-         */
-        etr_update_eacr(eacr);
-        if (now < etr_tolec + (1600000 << 12) ||
-            etr_sync_clock_stop(&aib, sync_port) != 0) {
-                /* Sync failed. Try again in 1/2 second.
*/ - eacr.es = 0; - etr_update_eacr(eacr); - etr_set_sync_timeout(); - } else - etr_set_tolec_timeout(now); -out_unlock: - mutex_unlock(&etr_work_mutex); -} - -/* - * Sysfs interface functions - */ -static struct bus_type etr_subsys = { - .name = "etr", - .dev_name = "etr", -}; - -static struct device etr_port0_dev = { - .id = 0, - .bus = &etr_subsys, -}; - -static struct device etr_port1_dev = { - .id = 1, - .bus = &etr_subsys, -}; - -/* - * ETR subsys attributes - */ -static ssize_t etr_stepping_port_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", etr_port0.esw.p); -} - -static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); - -static ssize_t etr_stepping_mode_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - char *mode_str; - - if (etr_mode_is_pps(etr_eacr)) - mode_str = "pps"; - else if (etr_mode_is_etr(etr_eacr)) - mode_str = "etr"; - else - mode_str = "local"; - return sprintf(buf, "%s\n", mode_str); -} - -static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL); - -/* - * ETR port attributes - */ -static inline struct etr_aib *etr_aib_from_dev(struct device *dev) -{ - if (dev == &etr_port0_dev) - return etr_port0_online ? &etr_port0 : NULL; - else - return etr_port1_online ? &etr_port1 : NULL; -} - -static ssize_t etr_online_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - unsigned int online; - - online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online; - return sprintf(buf, "%i\n", online); -} - -static ssize_t etr_online_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - unsigned int value; - - value = simple_strtoul(buf, NULL, 0); - if (value != 0 && value != 1) - return -EINVAL; - if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags)) - return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); - if (dev == &etr_port0_dev) { - if (etr_port0_online == value) - goto out; /* Nothing to do. */ - etr_port0_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } else { - if (etr_port1_online == value) - goto out; /* Nothing to do. */ - etr_port1_online = value; - if (etr_port0_online && etr_port1_online) - set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - else - clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags); - set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events); - queue_work(time_sync_wq, &etr_work); - } -out: - mutex_unlock(&clock_sync_mutex); - return count; -} - -static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store); - -static ssize_t etr_stepping_control_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? - etr_eacr.e0 : etr_eacr.e1); -} - -static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL); - -static ssize_t etr_mode_code_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (!etr_port0_online && !etr_port1_online) - /* Status word is not uptodate if both ports are offline. */ - return -ENODATA; - return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ? 
- etr_port0.esw.psc0 : etr_port0.esw.psc1); -} - -static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL); - -static ssize_t etr_untuned_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.u); -} - -static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL); - -static ssize_t etr_network_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.net_id); -} - -static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL); - -static ssize_t etr_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_id); -} - -static DEVICE_ATTR(id, 0400, etr_id_show, NULL); - -static ssize_t etr_port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v1) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf1.etr_pn); -} - -static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL); - -static ssize_t etr_coupled_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.c); -} - -static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL); - -static ssize_t etr_local_time_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.blto); -} - -static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL); - -static ssize_t etr_utc_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct etr_aib *aib = etr_aib_from_dev(dev); - - if (!aib || !aib->slsw.v3) - return -ENODATA; - return sprintf(buf, "%i\n", aib->edf3.buo); -} - -static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL); - -static struct device_attribute *etr_port_attributes[] = { - &dev_attr_online, - &dev_attr_stepping_control, - &dev_attr_state_code, - &dev_attr_untuned, - &dev_attr_network, - &dev_attr_id, - &dev_attr_port, - &dev_attr_coupled, - &dev_attr_local_time, - &dev_attr_utc_offset, - NULL -}; - -static int __init etr_register_port(struct device *dev) -{ - struct device_attribute **attr; - int rc; - - rc = device_register(dev); - if (rc) - goto out; - for (attr = etr_port_attributes; *attr; attr++) { - rc = device_create_file(dev, *attr); - if (rc) - goto out_unreg; - } - return 0; -out_unreg: - for (; attr >= etr_port_attributes; attr--) - device_remove_file(dev, *attr); - device_unregister(dev); -out: - return rc; -} - -static void __init etr_unregister_port(struct device *dev) -{ - struct device_attribute **attr; - - for (attr = etr_port_attributes; *attr; attr++) - device_remove_file(dev, *attr); - device_unregister(dev); -} - -static int __init etr_init_sysfs(void) -{ - int rc; - - rc = subsys_system_register(&etr_subsys, NULL); - if (rc) - goto out; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port); - if (rc) - goto out_unreg_subsys; - rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode); - 
if (rc) - goto out_remove_stepping_port; - rc = etr_register_port(&etr_port0_dev); - if (rc) - goto out_remove_stepping_mode; - rc = etr_register_port(&etr_port1_dev); - if (rc) - goto out_remove_port0; - return 0; - -out_remove_port0: - etr_unregister_port(&etr_port0_dev); -out_remove_stepping_mode: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode); -out_remove_stepping_port: - device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port); -out_unreg_subsys: - bus_unregister(&etr_subsys); -out: - return rc; -} - -device_initcall(etr_init_sysfs); - -/* * Server Time Protocol (STP) code. */ static bool stp_online; @@ -1455,7 +506,7 @@ static void __init stp_reset(void) int rc; stp_page = (void *) get_zeroed_page(GFP_ATOMIC); - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { @@ -1533,6 +584,7 @@ static int stp_sync_clock(void *data) static int first; unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; + struct ptff_qto qto; int rc; stp_sync = data; @@ -1554,11 +606,14 @@ static int stp_sync_clock(void *data) stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { old_clock = get_tod_clock(); - rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { - new_clock = get_tod_clock(); + new_clock = old_clock + clock_delta; delta = adjust_time(old_clock, new_clock, 0); - clock_delta = new_clock - old_clock; + if (ptff_query(PTFF_QTO) && + ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + /* Update LPAR offset */ + lpar_offset = qto.tod_epoch_difference; atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &clock_delta); fixup_clock_comparator(delta); @@ -1590,12 +645,12 @@ static void stp_work_fn(struct work_struct *work) mutex_lock(&stp_work_mutex); if (!stp_online) { - chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); del_timer_sync(&stp_timer); goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); if (rc) goto out_unlock; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 64298a867589..e959c02e0cac 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn); */ static struct mask_info socket_info; static struct mask_info book_info; +static struct mask_info drawer_info; DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology); EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology); @@ -79,10 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu) return mask; } -static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, - struct mask_info *book, - struct mask_info *socket, - int one_socket_per_cpu) +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) { struct cpu_topology_s390 *topo; unsigned int core; @@ -97,21 +98,17 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, continue; for (i = 0; i <= smp_cpu_mtid; i++) { topo = &per_cpu(cpu_topology, lcpu + i); + topo->drawer_id = drawer->id; topo->book_id = book->id; + topo->socket_id = socket->id; topo->core_id = rcore; topo->thread_id = lcpu + i; + cpumask_set_cpu(lcpu + i, &drawer->mask); cpumask_set_cpu(lcpu + i, &book->mask); 
cpumask_set_cpu(lcpu + i, &socket->mask); - if (one_socket_per_cpu) - topo->socket_id = rcore; - else - topo->socket_id = socket->id; smp_cpu_set_polarization(lcpu + i, tl_core->pp); } - if (one_socket_per_cpu) - socket = socket->next; } - return socket; } static void clear_masks(void) @@ -128,6 +125,11 @@ static void clear_masks(void) cpumask_clear(&info->mask); info = info->next; } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } } static union topology_entry *next_tle(union topology_entry *tle) @@ -137,16 +139,22 @@ static union topology_entry *next_tle(union topology_entry *tle) return (union topology_entry *)((struct topology_container *)tle + 1); } -static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) +static void tl_to_masks(struct sysinfo_15_1_x *info) { struct mask_info *socket = &socket_info; struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; union topology_entry *tle, *end; + clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; case 2: book = book->next; book->id = tle->container.id; @@ -156,32 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info) socket->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, socket, 0); - break; - default: - clear_masks(); - return; - } - tle = next_tle(tle); - } -} - -static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) -{ - struct mask_info *socket = &socket_info; - struct mask_info *book = &book_info; - union topology_entry *tle, *end; - - tle = info->tle; - end = (union topology_entry *)((unsigned long)info + info->length); - while (tle < end) { - switch (tle->nl) { - case 1: - book = book->next; - book->id = tle->container.id; - break; - case 0: - socket = add_cpus_to_mask(&tle->cpu, book, socket, 1); + add_cpus_to_mask(&tle->cpu, drawer, book, socket); break; default: clear_masks(); @@ -191,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info) } } -static void tl_to_masks(struct sysinfo_15_1_x *info) -{ - struct cpuid cpu_id; - - get_cpu_id(&cpu_id); - clear_masks(); - switch (cpu_id.machine) { - case 0x2097: - case 0x2098: - __tl_to_masks_z10(info); - break; - default: - __tl_to_masks_generic(info); - } -} - static void topology_update_polarization_simple(void) { int cpu; @@ -257,11 +224,13 @@ static void update_cpu_masks(void) topo->thread_mask = cpu_thread_map(cpu); topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); + topo->drawer_mask = cpu_group_map(&drawer_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { topo->thread_id = cpu; topo->core_id = cpu; topo->socket_id = cpu; topo->book_id = cpu; + topo->drawer_id = cpu; } } numa_update_cpu_topology(); @@ -269,10 +238,7 @@ static void update_cpu_masks(void) void store_topology(struct sysinfo_15_1_x *info) { - if (topology_max_mnest >= 3) - stsi(info, 15, 1, 3); - else - stsi(info, 15, 1, 2); + stsi(info, 15, 1, min(topology_max_mnest, 4)); } int arch_update_cpu_topology(void) @@ -442,6 +408,11 @@ static const struct cpumask *cpu_book_mask(int cpu) return &per_cpu(cpu_topology, cpu).book_mask; } +static const struct cpumask *cpu_drawer_mask(int cpu) +{ + return &per_cpu(cpu_topology, cpu).drawer_mask; +} + static int __init early_parse_topology(char *p) { return kstrtobool(p, &topology_enabled); @@ -452,6 +423,7 @@ static struct 
sched_domain_topology_level s390_topology[] = { { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, { NULL, }, }; @@ -487,6 +459,7 @@ static int __init s390_topology_init(void) printk(KERN_CONT " / %d\n", info->mnest); alloc_masks(info, &socket_info, 1); alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); set_sched_topology(s390_topology); return 0; } diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index f9c459586649..68145456fee2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 058659c1b8cf..0b0fd22c869a 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,5 +1,7 @@ # List of files in the vdso, has to be asm only for now +KCOV_INSTRUMENT := n + obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o # Build rules diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..429bfd111961 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -4,6 +4,16 @@ #include <asm/thread_info.h> #include <asm/page.h> + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + #include <asm-generic/vmlinux.lds.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") @@ -49,7 +59,14 @@ SECTIONS _eshared = .; /* End of shareable data */ _sdata = .; /* Start of data section */ - EXCEPTION_TABLE(16) :data + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + } + EXCEPTION_TABLE(16) + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) @@ -81,7 +98,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - BSS_SECTION(0, 2, 0) + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) _end = . 
; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 43f2a2b80490..6f5c344cd785 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,7 +28,7 @@ #include <linux/vmalloc.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> -#include <asm/etr.h> +#include <asm/stp.h> #include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index b647d5ff0ad9..e390bbb16443 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -236,6 +236,26 @@ char * strrchr(const char * s, int c) } EXPORT_SYMBOL(strrchr); +static inline int clcle(const char *s1, unsigned long l1, + const char *s2, unsigned long l2, + int *diff) +{ + register unsigned long r2 asm("2") = (unsigned long) s1; + register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r4 asm("4") = (unsigned long) s2; + register unsigned long r5 asm("5") = (unsigned long) l2; + int cc; + + asm volatile ("0: clcle %1,%3,0\n" + " jo 0b\n" + " ipm %0\n" + " srl %0,28" + : "=&d" (cc), "+a" (r2), "+a" (r3), + "+a" (r4), "+a" (r5) : : "cc"); + *diff = *(char *)r2 - *(char *)r4; + return cc; +} + /** * strstr - Find the first substring in a %NUL terminated string * @s1: The string to be searched @@ -250,18 +270,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; - register unsigned long r4 asm("4") = (unsigned long) s2; - register unsigned long r5 asm("5") = (unsigned long) l2; - int cc; - - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (cc), "+a" (r2), "+a" (r3), - "+a" (r4), "+a" (r5) : : "cc" ); + int cc, dummy; + + cc = clcle(s1, l1, s2, l2, &dummy); if (!cc) return (char *) s1; s1++; @@ -302,20 +313,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - register unsigned long r2 asm("2") = (unsigned long) cs; - register unsigned long r3 asm("3") = (unsigned long) n; - register unsigned long r4 asm("4") = (unsigned long) ct; - register unsigned long r5 asm("5") = (unsigned long) n; - int ret; + int ret, diff; - asm volatile ("0: clcle %1,%3,0\n" - " jo 0b\n" - " ipm %0\n" - " srl %0,28" - : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) - : : "cc" ); + ret = clcle(cs, n, ct, n, &diff); if (ret) - ret = *(char *) r2 - *(char *) r4; + ret = diff; return ret; } EXPORT_SYMBOL(memcmp); diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index ae4de559e3a0..d96596128e9f 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -49,7 +49,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr " jnm 5b\n" " ex %4,0(%3)\n" " j 8f\n" - "7:slgr %0,%0\n" + "7: slgr %0,%0\n" "8:\n" EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) @@ -93,7 +93,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " jnm 6b\n" " ex %4,0(%3)\n" " j 9f\n" - "8:slgr %0,%0\n" + "8: slgr %0,%0\n" "9: sacf 768\n" EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b) EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b) @@ -266,7 +266,7 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size "3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n" " slgr %0,%3\n" " j 5f\n" - "4:slgr %0,%0\n" + "4: slgr %0,%0\n" "5:\n" 
EX_TABLE(0b,2b) EX_TABLE(3b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 8556d6be9b54..861880df12c7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -157,7 +157,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pud = pud_offset(pgd, addr); if (!pud_none(*pud)) if (pud_large(*pud)) { - prot = pud_val(*pud) & _REGION3_ENTRY_RO; + prot = pud_val(*pud) & _REGION_ENTRY_PROTECT; note_page(m, st, prot, 2); } else walk_pmd_level(m, st, pud, addr); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 19288c1b36d3..6ad7eff84c82 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -624,7 +624,7 @@ void pfault_fini(void) diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %0,0,0x258\n" - "0:\n" + "0: nopr %%r7\n" EX_TABLE(0b,0b) : : "a" (&refbk), "m" (refbk) : "cc"); } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cace818d86eb..063c721ec0dc 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc); static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); } @@ -124,7 +124,7 @@ void gmap_free(struct gmap *gmap) /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); + __tlb_flush_idte(gmap->asce); else __tlb_flush_global(); @@ -430,6 +430,9 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) VM_BUG_ON(pgd_none(*pgd)); pud = pud_offset(pgd, vmaddr); VM_BUG_ON(pud_none(*pud)); + /* large puds cannot yet be handled */ + if (pud_large(*pud)) + return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index a8a6765f1a51..adb0c34bf431 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -128,6 +128,44 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, return 1; } +static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + struct page *head, *page; + unsigned long mask; + int refs; + + mask = (write ? 
_REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID; + if ((pud_val(pud) & mask) != 0) + return 0; + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); + + refs = 0; + head = pud_page(pud); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON_PAGE(compound_head(page) != head, page); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pud_val(pud) != pud_val(*pudp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + return 1; +} + static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -144,7 +182,12 @@ static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + if (unlikely(pud_large(pud))) { + if (!gup_huge_pud(pudp, pud, addr, next, write, pages, + nr)) + return 0; + } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages, + nr)) return 0; } while (pudp++, addr = next, addr != end); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 1b5e8983f4f3..e19d853883be 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -1,19 +1,22 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2016 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com> */ +#define KMSG_COMPONENT "hugetlb" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/mm.h> #include <linux/hugetlb.h> -static inline pmd_t __pte_to_pmd(pte_t pte) +static inline unsigned long __pte_to_rste(pte_t pte) { - pmd_t pmd; + unsigned long rste; /* - * Convert encoding pte bits pmd bits + * Convert encoding pte bits pmd / pud bits * lIR.uswrdy.p dy..R...I...wr * empty 010.000000.0 -> 00..0...1...00 * prot-none, clean, old 111.000000.1 -> 00..1...1...00 @@ -33,25 +36,31 @@ static inline pmd_t __pte_to_pmd(pte_t pte) * u unused, l large */ if (pte_present(pte)) { - pmd_val(pmd) = pte_val(pte) & PAGE_MASK; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); - pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; - pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; + rste = pte_val(pte) & PAGE_MASK; + rste |= (pte_val(pte) & _PAGE_READ) >> 4; + rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; + rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; + rste |= (pte_val(pte) & _PAGE_PROTECT); + rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; + rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; + rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; } else - pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; - return pmd; + rste = _SEGMENT_ENTRY_INVALID; + return rste; } -static inline pte_t __pmd_to_pte(pmd_t pmd) +static inline pte_t __rste_to_pte(unsigned long rste) { + int present; pte_t pte; + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + present = pud_present(__pud(rste)); + else + present = pmd_present(__pmd(rste)); + /* - * Convert encoding pmd bits pte bits + * Convert encoding pmd / pud bits pte bits * dy..R...I...wr lIR.uswrdy.p * empty 00..0...1...00 -> 010.000000.0 * prot-none, clean, old 00..1...1...00 -> 111.000000.1 @@ -70,16 +79,16 @@ 
static inline pte_t __pmd_to_pte(pmd_t pmd) * SW-bits: p present, y young, d dirty, r read, w write, s special, * u unused, l large */ - if (pmd_present(pmd)) { - pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE; + if (present) { + pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; - pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); + pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; + pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; } else pte_val(pte) = _PAGE_INVALID; return pte; @@ -88,27 +97,33 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd = __pte_to_pmd(pte); - - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; - *(pmd_t *) ptep = pmd; + unsigned long rste = __pte_to_rste(pte); + + /* Set correct table type for 2G hugepages */ + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; + else + rste |= _SEGMENT_ENTRY_LARGE; + pte_val(*ptep) = rste; } pte_t huge_ptep_get(pte_t *ptep) { - pmd_t pmd = *(pmd_t *) ptep; - - return __pmd_to_pte(pmd); + return __rste_to_pte(pte_val(*ptep)); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + pte_t pte = huge_ptep_get(ptep); pmd_t *pmdp = (pmd_t *) ptep; - pmd_t old; + pud_t *pudp = (pud_t *) ptep; - old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); - return __pmd_to_pte(old); + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY)); + else + pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pte; } pte_t *huge_pte_alloc(struct mm_struct *mm, @@ -120,8 +135,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pgdp = pgd_offset(mm, addr); pudp = pud_alloc(mm, pgdp, addr); - if (pudp) - pmdp = pmd_alloc(mm, pudp, addr); + if (pudp) { + if (sz == PUD_SIZE) + return (pte_t *) pudp; + else if (sz == PMD_SIZE) + pmdp = pmd_alloc(mm, pudp, addr); + } return (pte_t *) pmdp; } @@ -134,8 +153,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgdp = pgd_offset(mm, addr); if (pgd_present(*pgdp)) { pudp = pud_offset(pgdp, addr); - if (pud_present(*pudp)) + if (pud_present(*pudp)) { + if (pud_large(*pudp)) + return (pte_t *) pudp; pmdp = pmd_offset(pudp, addr); + } } return (pte_t *) pmdp; } @@ -147,5 +169,34 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { - return 0; + return pud_large(pud); +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int flags) +{ + if (flags & FOLL_GET) + return NULL; + + return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long size; + char *string = opt; + + 
size = memparse(opt, &opt); + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz= specifies an unsupported page size %s\n", + string); + return 0; + } + return 1; } +__setup("hugepagesz=", setup_hugepagesz); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 2489b2e917c8..f56a39bd8ba6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -40,7 +40,7 @@ #include <asm/ctl_reg.h> #include <asm/sclp.h> -pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); @@ -111,17 +111,16 @@ void __init paging_init(void) void mark_rodata_ro(void) { - /* Text and rodata are already protected. Nothing to do here. */ - pr_info("Write protecting the kernel read-only data: %luk\n", - ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10); + unsigned long size = __end_ro_after_init - __start_ro_after_init; + + set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); + pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } void __init mem_init(void) { - if (MACHINE_HAS_TLB_LC) - cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(0, mm_cpumask(&init_mm)); - atomic_set(&init_mm.context.attach_count, 1); set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index a90d45e9dfb0..3330ea124eec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -34,20 +34,25 @@ static int __init cmma(char *str) } __setup("cmma=", cmma); -void __init cmma_init(void) +static inline int cmma_test_essa(void) { register unsigned long tmp asm("0") = 0; register int rc asm("1") = -EOPNOTSUPP; - if (!cmma_flag) - return; asm volatile( " .insn rrf,0xb9ab0000,%1,%1,0,0\n" "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "+&d" (rc), "+&d" (tmp)); - if (rc) + return rc; +} + +void __init cmma_init(void) +{ + if (!cmma_flag) + return; + if (cmma_test_essa()) cmma_flag = 0; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f2a5c29a97e9..7104ffb5a67f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> #include <asm/page.h> -#if PAGE_DEFAULT_KEY static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" @@ -22,6 +21,8 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; + if (!PAGE_DEFAULT_KEY) + return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -38,56 +39,254 @@ void __storage_key_init_range(unsigned long start, unsigned long end) start += PAGE_SIZE; } } -#endif -static pte_t *walk_page_table(unsigned long addr) +#ifdef CONFIG_PROC_FS +atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; + +void arch_report_meminfo(struct seq_file *m) { - pgd_t *pgdp; - pud_t *pudp; + seq_printf(m, "DirectMap4k: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2); + seq_printf(m, "DirectMap1M: %8lu kB\n", + atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10); + seq_printf(m, "DirectMap2G: %8lu kB\n", + 
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21); +} +#endif /* CONFIG_PROC_FS */ + +static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr, + unsigned long dtt) +{ + unsigned long table, mask; + + mask = 0; + if (MACHINE_HAS_EDAT2) { + switch (dtt) { + case CRDTE_DTT_REGION3: + mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1); + break; + case CRDTE_DTT_SEGMENT: + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + break; + case CRDTE_DTT_PAGE: + mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1); + break; + } + table = (unsigned long)old & mask; + crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce); + } else if (MACHINE_HAS_IDTE) { + cspg(old, *old, new); + } else { + csp((unsigned int *)old + 1, *old, new); + } +} + +struct cpa { + unsigned int set_ro : 1; + unsigned int clear_ro : 1; +}; + +static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + pte_t *ptep, new; + + ptep = pte_offset(pmdp, addr); + do { + if (pte_none(*ptep)) + return -EINVAL; + if (cpa.set_ro) + new = pte_wrprotect(*ptep); + else if (cpa.clear_ro) + new = pte_mkwrite(pte_mkdirty(*ptep)); + pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); + ptep++; + addr += PAGE_SIZE; + cond_resched(); + } while (addr < end); + return 0; +} + +static int split_pmd_page(pmd_t *pmdp, unsigned long addr) +{ + unsigned long pte_addr, prot; + pte_t *pt_dir, *ptep; + pmd_t new; + int i, ro; + + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + return -ENOMEM; + pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT; + ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT); + prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); + ptep = pt_dir; + for (i = 0; i < PTRS_PER_PTE; i++) { + pte_val(*ptep) = pte_addr | prot; + pte_addr += PAGE_SIZE; + ptep++; + } + pmd_val(new) = __pa(pt_dir) | _SEGMENT_ENTRY; + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); + update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE); + update_page_count(PG_DIRECT_MAP_1M, -1); + return 0; +} + +static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, struct cpa cpa) +{ + pmd_t new; + + if (cpa.set_ro) + new = pmd_wrprotect(*pmdp); + else if (cpa.clear_ro) + new = pmd_mkwrite(pmd_mkdirty(*pmdp)); + pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); +} + +static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; pmd_t *pmdp; - pte_t *ptep; + int rc = 0; - pgdp = pgd_offset_k(addr); - if (pgd_none(*pgdp)) - return NULL; - pudp = pud_offset(pgdp, addr); - if (pud_none(*pudp) || pud_large(*pudp)) - return NULL; pmdp = pmd_offset(pudp, addr); - if (pmd_none(*pmdp) || pmd_large(*pmdp)) - return NULL; - ptep = pte_offset_kernel(pmdp, addr); - if (pte_none(*ptep)) - return NULL; - return ptep; + do { + if (pmd_none(*pmdp)) + return -EINVAL; + next = pmd_addr_end(addr, end); + if (pmd_large(*pmdp)) { + if (addr & ~PMD_MASK || addr + PMD_SIZE > next) { + rc = split_pmd_page(pmdp, addr); + if (rc) + return rc; + continue; + } + modify_pmd_page(pmdp, addr, cpa); + } else { + rc = walk_pte_level(pmdp, addr, next, cpa); + if (rc) + return rc; + } + pmdp++; + addr = next; + cond_resched(); + } while (addr < end); + return rc; } -static void change_page_attr(unsigned long addr, int numpages, - pte_t (*set) (pte_t)) +static int split_pud_page(pud_t *pudp, unsigned long addr) { - pte_t *ptep; - int i; + unsigned long pmd_addr, prot; + pmd_t *pm_dir, *pmdp; + pud_t new; + int i, ro; - for (i = 0; i < numpages; i++) { - 
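		/* removed path: resolve each 4K pte and rewrite it in
		 * place; walk_page_table() bailed out on large pmd/pud
		 * entries, which the new split_pmd_page()/split_pud_page()
		 * paths handle by splitting them first */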
ptep = walk_page_table(addr); - if (WARN_ON_ONCE(!ptep)) - break; - *ptep = set(*ptep); - addr += PAGE_SIZE; + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + return -ENOMEM; + pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT; + ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT); + prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL); + pmdp = pm_dir; + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_val(*pmdp) = pmd_addr | prot; + pmd_addr += PMD_SIZE; + pmdp++; } - __tlb_flush_kernel(); + pud_val(new) = __pa(pm_dir) | _REGION3_ENTRY; + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); + update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD); + update_page_count(PG_DIRECT_MAP_2G, -1); + return 0; +} + +static void modify_pud_page(pud_t *pudp, unsigned long addr, struct cpa cpa) +{ + pud_t new; + + if (cpa.set_ro) + new = pud_wrprotect(*pudp); + else if (cpa.clear_ro) + new = pud_mkwrite(pud_mkdirty(*pudp)); + pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); +} + +static int walk_pud_level(pgd_t *pgd, unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + pud_t *pudp; + int rc = 0; + + pudp = pud_offset(pgd, addr); + do { + if (pud_none(*pudp)) + return -EINVAL; + next = pud_addr_end(addr, end); + if (pud_large(*pudp)) { + if (addr & ~PUD_MASK || addr + PUD_SIZE > next) { + rc = split_pud_page(pudp, addr); + if (rc) + break; + continue; + } + modify_pud_page(pudp, addr, cpa); + } else { + rc = walk_pmd_level(pudp, addr, next, cpa); + } + pudp++; + addr = next; + cond_resched(); + } while (addr < end && !rc); + return rc; +} + +static DEFINE_MUTEX(cpa_mutex); + +static int change_page_attr(unsigned long addr, unsigned long end, + struct cpa cpa) +{ + unsigned long next; + int rc = -EINVAL; + pgd_t *pgdp; + + if (end >= MODULES_END) + return -EINVAL; + mutex_lock(&cpa_mutex); + pgdp = pgd_offset_k(addr); + do { + if (pgd_none(*pgdp)) + break; + next = pgd_addr_end(addr, end); + rc = walk_pud_level(pgdp, addr, next, cpa); + if (rc) + break; + cond_resched(); + } while (pgdp++, addr = next, addr < end && !rc); + mutex_unlock(&cpa_mutex); + return rc; } int set_memory_ro(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_wrprotect); - return 0; + struct cpa cpa = { + .set_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } int set_memory_rw(unsigned long addr, int numpages) { - change_page_attr(addr, numpages, pte_mkwrite); - return 0; + struct cpa cpa = { + .clear_ro = 1, + }; + + addr &= PAGE_MASK; + return change_page_attr(addr, addr + numpages * PAGE_SIZE, cpa); } /* not possible */ @@ -138,7 +337,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) nr = min(numpages - i, nr); if (enable) { for (j = 0; j < nr; j++) { - pte_val(*pte) = __pa(address); + pte_val(*pte) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; pte++; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9f0ce0e6eeb4..b98d1a152d46 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -27,40 +27,37 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __ptep_ipte_local(addr, ptep); else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pte_t ptep_flush_lazy(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - int active, count; pte_t old; old = *ptep; if (unlikely(pte_val(old) & _PAGE_INVALID)) return old; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; } else __ptep_ipte(addr, ptep); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -70,7 +67,6 @@ static inline pgste_t pgste_get_lock(pte_t *ptep) #ifdef CONFIG_PGSTE unsigned long old; - preempt_disable(); asm( " lg %0,%2\n" "0: lgr %1,%0\n" @@ -93,7 +89,6 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) : "=Q" (ptep[PTRS_PER_PTE]) : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); - preempt_enable(); #endif } @@ -230,9 +225,11 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_direct(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_direct); @@ -243,9 +240,11 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + preempt_enable(); return old; } EXPORT_SYMBOL(ptep_xchg_lazy); @@ -256,6 +255,7 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pgste_t pgste; pte_t old; + preempt_disable(); pgste = ptep_xchg_start(mm, addr, ptep); old = ptep_flush_lazy(mm, addr, ptep); if (mm_has_pgste(mm)) { @@ -279,13 +279,13 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } else { *ptep = pte; } + preempt_enable(); } EXPORT_SYMBOL(ptep_modify_prot_commit); static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; @@ -295,36 +295,34 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, __pmdp_csp(pmdp); return old; } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) __pmdp_idte_local(addr, pmdp); else __pmdp_idte(addr, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - int active, count; pmd_t old; old = *pmdp; if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) return old; - active = (mm == current->active_mm) ? 
1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { + atomic_inc(&mm->context.flush_count); + if (cpumask_equal(&mm->context.cpu_attach_mask, + cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; } else if (MACHINE_HAS_IDTE) __pmdp_idte(addr, pmdp); else __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + atomic_dec(&mm->context.flush_count); return old; } @@ -333,8 +331,10 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_direct(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_direct); @@ -344,12 +344,53 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, { pmd_t old; + preempt_disable(); old = pmdp_flush_lazy(mm, addr, pmdp); *pmdp = new; + preempt_enable(); return old; } EXPORT_SYMBOL(pmdp_xchg_lazy); +static inline pud_t pudp_flush_direct(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + pud_t old; + + old = *pudp; + if (pud_val(old) & _REGION_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + /* + * Invalid bit position is the same for pmd and pud, so we can + * re-use _pmd_csp() here + */ + __pmdp_csp((pmd_t *) pudp); + return old; + } + atomic_inc(&mm->context.flush_count); + if (MACHINE_HAS_TLB_LC && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pudp_idte_local(addr, pudp); + else + __pudp_idte(addr, pudp); + atomic_dec(&mm->context.flush_count); + return old; +} + +pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t new) +{ + pud_t old; + + preempt_disable(); + old = pudp_flush_direct(mm, addr, pudp); + *pudp = new; + preempt_enable(); + return old; +} +EXPORT_SYMBOL(pudp_xchg_direct); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) @@ -398,20 +439,24 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, pgste_t pgste; /* the mm_has_pgste() check is done in set_pte_at() */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; pgste_set_key(ptep, pgste, entry, mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pgste_t pgste; + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); + preempt_enable(); } static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) @@ -434,6 +479,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_t pte; /* Zap unused and logically-zero pages */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); pte = *ptep; @@ -446,6 +492,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (reset) pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); + preempt_enable(); } void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -454,6 +501,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_t pgste; /* Clear storage key */ + preempt_disable(); pgste = pgste_get_lock(ptep); pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -461,6 +509,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) 
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); pgste_set_unlock(ptep, pgste); + preempt_enable(); } /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index d48cf25cfe99..1848292766ef 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -11,6 +11,7 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/memblock.h> +#include <asm/cacheflush.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/setup.h> @@ -29,9 +30,11 @@ static LIST_HEAD(mem_segs); static void __ref *vmem_alloc_pages(unsigned int order) { + unsigned long size = PAGE_SIZE << order; + if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return alloc_bootmem_pages((1 << order) * PAGE_SIZE); + return alloc_bootmem_align(size, size); } static inline pud_t *vmem_pud_alloc(void) @@ -45,7 +48,7 @@ static inline pud_t *vmem_pud_alloc(void) return pud; } -static inline pmd_t *vmem_pmd_alloc(void) +pmd_t *vmem_pmd_alloc(void) { pmd_t *pmd = NULL; @@ -56,7 +59,7 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(void) +pte_t __ref *vmem_pte_alloc(void) { pte_t *pte; @@ -75,8 +78,9 @@ static pte_t __ref *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_mem(unsigned long start, unsigned long size, int ro) +static int vmem_add_mem(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; @@ -85,6 +89,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pte_t *pt_dir; int ret = -ENOMEM; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -97,10 +102,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && !debug_pagealloc_enabled()) { - pud_val(*pu_dir) = __pa(address) | - _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_PROTECT : 0); + pud_val(*pu_dir) = address | pgprot_val(REGION3_KERNEL); address += PUD_SIZE; + pages2g++; continue; } if (pud_none(*pu_dir)) { @@ -113,11 +117,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && !debug_pagealloc_enabled()) { - pmd_val(*pm_dir) = __pa(address) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_YOUNG | - (ro ? _SEGMENT_ENTRY_PROTECT : 0); + pmd_val(*pm_dir) = address | pgprot_val(SEGMENT_KERNEL); address += PMD_SIZE; + pages1m++; continue; } if (pmd_none(*pm_dir)) { @@ -128,12 +130,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | - pgprot_val(ro ? 
PAGE_KERNEL_RO : PAGE_KERNEL); + pte_val(*pt_dir) = address | pgprot_val(PAGE_KERNEL); address += PAGE_SIZE; + pages4k++; } ret = 0; out: + update_page_count(PG_DIRECT_MAP_4K, pages4k); + update_page_count(PG_DIRECT_MAP_1M, pages1m); + update_page_count(PG_DIRECT_MAP_2G, pages2g); return ret; } @@ -143,15 +148,15 @@ out: */ static void vmem_remove_range(unsigned long start, unsigned long size) { + unsigned long pages4k, pages1m, pages2g; unsigned long end = start + size; unsigned long address = start; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; pte_t *pt_dir; - pte_t pte; - pte_val(pte) = _PAGE_INVALID; + pages4k = pages1m = pages2g = 0; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -166,6 +171,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pud_large(*pu_dir)) { pud_clear(pu_dir); address += PUD_SIZE; + pages2g++; continue; } pm_dir = pmd_offset(pu_dir, address); @@ -176,13 +182,18 @@ static void vmem_remove_range(unsigned long start, unsigned long size) if (pmd_large(*pm_dir)) { pmd_clear(pm_dir); address += PMD_SIZE; + pages1m++; continue; } pt_dir = pte_offset_kernel(pm_dir, address); - *pt_dir = pte; + pte_clear(&init_mm, address, pt_dir); address += PAGE_SIZE; + pages4k++; } flush_tlb_kernel_range(start, end); + update_page_count(PG_DIRECT_MAP_4K, -pages4k); + update_page_count(PG_DIRECT_MAP_1M, -pages1m); + update_page_count(PG_DIRECT_MAP_2G, -pages2g); } /* @@ -341,7 +352,7 @@ int vmem_add_mapping(unsigned long start, unsigned long size) if (ret) goto out_free; - ret = vmem_add_mem(start, size, 0); + ret = vmem_add_mem(start, size); if (ret) goto out_remove; goto out; @@ -362,31 +373,13 @@ out: */ void __init vmem_map_init(void) { - unsigned long ro_start, ro_end; + unsigned long size = _eshared - _stext; struct memblock_region *reg; - phys_addr_t start, end; - ro_start = PFN_ALIGN((unsigned long)&_stext); - ro_end = (unsigned long)&_eshared & PAGE_MASK; - for_each_memblock(memory, reg) { - start = reg->base; - end = reg->base + reg->size; - if (start >= ro_end || end <= ro_start) - vmem_add_mem(start, end - start, 0); - else if (start >= ro_start && end <= ro_end) - vmem_add_mem(start, end - start, 1); - else if (start >= ro_start) { - vmem_add_mem(start, ro_end - start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } else if (end < ro_end) { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, end - ro_start, 1); - } else { - vmem_add_mem(start, ro_start - start, 0); - vmem_add_mem(ro_start, ro_end - ro_start, 1); - vmem_add_mem(ro_end, end - ro_end, 0); - } - } + for_each_memblock(memory, reg) + vmem_add_mem(reg->base, reg->size); + set_memory_ro((unsigned long)_stext, size >> PAGE_SHIFT); + pr_info("Write protected kernel read-only data: %luk\n", size >> 10); } /* diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 828d0695d0d4..fbc394e16b2c 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -34,7 +34,8 @@ #define DIST_CORE 1 #define DIST_MC 2 #define DIST_BOOK 3 -#define DIST_MAX 4 +#define DIST_DRAWER 4 +#define DIST_MAX 5 /* Node distance reported to common code */ #define EMU_NODE_DIST 10 @@ -43,7 +44,7 @@ #define NODE_ID_FREE -1 /* Different levels of toptree */ -enum toptree_level {CORE, MC, BOOK, NODE, TOPOLOGY}; +enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY}; /* The two toptree IDs */ enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA}; @@ -114,6 +115,14 @@ static int cores_free(struct toptree *tree) */ static struct toptree 
*core_node(struct toptree *core) { + return core->parent->parent->parent->parent; +} + +/* + * Return drawer of core + */ +static struct toptree *core_drawer(struct toptree *core) +{ return core->parent->parent->parent; } @@ -138,6 +147,8 @@ static struct toptree *core_mc(struct toptree *core) */ static int dist_core_to_core(struct toptree *core1, struct toptree *core2) { + if (core_drawer(core1)->id != core_drawer(core2)->id) + return DIST_DRAWER; if (core_book(core1)->id != core_book(core2)->id) return DIST_BOOK; if (core_mc(core1)->id != core_mc(core2)->id) @@ -262,6 +273,8 @@ static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys) struct toptree *core; /* Always try to move perfectly fitting structures first */ + move_level_to_numa(numa, phys, DRAWER, true); + move_level_to_numa(numa, phys, DRAWER, false); move_level_to_numa(numa, phys, BOOK, true); move_level_to_numa(numa, phys, BOOK, false); move_level_to_numa(numa, phys, MC, true); @@ -335,7 +348,7 @@ static struct toptree *toptree_to_numa(struct toptree *phys) */ static struct toptree *toptree_from_topology(void) { - struct toptree *phys, *node, *book, *mc, *core; + struct toptree *phys, *node, *drawer, *book, *mc, *core; struct cpu_topology_s390 *top; int cpu; @@ -344,10 +357,11 @@ static struct toptree *toptree_from_topology(void) for_each_online_cpu(cpu) { top = &per_cpu(cpu_topology, cpu); node = toptree_get_child(phys, 0); - book = toptree_get_child(node, top->book_id); + drawer = toptree_get_child(node, top->drawer_id); + book = toptree_get_child(drawer, top->book_id); mc = toptree_get_child(book, top->socket_id); core = toptree_get_child(mc, top->core_id); - if (!book || !mc || !core) + if (!drawer || !book || !mc || !core) panic("NUMA emulation could not allocate memory"); cpumask_set_cpu(cpu, &core->mask); toptree_update_mask(mc); @@ -368,6 +382,7 @@ static void topology_add_core(struct toptree *core) cpumask_copy(&top->thread_mask, &core->mask); cpumask_copy(&top->core_mask, &core_mc(core)->mask); cpumask_copy(&top->book_mask, &core_book(core)->mask); + cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask); cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); top->node_id = core_node(core)->id; } diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 496e4a7ee00e..e9dd41b0b8d3 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -7,4 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o -oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c deleted file mode 100644 index ff9b4eb34589..000000000000 --- a/arch/s390/oprofile/hwsampler.c +++ /dev/null @@ -1,1178 +0,0 @@ -/* - * Copyright IBM Corp. 
2010 - * Author: Heinz Graalfs <graalfs@de.ibm.com> - */ - -#include <linux/kernel_stat.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/workqueue.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/semaphore.h> -#include <linux/oom.h> -#include <linux/oprofile.h> - -#include <asm/facility.h> -#include <asm/cpu_mf.h> -#include <asm/irq.h> - -#include "hwsampler.h" -#include "op_counter.h" - -#define MAX_NUM_SDB 511 -#define MIN_NUM_SDB 1 - -DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); - -struct hws_execute_parms { - void *buffer; - signed int rc; -}; - -DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); -EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); - -static DEFINE_MUTEX(hws_sem); -static DEFINE_MUTEX(hws_sem_oom); - -static unsigned char hws_flush_all; -static unsigned int hws_oom; -static unsigned int hws_alert; -static struct workqueue_struct *hws_wq; - -static unsigned int hws_state; -enum { - HWS_INIT = 1, - HWS_DEALLOCATED, - HWS_STOPPED, - HWS_STARTED, - HWS_STOPPING }; - -/* set to 1 if called by kernel during memory allocation */ -static unsigned char oom_killer_was_active; -/* size of SDBT and SDB as of allocate API */ -static unsigned long num_sdbt = 100; -static unsigned long num_sdb = 511; -/* sampling interval (machine cycles) */ -static unsigned long interval; - -static unsigned long min_sampler_rate; -static unsigned long max_sampler_rate; - -static void execute_qsi(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = qsi(ep->buffer); -} - -static void execute_ssctl(void *parms) -{ - struct hws_execute_parms *ep = parms; - - ep->rc = lsctl(ep->buffer); -} - -static int smp_ctl_ssctl_stop(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 0; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) { - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - dump_stack(); - } - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.es || cb->qsi.cs) { - printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); - dump_stack(); - } - - return rc; -} - -static int smp_ctl_ssctl_deactivate(int cpu) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.es = 1; - cb->ssctl.cs = 0; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - if (cb->qsi.cs) - printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); - - return rc; -} - -static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) -{ - int rc; - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - cb->ssctl.h = 1; - cb->ssctl.tear = cb->first_sdbt; - cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; - cb->ssctl.interval = interval; - cb->ssctl.es = 1; - cb->ssctl.cs = 1; - - ep.buffer = &cb->ssctl; - smp_call_function_single(cpu, execute_ssctl, &ep, 1); - rc = ep.rc; - if (rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, 
execute_qsi, &ep, 1); - if (ep.rc) - printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); - - return rc; -} - -static int smp_ctl_qsi(int cpu) -{ - struct hws_execute_parms ep; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - ep.buffer = &cb->qsi; - smp_call_function_single(cpu, execute_qsi, &ep, 1); - - return ep.rc; -} - -static void hws_ext_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); - - if (!(param32 & CPU_MF_INT_SF_MASK)) - return; - - if (!hws_alert) - return; - - inc_irq_stat(IRQEXT_CMS); - atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); - - if (hws_wq) - queue_work(hws_wq, &cb->worker); -} - -static void worker(struct work_struct *work); - -static void add_samples_to_oprofile(unsigned cpu, unsigned long *, - unsigned long *dear); - -static void init_all_cpu_buffers(void) -{ - int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - memset(cb, 0, sizeof(struct hws_cpu_buffer)); - } -} - -static void prepare_cpu_buffers(void) -{ - struct hws_cpu_buffer *cb; - int cpu; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - atomic_set(&cb->ext_params, 0); - cb->worker_entry = 0; - cb->sample_overflow = 0; - cb->req_alert = 0; - cb->incorrect_sdbt_entry = 0; - cb->invalid_entry_address = 0; - cb->loss_of_sample_data = 0; - cb->sample_auth_change_alert = 0; - cb->finish = 0; - cb->oom = 0; - cb->stop_mode = 0; - } -} - -/* - * allocate_sdbt() - allocate sampler memory - * @cpu: the cpu for which sampler memory is allocated - * - * A 4K page is allocated for each requested SDBT. - * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. - * Set ALERT_REQ mask in each SDBs trailer. - * Returns zero if successful, <0 otherwise. 
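 * The SDB-tables form a circular chain: the last entry of each SDBT
 * is a link entry (low bit set) pointing to the next SDBT page, and
 * the last SDBT links back to the first one.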
- */ -static int allocate_sdbt(int cpu) -{ - int j, k, rc; - unsigned long *sdbt; - unsigned long sdb; - unsigned long *tail; - unsigned long *trailer; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->first_sdbt) - return -EINVAL; - - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdbt) { - if (sdbt) - free_page((unsigned long)sdbt); - - goto allocate_sdbt_error; - } - if (cb->first_sdbt == 0) - cb->first_sdbt = (unsigned long)sdbt; - - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *)sdbt + 1; - - mutex_unlock(&hws_sem_oom); - - for (k = 0; k < num_sdb; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - - mutex_lock(&hws_sem_oom); - /* OOM killer might have been activated */ - barrier(); - if (oom_killer_was_active || !sdb) { - if (sdb) - free_page(sdb); - - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - mutex_unlock(&hws_sem_oom); - } - tail = sdbt; - } - mutex_lock(&hws_sem_oom); - if (oom_killer_was_active) - goto allocate_sdbt_error; - - rc = 0; - if (tail) - *tail = (unsigned long) - ((void *)cb->first_sdbt) + 1; - -allocate_sdbt_exit: - mutex_unlock(&hws_sem_oom); - return rc; - -allocate_sdbt_error: - rc = -ENOMEM; - goto allocate_sdbt_exit; -} - -/* - * deallocate_sdbt() - deallocate all sampler memory - * - * For each online CPU all SDBT trees are deallocated. - * Returns the number of freed pages. - */ -static int deallocate_sdbt(void) -{ - int cpu; - int counter; - - counter = 0; - - for_each_online_cpu(cpu) { - unsigned long start; - unsigned long sdbt; - unsigned long *curr; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->first_sdbt) - continue; - - sdbt = cb->first_sdbt; - curr = (unsigned long *) sdbt; - start = sdbt; - - /* we'll free the SDBT after all SDBs are processed... 
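The loop that follows walks this circular chain via the is_link_entry()/get_next_sdbt() helpers it uses: plain entries are SDB pages to free, a link entry (low bit set) leads to the next table, and the walk stops when a link points back to the starting SDBT. A compact userspace model of that traversal, simplified and with stand-in helpers:

#include <stdio.h>

/* stand-ins: an entry with the low bit set links to the next table */
static int is_link_entry(unsigned long *e)
{
	return *e & 1UL;
}

static unsigned long *get_next_sdbt(unsigned long *e)
{
	return (unsigned long *)(*e & ~1UL);
}

int main(void)
{
	unsigned long t2[2];
	unsigned long t1[2] = { 0x1000, (unsigned long)t2 | 1UL };
	unsigned long *curr = t1, *start = t1;

	t2[0] = 0x2000;
	t2[1] = (unsigned long)t1 | 1UL;	/* circular: back to start */

	while (1) {
		if (is_link_entry(curr)) {
			curr = get_next_sdbt(curr);
			if (curr == start)
				break;		/* full circle, done */
		} else {
			/* a real implementation would free_page() here */
			printf("SDB at %#lx\n", *curr);
			curr++;
		}
	}
	return 0;
}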
*/ - while (1) { - if (!*curr || !sdbt) - break; - - /* watch for link entry reset if found */ - if (is_link_entry(curr)) { - curr = get_next_sdbt(curr); - if (sdbt) - free_page(sdbt); - - /* we are done if we reach the start */ - if ((unsigned long) curr == start) - break; - else - sdbt = (unsigned long) curr; - } else { - /* process SDB pointer */ - if (*curr) { - free_page(*curr); - curr++; - } - } - counter++; - } - cb->first_sdbt = 0; - } - return counter; -} - -static int start_sampling(int cpu) -{ - int rc; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); - goto start_exit; - } - - rc = -EINVAL; - if (!cb->qsi.es) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); - goto start_exit; - } - - if (!cb->qsi.cs) { - printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); - goto start_exit; - } - - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", - cpu, interval); - - rc = 0; - -start_exit: - return rc; -} - -static int stop_sampling(int cpu) -{ - unsigned long v; - int rc; - struct hws_cpu_buffer *cb; - - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (!rc && !cb->qsi.es) - printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); - - rc = smp_ctl_ssctl_stop(cpu); - if (rc) { - printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", - cpu, rc); - goto stop_exit; - } - - printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); - -stop_exit: - v = cb->req_alert; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," - " count=%lu.\n", cpu, v); - - v = cb->loss_of_sample_data; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," - " count=%lu.\n", cpu, v); - - v = cb->invalid_entry_address; - if (v) - printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," - " count=%lu.\n", cpu, v); - - v = cb->incorrect_sdbt_entry; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Incorrect SDBT address," - " count=%lu.\n", cpu, v); - - v = cb->sample_auth_change_alert; - if (v) - printk(KERN_ERR - "hwsampler: CPU %d CPUMF Sample authorization change," - " count=%lu.\n", cpu, v); - - return rc; -} - -static int check_hardware_prerequisites(void) -{ - if (!test_facility(68)) - return -EOPNOTSUPP; - return 0; -} -/* - * hws_oom_callback() - the OOM callback function - * - * In case the callback is invoked during memory allocation for the - * hw sampler, all obtained memory is deallocated and a flag is set - * so main sampler memory allocation can exit with a failure code. - * In case the callback is invoked during sampling the hw sampler - * is deactivated for all CPUs. 
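 * The notifier receives a pointer to the caller's freed-pages count
 * through the parm argument and adds the pages released by
 * deallocate_sdbt() to it.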
- */ -static int hws_oom_callback(struct notifier_block *nfb, - unsigned long dummy, void *parm) -{ - unsigned long *freed; - int cpu; - struct hws_cpu_buffer *cb; - - freed = parm; - - mutex_lock(&hws_sem_oom); - - if (hws_state == HWS_DEALLOCATED) { - /* during memory allocation */ - if (oom_killer_was_active == 0) { - oom_killer_was_active = 1; - *freed += deallocate_sdbt(); - } - } else { - int i; - cpu = get_cpu(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (!cb->oom) { - for_each_online_cpu(i) { - smp_ctl_ssctl_deactivate(i); - cb->oom = 1; - } - cb->finish = 1; - - printk(KERN_INFO - "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", - cpu); - } - } - - mutex_unlock(&hws_sem_oom); - - return NOTIFY_OK; -} - -static struct notifier_block hws_oom_notifier = { - .notifier_call = hws_oom_callback -}; - -static int hws_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - /* We do not have sampler space available for all possible CPUs. - All CPUs should be online when hw sampling is activated. */ - return (hws_state <= HWS_DEALLOCATED) ? NOTIFY_OK : NOTIFY_BAD; -} - -static struct notifier_block hws_cpu_notifier = { - .notifier_call = hws_cpu_callback -}; - -/** - * hwsampler_deactivate() - set hardware sampling temporarily inactive - * @cpu: specifies the CPU to be set inactive. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_deactivate(unsigned int cpu) -{ - /* - * Deactivate hw sampling temporarily and flush the buffer - * by pushing all the pending samples to oprofile buffer. - * - * This function can be called under one of the following conditions: - * Memory unmap, task is exiting. - */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.cs) { - rc = smp_ctl_ssctl_deactivate(cpu); - if (rc) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); - cb->finish = 1; - hws_state = HWS_STOPPING; - } else { - hws_flush_all = 1; - /* Add work to queue to read pending samples.*/ - queue_work_on(cpu, hws_wq, &cb->worker); - } - } - } - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - return rc; -} - -/** - * hwsampler_activate() - activate/resume hardware sampling which was deactivated - * @cpu: specifies the CPU to be set active. - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_activate(unsigned int cpu) -{ - /* - * Re-activate hw sampling. This should be called in pair with - * hwsampler_deactivate(). 
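 * If the query shows sampling is already active on the CPU, nothing
 * is re-enabled.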
- */ - int rc; - struct hws_cpu_buffer *cb; - - rc = 0; - mutex_lock(&hws_sem); - - cb = &per_cpu(sampler_cpu_buffer, cpu); - if (hws_state == HWS_STARTED) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (!cb->qsi.cs) { - hws_flush_all = 0; - rc = smp_ctl_ssctl_enable_activate(cpu, interval); - if (rc) { - printk(KERN_ERR - "CPU %d, CPUMF activate sampling failed.\n", - cpu); - } - } - } - - mutex_unlock(&hws_sem); - - return rc; -} - -static int check_qsi_on_setup(void) -{ - int rc; - unsigned int cpu; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (rc) - return -EOPNOTSUPP; - - if (!cb->qsi.as) { - printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); - return -EINVAL; - } - - if (cb->qsi.es) { - printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - return -EINVAL; - - printk(KERN_INFO - "CPU %d, CPUMF Sampling stopped now.\n", cpu); - } - } - return 0; -} - -static int check_qsi_on_start(void) -{ - unsigned int cpu; - int rc; - struct hws_cpu_buffer *cb; - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - - if (!cb->qsi.as) - return -EINVAL; - - if (cb->qsi.es) - return -EINVAL; - - if (cb->qsi.cs) - return -EINVAL; - } - return 0; -} - -static void worker_on_start(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->worker_entry = cb->first_sdbt; -} - -static int worker_check_error(unsigned int cpu, int ext_params) -{ - int rc; - unsigned long *sdbt; - struct hws_cpu_buffer *cb; - - rc = 0; - cb = &per_cpu(sampler_cpu_buffer, cpu); - sdbt = (unsigned long *) cb->worker_entry; - - if (!sdbt || !*sdbt) - return -EINVAL; - - if (ext_params & CPU_MF_INT_SF_PRA) - cb->req_alert++; - - if (ext_params & CPU_MF_INT_SF_LSDA) - cb->loss_of_sample_data++; - - if (ext_params & CPU_MF_INT_SF_IAE) { - cb->invalid_entry_address++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_ISE) { - cb->incorrect_sdbt_entry++; - rc = -EINVAL; - } - - if (ext_params & CPU_MF_INT_SF_SACA) { - cb->sample_auth_change_alert++; - rc = -EINVAL; - } - - return rc; -} - -static void worker_on_finish(unsigned int cpu) -{ - int rc, i; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - if (cb->finish) { - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (cb->qsi.es) { - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", - cpu); - rc = smp_ctl_ssctl_stop(cpu); - if (rc) - printk(KERN_INFO - "hwsampler: CPU %d, CPUMF Deactivation failed.\n", - cpu); - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (!cb->finish) { - cb->finish = 1; - queue_work_on(i, hws_wq, - &cb->worker); - } - } - } - } -} - -static void worker_on_interrupt(unsigned int cpu) -{ - unsigned long *sdbt; - unsigned char done; - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - sdbt = (unsigned long *) cb->worker_entry; - - done = 0; - /* do not proceed if stop was entered, - * forget the buffers not yet processed */ - while (!done && !cb->stop_mode) { - unsigned long *trailer; - struct hws_trailer_entry *te; - unsigned long *dear = 0; - - trailer = trailer_entry_ptr(*sdbt); - /* leave loop if no more work to do */ - if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { - done = 1; - if (!hws_flush_all) - continue; - } - - te = (struct hws_trailer_entry *)trailer; - cb->sample_overflow += te->overflow; - - 
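		/* hand the samples of this SDB over to the oprofile
		 * buffer before the trailer is reset below */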
add_samples_to_oprofile(cpu, sdbt, dear); - - /* reset trailer */ - xchg((unsigned char *) te, 0x40); - - /* advance to next sdb slot in current sdbt */ - sdbt++; - /* in case link bit is set use address w/o link bit */ - if (is_link_entry(sdbt)) - sdbt = get_next_sdbt(sdbt); - - cb->worker_entry = (unsigned long)sdbt; - } -} - -static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, - unsigned long *dear) -{ - struct hws_basic_entry *sample_data_ptr; - unsigned long *trailer; - - trailer = trailer_entry_ptr(*sdbt); - if (dear) { - if (dear > trailer) - return; - trailer = dear; - } - - sample_data_ptr = (struct hws_basic_entry *)(*sdbt); - - while ((unsigned long *)sample_data_ptr < trailer) { - struct pt_regs *regs = NULL; - struct task_struct *tsk = NULL; - - /* - * Check sampling mode, 1 indicates basic (=customer) sampling - * mode. - */ - if (sample_data_ptr->def != 1) { - /* sample slot is not yet written */ - break; - } else { - /* make sure we don't use it twice, - * the next time the sampler will set it again */ - sample_data_ptr->def = 0; - } - - /* Get pt_regs. */ - if (sample_data_ptr->P == 1) { - /* userspace sample */ - unsigned int pid = sample_data_ptr->prim_asn; - if (!counter_config.user) - goto skip_sample; - rcu_read_lock(); - tsk = pid_task(find_vpid(pid), PIDTYPE_PID); - if (tsk) - regs = task_pt_regs(tsk); - rcu_read_unlock(); - } else { - /* kernelspace sample */ - if (!counter_config.kernel) - goto skip_sample; - regs = task_pt_regs(current); - } - - mutex_lock(&hws_sem); - oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, - !sample_data_ptr->P, tsk); - mutex_unlock(&hws_sem); - skip_sample: - sample_data_ptr++; - } -} - -static void worker(struct work_struct *work) -{ - unsigned int cpu; - int ext_params; - struct hws_cpu_buffer *cb; - - cb = container_of(work, struct hws_cpu_buffer, worker); - cpu = smp_processor_id(); - ext_params = atomic_xchg(&cb->ext_params, 0); - - if (!cb->worker_entry) - worker_on_start(cpu); - - if (worker_check_error(cpu, ext_params)) - return; - - if (!cb->finish) - worker_on_interrupt(cpu); - - if (cb->finish) - worker_on_finish(cpu); -} - -/** - * hwsampler_allocate() - allocate memory for the hardware sampler - * @sdbt: number of SDBTs per online CPU (must be > 0) - * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) -{ - int cpu, rc; - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_DEALLOCATED) - goto allocate_exit; - - if (sdbt < 1) - goto allocate_exit; - - if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) - goto allocate_exit; - - num_sdbt = sdbt; - num_sdb = sdb; - - oom_killer_was_active = 0; - register_oom_notifier(&hws_oom_notifier); - - for_each_online_cpu(cpu) { - if (allocate_sdbt(cpu)) { - unregister_oom_notifier(&hws_oom_notifier); - goto allocate_error; - } - } - unregister_oom_notifier(&hws_oom_notifier); - if (oom_killer_was_active) - goto allocate_error; - - hws_state = HWS_STOPPED; - rc = 0; - -allocate_exit: - mutex_unlock(&hws_sem); - return rc; - -allocate_error: - rc = -ENOMEM; - printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); - goto allocate_exit; -} - -/** - * hwsampler_deallocate() - deallocate hardware sampler memory - * - * Returns 0 on success, !0 on failure. 
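 * Deallocation is only allowed from the STOPPED state; the
 * measurement-alert interrupt subclass is unregistered first so that
 * no further samples arrive while the SDBs are freed.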
- */ -int hwsampler_deallocate(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto deallocate_exit; - - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -deallocate_exit: - mutex_unlock(&hws_sem); - - return rc; -} - -unsigned long hwsampler_query_min_interval(void) -{ - return min_sampler_rate; -} - -unsigned long hwsampler_query_max_interval(void) -{ - return max_sampler_rate; -} - -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) -{ - struct hws_cpu_buffer *cb; - - cb = &per_cpu(sampler_cpu_buffer, cpu); - - return cb->sample_overflow; -} - -int hwsampler_setup(void) -{ - int rc; - int cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state) - goto setup_exit; - - hws_state = HWS_INIT; - - init_all_cpu_buffers(); - - rc = check_hardware_prerequisites(); - if (rc) - goto setup_exit; - - rc = check_qsi_on_setup(); - if (rc) - goto setup_exit; - - rc = -EINVAL; - hws_wq = create_workqueue("hwsampler"); - if (!hws_wq) - goto setup_exit; - - register_cpu_notifier(&hws_cpu_notifier); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - INIT_WORK(&cb->worker, worker); - rc = smp_ctl_qsi(cpu); - WARN_ON(rc); - if (min_sampler_rate != cb->qsi.min_sampl_rate) { - if (min_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different min sampler rate values.\n"); - if (min_sampler_rate < cb->qsi.min_sampl_rate) - min_sampler_rate = - cb->qsi.min_sampl_rate; - } else - min_sampler_rate = cb->qsi.min_sampl_rate; - } - if (max_sampler_rate != cb->qsi.max_sampl_rate) { - if (max_sampler_rate) { - printk(KERN_WARNING - "hwsampler: different max sampler rate values.\n"); - if (max_sampler_rate > cb->qsi.max_sampl_rate) - max_sampler_rate = - cb->qsi.max_sampl_rate; - } else - max_sampler_rate = cb->qsi.max_sampl_rate; - } - } - register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - - hws_state = HWS_DEALLOCATED; - rc = 0; - -setup_exit: - mutex_unlock(&hws_sem); - return rc; -} - -int hwsampler_shutdown(void) -{ - int rc; - - mutex_lock(&hws_sem); - - rc = -EINVAL; - if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { - mutex_unlock(&hws_sem); - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - - if (hws_state == HWS_STOPPED) { - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - hws_alert = 0; - deallocate_sdbt(); - } - if (hws_wq) { - destroy_workqueue(hws_wq); - hws_wq = NULL; - } - - unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler); - hws_state = HWS_INIT; - rc = 0; - } - mutex_unlock(&hws_sem); - - unregister_cpu_notifier(&hws_cpu_notifier); - - return rc; -} - -/** - * hwsampler_start_all() - start hardware sampling on all online CPUs - * @rate: specifies the used interval when samples are taken - * - * Returns 0 on success, !0 on failure. 
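 * The rate must lie within the machine-reported range
 * [min_sampler_rate, max_sampler_rate]; anything else fails with
 * -EINVAL.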
- */ -int hwsampler_start_all(unsigned long rate) -{ - int rc, cpu; - - mutex_lock(&hws_sem); - - hws_oom = 0; - - rc = -EINVAL; - if (hws_state != HWS_STOPPED) - goto start_all_exit; - - interval = rate; - - /* fail if rate is not valid */ - if (interval < min_sampler_rate || interval > max_sampler_rate) - goto start_all_exit; - - rc = check_qsi_on_start(); - if (rc) - goto start_all_exit; - - prepare_cpu_buffers(); - - for_each_online_cpu(cpu) { - rc = start_sampling(cpu); - if (rc) - break; - } - if (rc) { - for_each_online_cpu(cpu) { - stop_sampling(cpu); - } - goto start_all_exit; - } - hws_state = HWS_STARTED; - rc = 0; - -start_all_exit: - mutex_unlock(&hws_sem); - - if (rc) - return rc; - - register_oom_notifier(&hws_oom_notifier); - hws_oom = 1; - hws_flush_all = 0; - /* now let them in, 1407 CPUMF external interrupts */ - hws_alert = 1; - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/** - * hwsampler_stop_all() - stop hardware sampling on all online CPUs - * - * Returns 0 on success, !0 on failure. - */ -int hwsampler_stop_all(void) -{ - int tmp_rc, rc, cpu; - struct hws_cpu_buffer *cb; - - mutex_lock(&hws_sem); - - rc = 0; - if (hws_state == HWS_INIT) { - mutex_unlock(&hws_sem); - return 0; - } - hws_state = HWS_STOPPING; - mutex_unlock(&hws_sem); - - for_each_online_cpu(cpu) { - cb = &per_cpu(sampler_cpu_buffer, cpu); - cb->stop_mode = 1; - tmp_rc = stop_sampling(cpu); - if (tmp_rc) - rc = tmp_rc; - } - - if (hws_wq) - flush_workqueue(hws_wq); - - mutex_lock(&hws_sem); - if (hws_oom) { - unregister_oom_notifier(&hws_oom_notifier); - hws_oom = 0; - } - hws_state = HWS_STOPPED; - mutex_unlock(&hws_sem); - - return rc; -} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h deleted file mode 100644 index a483d06f2fa7..000000000000 --- a/arch/s390/oprofile/hwsampler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * CPUMF HW sampler functions and internal structures - * - * Copyright IBM Corp. 2010 - * Author(s): Heinz Graalfs <graalfs@de.ibm.com> - */ - -#ifndef HWSAMPLER_H_ -#define HWSAMPLER_H_ - -#include <linux/workqueue.h> -#include <asm/cpu_mf.h> - -struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ -{ /* bytes 0 - 7 Bit(s) */ - unsigned int s:1; /* 0: maximum buffer indicator */ - unsigned int h:1; /* 1: part. level reserved for VM use*/ - unsigned long b2_53:52; /* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ - unsigned long interval; /* 8-15: sampling interval */ - unsigned long tear; /* 16-23: TEAR contents */ - unsigned long dear; /* 24-31: DEAR contents */ - /* 32-63: */ - unsigned long rsvrd1; /* reserved */ - unsigned long rsvrd2; /* reserved */ - unsigned long rsvrd3; /* reserved */ - unsigned long rsvrd4; /* reserved */ -}; - -struct hws_cpu_buffer { - unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ - unsigned long worker_entry; - unsigned long sample_overflow; /* taken from SDB ... 
*/ - struct hws_qsi_info_block qsi; - struct hws_ssctl_request_block ssctl; - struct work_struct worker; - atomic_t ext_params; - unsigned long req_alert; - unsigned long loss_of_sample_data; - unsigned long invalid_entry_address; - unsigned long incorrect_sdbt_entry; - unsigned long sample_auth_change_alert; - unsigned int finish:1; - unsigned int oom:1; - unsigned int stop_mode:1; -}; - -int hwsampler_setup(void); -int hwsampler_shutdown(void); -int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -int hwsampler_deallocate(void); -unsigned long hwsampler_query_min_interval(void); -unsigned long hwsampler_query_max_interval(void); -int hwsampler_start_all(unsigned long interval); -int hwsampler_stop_all(void); -int hwsampler_deactivate(unsigned int cpu); -int hwsampler_activate(unsigned int cpu); -unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); - -#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 791935a65800..16f4c3960b87 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,488 +10,8 @@ */ #include <linux/oprofile.h> -#include <linux/perf_event.h> #include <linux/init.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/module.h> #include <asm/processor.h> -#include <asm/perf_event.h> - -#include "../../../drivers/oprofile/oprof.h" - -#include "hwsampler.h" -#include "op_counter.h" - -#define DEFAULT_INTERVAL 4127518 - -#define DEFAULT_SDBT_BLOCKS 1 -#define DEFAULT_SDB_BLOCKS 511 - -static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; -static unsigned long oprofile_min_interval; -static unsigned long oprofile_max_interval; - -static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; -static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; - -static int hwsampler_enabled; -static int hwsampler_running; /* start_mutex must be held to change */ -static int hwsampler_available; - -static struct oprofile_operations timer_ops; - -struct op_counter_config counter_config; - -enum __force_cpu_type { - reserved = 0, /* do not force */ - timer, -}; -static int force_cpu_type; - -static int set_cpu_type(const char *str, struct kernel_param *kp) -{ - if (!strcmp(str, "timer")) { - force_cpu_type = timer; - printk(KERN_INFO "oprofile: forcing timer to be returned " - "as cpu type\n"); - } else { - force_cpu_type = 0; - } - - return 0; -} -module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); -MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" - "(report cpu_type \"timer\""); - -static int __oprofile_hwsampler_start(void) -{ - int retval; - - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); - if (retval) - return retval; - - retval = hwsampler_start_all(oprofile_hw_interval); - if (retval) - hwsampler_deallocate(); - - return retval; -} - -static int oprofile_hwsampler_start(void) -{ - int retval; - - hwsampler_running = hwsampler_enabled; - - if (!hwsampler_running) - return timer_ops.start(); - - retval = perf_reserve_sampling(); - if (retval) - return retval; - - retval = __oprofile_hwsampler_start(); - if (retval) - perf_release_sampling(); - - return retval; -} - -static void oprofile_hwsampler_stop(void) -{ - if (!hwsampler_running) { - timer_ops.stop(); - return; - } - - hwsampler_stop_all(); - hwsampler_deallocate(); - perf_release_sampling(); - return; -} - -/* - * File ops used for: - * /dev/oprofile/0/enabled - * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer) - */ - -static ssize_t hwsampler_read(struct 
file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset); -} - -static ssize_t hwsampler_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = val; - - return count; -} - -static const struct file_operations hwsampler_fops = { - .read = hwsampler_read, - .write = hwsampler_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/count - * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer) - * - * Make sure that the value is within the hardware range. - */ - -static ssize_t hw_interval_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(oprofile_hw_interval, buf, - count, offset); -} - -static ssize_t hw_interval_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - else if (val > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - else - oprofile_hw_interval = val; - - return count; -} - -static const struct file_operations hw_interval_fops = { - .read = hw_interval_read, - .write = hw_interval_write, -}; - -/* - * File ops used for: - * /dev/oprofile/0/event - * Only a single event with number 0 is supported with this counter. - * - * /dev/oprofile/0/unit_mask - * This is a dummy file needed by the user space tools. - * No value other than 0 is accepted or returned. - */ - -static ssize_t hwsampler_zero_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(0, buf, count, offset); -} - -static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - if (val != 0) - return -EINVAL; - return count; -} - -static const struct file_operations zero_fops = { - .read = hwsampler_zero_read, - .write = hwsampler_zero_write, -}; - -/* /dev/oprofile/0/kernel file ops. */ - -static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.kernel, - buf, count, offset); -} - -static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.kernel = val; - - return count; -} - -static const struct file_operations kernel_fops = { - .read = hwsampler_kernel_read, - .write = hwsampler_kernel_write, -}; - -/* /dev/oprofile/0/user file ops. 
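 * As with the kernel switch above, only 0 or 1 is accepted; the flag
 * selects whether user-space samples are passed on to oprofile.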
*/ - -static ssize_t hwsampler_user_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(counter_config.user, - buf, count, offset); -} - -static ssize_t hwsampler_user_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - counter_config.user = val; - - return count; -} - -static const struct file_operations user_fops = { - .read = hwsampler_user_read, - .write = hwsampler_user_write, -}; - - -/* - * File ops used for: /dev/oprofile/timer/enabled - * The value always has to be the inverted value of hwsampler_enabled. So - * no separate variable is created. That way we do not need locking. - */ - -static ssize_t timer_enabled_read(struct file *file, char __user *buf, - size_t count, loff_t *offset) -{ - return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset); -} - -static ssize_t timer_enabled_write(struct file *file, char const __user *buf, - size_t count, loff_t *offset) -{ - unsigned long val; - int retval; - - if (*offset) - return -EINVAL; - - retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval <= 0) - return retval; - - if (val != 0 && val != 1) - return -EINVAL; - - /* Timer cannot be disabled without having hardware sampling. */ - if (val == 0 && !hwsampler_available) - return -EINVAL; - - if (oprofile_started) - /* - * save to do without locking as we set - * hwsampler_running in start() when start_mutex is - * held - */ - return -EBUSY; - - hwsampler_enabled = !val; - - return count; -} - -static const struct file_operations timer_enabled_fops = { - .read = timer_enabled_read, - .write = timer_enabled_write, -}; - - -static int oprofile_create_hwsampling_files(struct dentry *root) -{ - struct dentry *dir; - - dir = oprofilefs_mkdir(root, "timer"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &timer_enabled_fops); - - if (!hwsampler_available) - return 0; - - /* reinitialize default values */ - hwsampler_enabled = 1; - counter_config.kernel = 1; - counter_config.user = 1; - - if (!force_cpu_type) { - /* - * Create the counter file system. A single virtual - * counter is created which can be used to - * enable/disable hardware sampling dynamically from - * user space. The user space will configure a single - * counter with a single event. The value of 'event' - * and 'unit_mask' are not evaluated by the kernel code - * and can only be set to 0. - */ - - dir = oprofilefs_mkdir(root, "0"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "enabled", &hwsampler_fops); - oprofilefs_create_file(dir, "event", &zero_fops); - oprofilefs_create_file(dir, "count", &hw_interval_fops); - oprofilefs_create_file(dir, "unit_mask", &zero_fops); - oprofilefs_create_file(dir, "kernel", &kernel_fops); - oprofilefs_create_file(dir, "user", &user_fops); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - - } else { - /* - * Hardware sampling can be used but the cpu_type is - * forced to timer in order to deal with legacy user - * space tools. The /dev/oprofile/hwsampling fs is - * provided in that case. 
- */ - dir = oprofilefs_mkdir(root, "hwsampling"); - if (!dir) - return -EINVAL; - - oprofilefs_create_file(dir, "hwsampler", - &hwsampler_fops); - oprofilefs_create_file(dir, "hw_interval", - &hw_interval_fops); - oprofilefs_create_ro_ulong(dir, "hw_min_interval", - &oprofile_min_interval); - oprofilefs_create_ro_ulong(dir, "hw_max_interval", - &oprofile_max_interval); - oprofilefs_create_ulong(dir, "hw_sdbt_blocks", - &oprofile_sdbt_blocks); - } - return 0; -} - -static int oprofile_hwsampler_init(struct oprofile_operations *ops) -{ - /* - * Initialize the timer mode infrastructure as well in order - * to be able to switch back dynamically. oprofile_timer_init - * is not supposed to fail. - */ - if (oprofile_timer_init(ops)) - BUG(); - - memcpy(&timer_ops, ops, sizeof(timer_ops)); - ops->create_files = oprofile_create_hwsampling_files; - - /* - * If the user space tools do not support newer cpu types, - * the force_cpu_type module parameter - * can be used to always return \"timer\" as cpu type. - */ - if (force_cpu_type != timer) { - struct cpuid id; - - get_cpu_id (&id); - - switch (id.machine) { - case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; - case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; - case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; - case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; - default: return -ENODEV; - } - } - - if (hwsampler_setup()) - return -ENODEV; - - /* - * Query the range for the sampling interval from the - * hardware. - */ - oprofile_min_interval = hwsampler_query_min_interval(); - if (oprofile_min_interval == 0) - return -ENODEV; - oprofile_max_interval = hwsampler_query_max_interval(); - if (oprofile_max_interval == 0) - return -ENODEV; - - /* The initial value should be sane */ - if (oprofile_hw_interval < oprofile_min_interval) - oprofile_hw_interval = oprofile_min_interval; - if (oprofile_hw_interval > oprofile_max_interval) - oprofile_hw_interval = oprofile_max_interval; - - printk(KERN_INFO "oprofile: System z hardware sampling " - "facility found.\n"); - - ops->start = oprofile_hwsampler_start; - ops->stop = oprofile_hwsampler_stop; - - return 0; -} - -static void oprofile_hwsampler_exit(void) -{ - hwsampler_shutdown(); -} static int __s390_backtrace(void *data, unsigned long address) { @@ -514,18 +34,9 @@ static void s390_backtrace(struct pt_regs *regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; - - /* - * -ENODEV is not reported to the caller. The module itself - * will use the timer mode sampling as fallback and this is - * always available. - */ - hwsampler_available = oprofile_hwsampler_init(ops) == 0; - return 0; } void oprofile_arch_exit(void) { - oprofile_hwsampler_exit(); } diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h deleted file mode 100644 index 61b2531eef17..000000000000 --- a/arch/s390/oprofile/op_counter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright IBM Corp. 2011 - * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com) - * - * @remark Copyright 2011 OProfile authors - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -struct op_counter_config { - /* `enabled' maps to the hwsampler_file variable. */ - /* `count' maps to the oprofile_hw_interval variable. */ - /* `event' and `unit_mask' are unused. 
*/ - unsigned long kernel; - unsigned long user; -}; - -extern struct op_counter_config counter_config; - -#endif /* OP_COUNTER_H */ diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 1ea8c07eab84..070f1ae5cfad 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -226,7 +226,8 @@ static unsigned long __dma_alloc_iommu(struct device *dev, boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, 0, boundary_size, 0); + start, size, zdev->start_dma >> PAGE_SHIFT, + boundary_size, 0); } static unsigned long dma_alloc_iommu(struct device *dev, int size) @@ -469,6 +470,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) * Also set zdev->end_dma to the actual end address of the usable * range, instead of the theoretical maximum as reported by hardware. */ + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); zdev->iommu_size = min3((u64) high_memory, ZPCI_TABLE_SIZE_RT - zdev->start_dma, zdev->end_dma - zdev->start_dma + 1); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index fb2a9a560fdc..c2b27ad8e94d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -145,8 +145,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } - if (pdev) - pci_dev_put(pdev); + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 10ca15dcab11..fa8d7d4b9751 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -99,7 +99,7 @@ void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) } /* PCI Load */ -static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +static inline int ____pcilg(u64 *data, u64 req, u64 offset, u8 *status) { register u64 __req asm("2") = req; register u64 __offset asm("3") = offset; @@ -116,6 +116,16 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) : "d" (__offset) : "cc"); *status = __req >> 24 & 0xff; + *data = __data; + return cc; +} + +static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) +{ + u64 __data; + int cc; + + cc = ____pcilg(&__data, req, offset, status); if (!cc) *data = __data; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 8b7d7f8e5851..df3c97cb4c99 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -77,6 +77,14 @@ static DEVICE_ATTR_RO(book_siblings); static DEVICE_ATTR_RO(book_siblings_list); #endif +#ifdef CONFIG_SCHED_DRAWER +define_id_show_func(drawer_id); +static DEVICE_ATTR_RO(drawer_id); +define_siblings_show_func(drawer_siblings, drawer_cpumask); +static DEVICE_ATTR_RO(drawer_siblings); +static DEVICE_ATTR_RO(drawer_siblings_list); +#endif + static struct attribute *default_attrs[] = { &dev_attr_physical_package_id.attr, &dev_attr_core_id.attr, @@ -89,6 +97,11 @@ static struct attribute *default_attrs[] = { &dev_attr_book_siblings.attr, &dev_attr_book_siblings_list.attr, #endif +#ifdef CONFIG_SCHED_DRAWER + &dev_attr_drawer_id.attr, + &dev_attr_drawer_siblings.attr, + &dev_attr_drawer_siblings_list.attr, +#endif NULL }; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d77ba2f12242..1af94e2d1a25 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -159,6 +159,19 @@ config CRYPTO_GHASH_S390 It is available as of z196. 
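The s390/pci change above (pci_event.c) can drop the explicit NULL check because kernel-style "put" helpers such as pci_dev_put() are, by convention, no-ops when passed a NULL pointer. A minimal sketch of that idiom, using made-up names (my_dev, my_dev_put) rather than the real PCI API:

#include <stdlib.h>

struct my_dev {
	int refcount;
	/* ... device state ... */
};

/* Put helper in the kernel style: silently accepts NULL. */
static void my_dev_put(struct my_dev *dev)
{
	if (!dev)
		return;		/* tolerating NULL keeps every caller branch-free */
	if (--dev->refcount == 0)
		free(dev);	/* last reference dropped, release the object */
}

/* The caller needs no "if (dev)" guard, matching the cleanup above. */
static void handle_event(struct my_dev *dev)
{
	/* ... dev may legitimately be NULL here ... */
	my_dev_put(dev);
}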
+config CRYPTO_CRC32_S390 + tristate "CRC-32 algorithms" + depends on S390 + select CRYPTO_HASH + select CRC32 + help + Select this option if you want to use hardware accelerated + implementations of CRC algorithms. With this option, you + can optimize the computation of CRC-32 (IEEE 802.3 Ethernet) + and CRC-32C (Castagnoli). + + It is available with IBM z13 or later. + config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 42b34cd1f002..fd2eff440098 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -228,7 +228,7 @@ check_XRC (struct ccw1 *de_ccw, data->ga_extended |= 0x08; /* switch on 'Time Stamp Valid' */ data->ga_extended |= 0x02; /* switch on 'Extended Parameter' */ - rc = get_sync_clock(&data->ep_sys_time); + rc = get_phys_clock(&data->ep_sys_time); /* Ignore return code if sync clock is switched off. */ if (rc == -EOPNOTSUPP || rc == -EACCES) rc = 0; @@ -339,7 +339,7 @@ static int check_XRC_on_prefix(struct PFX_eckd_data *pfxdata, pfxdata->define_extent.ga_extended |= 0x02; /* 'Extended Parameter' */ pfxdata->validity.time_stamp = 1; /* 'Time Stamp Valid' */ - rc = get_sync_clock(&pfxdata->define_extent.ep_sys_time); + rc = get_phys_clock(&pfxdata->define_extent.ep_sys_time); /* Ignore return code if sync clock is switched off. */ if (rc == -EOPNOTSUPP || rc == -EACCES) rc = 0; diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index ef04a9f7a704..7b9c50aa4cc9 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -438,18 +438,9 @@ do_kdgkb_ioctl(struct kbd_data *kbd, struct kbsentry __user *u_kbs, return -EFAULT; if (len > sizeof(u_kbs->kb_string)) return -EINVAL; - p = kmalloc(len, GFP_KERNEL); - if (!p) - return -ENOMEM; - if (copy_from_user(p, u_kbs->kb_string, len)) { - kfree(p); - return -EFAULT; - } - /* - * Make sure the string is terminated by 0. User could have - * modified it between us running strnlen_user() and copying it. 
- */ - p[len - 1] = 0; + p = memdup_user_nul(u_kbs->kb_string, len); + if (IS_ERR(p)) + return PTR_ERR(p); kfree(kbd->func_table[kb_func]); kbd->func_table[kb_func] = p; break; diff --git a/drivers/s390/char/sclp_con.c b/drivers/s390/char/sclp_con.c index 5880def98fc1..6037bc87e767 100644 --- a/drivers/s390/char/sclp_con.c +++ b/drivers/s390/char/sclp_con.c @@ -319,7 +319,8 @@ sclp_console_init(void) int i; int rc; - if (!CONSOLE_IS_SCLP) + /* SCLP consoles are handled together */ + if (!(CONSOLE_IS_SCLP || CONSOLE_IS_VT220)) return 0; rc = sclp_rw_init(); if (rc) diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 2ced50ccca63..1406fb688a26 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -47,7 +47,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work) int cpu; struct device *dev; - s390_adjust_jiffies(); + s390_update_cpu_mhz(); pr_info("CPU capability may have changed\n"); get_online_cpus(); for_each_online_cpu(cpu) { diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 5043ecfa1fbc..16992e2a40ad 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -185,7 +185,7 @@ static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf, { if (ipl_block) { diag308(DIAG308_SET, ipl_block); - diag308(DIAG308_IPL, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); } return count; } diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 50597f9522fe..e96aced58627 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -47,8 +47,6 @@ static DEFINE_MUTEX(info_lock); /* Time after which channel-path status may be outdated. */ static unsigned long chp_info_expires; -/* Workqueue to perform pending configure tasks. */ -static struct workqueue_struct *chp_wq; static struct work_struct cfg_work; /* Wait queue for configure completion events. */ @@ -428,11 +426,14 @@ int chp_update_desc(struct channel_path *chp) if (rc) return rc; - rc = chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1); - if (rc) - return rc; + /* + * Fetching the following data is optional. Not all machines or + * hypervisors implement the required chsc commands. + */ + chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1); + chsc_get_channel_measurement_chars(chp); - return chsc_get_channel_measurement_chars(chp); + return 0; } /** @@ -714,7 +715,7 @@ static void cfg_func(struct work_struct *work) wake_up_interruptible(&cfg_wait_queue); return; } - queue_work(chp_wq, &cfg_work); + schedule_work(&cfg_work); } /** @@ -732,7 +733,7 @@ void chp_cfg_schedule(struct chp_id chpid, int configure) cfg_set_task(chpid, configure ? 
cfg_configure : cfg_deconfigure); cfg_busy = 1; mutex_unlock(&cfg_lock); - queue_work(chp_wq, &cfg_work); + schedule_work(&cfg_work); } /** @@ -766,11 +767,6 @@ static int __init chp_init(void) ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw); if (ret) return ret; - chp_wq = create_singlethread_workqueue("cio_chp"); - if (!chp_wq) { - crw_unregister_handler(CRW_RSC_CPATH); - return -ENOMEM; - } INIT_WORK(&cfg_work, cfg_func); init_waitqueue_head(&cfg_wait_queue); if (info_update()) diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index af0232290dc4..bb5a68226cda 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -4,7 +4,7 @@ */ #ifndef S390_CHP_H -#define S390_CHP_H S390_CHP_H +#define S390_CHP_H #include <linux/types.h> #include <linux/device.h> diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index c424c0c7367e..940e725bde1e 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -907,7 +907,8 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, struct chsc_scpd *scpd_area; int ccode, ret; - if ((rfmt == 1) && !css_general_characteristics.fcs) + if ((rfmt == 1 || rfmt == 0) && c == 1 && + !css_general_characteristics.fcs) return -EINVAL; if ((rfmt == 2) && !css_general_characteristics.cib) return -EINVAL; @@ -939,7 +940,6 @@ EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc); int chsc_determine_base_channel_path_desc(struct chp_id chpid, struct channel_path_desc *desc) { - struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; unsigned long flags; int ret; @@ -949,8 +949,8 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid, ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area); if (ret) goto out; - chsc_resp = (void *)&scpd_area->response; - memcpy(desc, &chsc_resp->data, sizeof(*desc)); + + memcpy(desc, scpd_area->data, sizeof(*desc)); out: spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; @@ -959,18 +959,17 @@ out: int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid, struct channel_path_desc_fmt1 *desc) { - struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; unsigned long flags; int ret; spin_lock_irqsave(&chsc_page_lock, flags); scpd_area = chsc_page; - ret = chsc_determine_channel_path_desc(chpid, 0, 0, 1, 0, scpd_area); + ret = chsc_determine_channel_path_desc(chpid, 0, 1, 1, 0, scpd_area); if (ret) goto out; - chsc_resp = (void *)&scpd_area->response; - memcpy(desc, &chsc_resp->data, sizeof(*desc)); + + memcpy(desc, scpd_area->data, sizeof(*desc)); out: spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; @@ -1020,7 +1019,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) chp->cmg = -1; if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm) - return 0; + return -EINVAL; spin_lock_irq(&chsc_page_lock); memset(chsc_page, 0, PAGE_SIZE); @@ -1176,7 +1175,7 @@ exit: EXPORT_SYMBOL_GPL(css_general_characteristics); EXPORT_SYMBOL_GPL(css_chsc_characteristics); -int chsc_sstpc(void *page, unsigned int op, u16 ctrl) +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta) { struct { struct chsc_header request; @@ -1186,7 +1185,9 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl) unsigned int ctrl : 16; unsigned int rsvd2[5]; struct chsc_header response; - unsigned int rsvd3[7]; + unsigned int rsvd3[3]; + u64 clock_delta; + unsigned int rsvd4[2]; } __attribute__ ((packed)) *rr; int rc; @@ -1200,6 +1201,8 @@ int chsc_sstpc(void *page, unsigned int op, u16 
ctrl) if (rc) return -EIO; rc = (rr->response.code == 0x0001) ? 0 : -EIO; + if (clock_delta) + *clock_delta = rr->clock_delta; return rc; } diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 0de134c3a204..67c87b6e63ec 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -112,8 +112,9 @@ struct chsc_scpd { u32 last_chpid:8; u32 zeroes1; struct chsc_header response; - u8 data[PAGE_SIZE - 20]; -} __attribute__ ((packed)); + u32:32; + u8 data[0]; +} __packed; struct chsc_sda_area { struct chsc_header request; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index b6f12c2bb114..735052ecd3e5 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -552,7 +552,7 @@ static int chsc_ioctl_info_cu(void __user *user_cd) goto out_free; } scucd_area->request.length = 0x0010; - scucd_area->request.code = 0x0028; + scucd_area->request.code = 0x0026; scucd_area->m = cd->m; scucd_area->fmt1 = cd->fmt; scucd_area->cssid = cd->cssid; diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index b2afad5a5682..268aa23afa01 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -164,6 +164,9 @@ static inline u64 time_to_avg_nsec(u32 value, u32 count) return ret; } +#define CMF_OFF 0 +#define CMF_ON 2 + /* * Activate or deactivate the channel monitor. When area is NULL, * the monitor is deactivated. The channel monitor needs to @@ -176,7 +179,7 @@ static inline void cmf_activate(void *area, unsigned int onoff) register long __gpr1 asm("1"); __gpr2 = area; - __gpr1 = onoff ? 2 : 0; + __gpr1 = onoff; /* activate channel measurement */ asm("schm" : : "d" (__gpr2), "d" (__gpr1) ); } @@ -587,7 +590,7 @@ static int alloc_cmb(struct ccw_device *cdev) /* everything ok */ memset(mem, 0, size); cmb_area.mem = mem; - cmf_activate(cmb_area.mem, 1); + cmf_activate(cmb_area.mem, CMF_ON); } } @@ -621,7 +624,7 @@ static void free_cmb(struct ccw_device *cdev) if (list_empty(&cmb_area.list)) { ssize_t size; size = sizeof(struct cmb) * cmb_area.num_channels; - cmf_activate(NULL, 0); + cmf_activate(NULL, CMF_OFF); free_pages((unsigned long)cmb_area.mem, get_order(size)); cmb_area.mem = NULL; } @@ -753,6 +756,17 @@ static void reset_cmb(struct ccw_device *cdev) cmf_generic_reset(cdev); } +static int cmf_enabled(struct ccw_device *cdev) +{ + int enabled; + + spin_lock_irq(cdev->ccwlock); + enabled = !!cdev->private->cmb; + spin_unlock_irq(cdev->ccwlock); + + return enabled; +} + static struct attribute_group cmf_attr_group; static struct cmb_operations cmbops_basic = { @@ -830,7 +844,7 @@ static int alloc_cmbe(struct ccw_device *cdev) /* activate global measurement if this is the first channel */ if (list_empty(&cmb_area.list)) - cmf_activate(NULL, 1); + cmf_activate(NULL, CMF_ON); list_add_tail(&cdev->private->cmb_list, &cmb_area.list); spin_unlock_irq(cdev->ccwlock); @@ -867,7 +881,7 @@ static void free_cmbe(struct ccw_device *cdev) /* deactivate global measurement if this is the last channel */ list_del_init(&cdev->private->cmb_list); if (list_empty(&cmb_area.list)) - cmf_activate(NULL, 0); + cmf_activate(NULL, CMF_OFF); spin_unlock_irq(cdev->ccwlock); spin_unlock(&cmb_area.lock); } @@ -1153,13 +1167,8 @@ static ssize_t cmb_enable_show(struct device *dev, char *buf) { struct ccw_device *cdev = to_ccwdev(dev); - int enabled; - spin_lock_irq(cdev->ccwlock); - enabled = !!cdev->private->cmb; - spin_unlock_irq(cdev->ccwlock); - - return sprintf(buf, "%d\n", enabled); + return sprintf(buf, "%d\n", cmf_enabled(cdev)); } static ssize_t 
cmb_enable_store(struct device *dev, @@ -1199,15 +1208,20 @@ int ccw_set_cmf(struct ccw_device *cdev, int enable) * @cdev: The ccw device to be enabled * * Returns %0 for success or a negative error value. - * + * Note: If this is called on a device for which channel measurement is already + * enabled, a reset of the measurement data is triggered. * Context: * non-atomic */ int enable_cmf(struct ccw_device *cdev) { - int ret; + int ret = 0; device_lock(&cdev->dev); + if (cmf_enabled(cdev)) { + cmbops->reset(cdev); + goto out_unlock; + } get_device(&cdev->dev); ret = cmbops->alloc(cdev); if (ret) @@ -1226,7 +1240,7 @@ int enable_cmf(struct ccw_device *cdev) out: if (ret) put_device(&cdev->dev); - +out_unlock: device_unlock(&cdev->dev); return ret; } @@ -1321,7 +1335,7 @@ void cmf_reactivate(void) { spin_lock(&cmb_area.lock); if (!list_empty(&cmb_area.list)) - cmf_activate(cmb_area.mem, 1); + cmf_activate(cmb_area.mem, CMF_ON); spin_unlock(&cmb_area.lock); } diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index a69f702a2fcc..877d9f601e63 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -97,7 +97,7 @@ void ccw_device_clear_options(struct ccw_device *cdev, unsigned long flags) } /** - * ccw_device_is_pathgroup - determine if paths to this device are grouped + * ccw_device_is_pathgroup() - determine if paths to this device are grouped * @cdev: ccw device * * Return non-zero if there is a path group, zero otherwise. @@ -109,7 +109,7 @@ int ccw_device_is_pathgroup(struct ccw_device *cdev) EXPORT_SYMBOL(ccw_device_is_pathgroup); /** - * ccw_device_is_multipath - determine if device is operating in multipath mode + * ccw_device_is_multipath() - determine if device is operating in multipath mode * @cdev: ccw device * * Return non-zero if device is operating in multipath mode, zero otherwise.
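The enable_cmf() hunk above introduces the behaviour spelled out in the new kernel-doc note: enabling measurement on an already-enabled device resets the data instead of reallocating or failing. A simplified, self-contained sketch of that control flow, with pthread locking and stub helpers standing in for device_lock() and the cmbops callbacks (all names here are illustrative, not the kernel API):

#include <pthread.h>

struct dev_ctx {
	pthread_mutex_t lock;
	int enabled;
	unsigned long counter;
};

/* Stand-ins for cmbops->reset() and cmbops->alloc(). */
static void reset_counters(struct dev_ctx *ctx) { ctx->counter = 0; }
static int alloc_counters(struct dev_ctx *ctx) { ctx->counter = 0; return 0; }

static int enable_measurement(struct dev_ctx *ctx)
{
	int ret = 0;

	pthread_mutex_lock(&ctx->lock);
	if (ctx->enabled) {
		/* already enabled: reset the data, as the note documents */
		reset_counters(ctx);
		goto out_unlock;
	}
	ret = alloc_counters(ctx);
	if (!ret)
		ctx->enabled = 1;
out_unlock:
	pthread_mutex_unlock(&ctx->lock);
	return ret;
}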
@@ -457,7 +457,7 @@ __u8 ccw_device_get_path_mask(struct ccw_device *cdev) } /** - * chp_get_chp_desc - return newly allocated channel-path descriptor + * ccw_device_get_chp_desc() - return newly allocated channel-path descriptor * @cdev: device to obtain the descriptor for * @chp_idx: index of the channel path * @@ -477,7 +477,7 @@ struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *cdev, } /** - * ccw_device_get_id - obtain a ccw device id + * ccw_device_get_id() - obtain a ccw device id * @cdev: device to obtain the id for * @dev_id: where to fill in the values */ @@ -488,7 +488,7 @@ void ccw_device_get_id(struct ccw_device *cdev, struct ccw_dev_id *dev_id) EXPORT_SYMBOL(ccw_device_get_id); /** - * ccw_device_tm_start_key - perform start function + * ccw_device_tm_start_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -533,7 +533,7 @@ int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_key); /** - * ccw_device_tm_start_timeout_key - perform start function + * ccw_device_tm_start_timeout_key() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -559,7 +559,7 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_timeout_key); /** - * ccw_device_tm_start - perform start function + * ccw_device_tm_start() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -577,7 +577,7 @@ int ccw_device_tm_start(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start); /** - * ccw_device_tm_start_timeout - perform start function + * ccw_device_tm_start_timeout() - perform start function * @cdev: ccw device on which to perform the start function * @tcw: transport-command word to be started * @intparm: user defined parameter to be passed to the interrupt handler @@ -596,7 +596,7 @@ int ccw_device_tm_start_timeout(struct ccw_device *cdev, struct tcw *tcw, EXPORT_SYMBOL(ccw_device_tm_start_timeout); /** - * ccw_device_get_mdc - accumulate max data count + * ccw_device_get_mdc() - accumulate max data count * @cdev: ccw device for which the max data count is accumulated * @mask: mask of paths to use * @@ -642,7 +642,7 @@ int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) EXPORT_SYMBOL(ccw_device_get_mdc); /** - * ccw_device_tm_intrg - perform interrogate function + * ccw_device_tm_intrg() - perform interrogate function * @cdev: ccw device on which to perform the interrogate function * * Perform an interrogate function on the given ccw device. 
Return zero on @@ -664,7 +664,7 @@ int ccw_device_tm_intrg(struct ccw_device *cdev) EXPORT_SYMBOL(ccw_device_tm_intrg); /** - * ccw_device_get_schid - obtain a subchannel id + * ccw_device_get_schid() - obtain a subchannel id * @cdev: device to obtain the id for * @schid: where to fill in the values */ diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h index 22b58104683b..89a787790888 100644 --- a/drivers/s390/cio/idset.h +++ b/drivers/s390/cio/idset.h @@ -4,7 +4,7 @@ */ #ifndef S390_IDSET_H -#define S390_IDSET_H S390_IDSET_H +#define S390_IDSET_H #include <asm/schid.h> diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c index 98984818618f..8225da619014 100644 --- a/drivers/s390/cio/ioasm.c +++ b/drivers/s390/cio/ioasm.c @@ -12,7 +12,7 @@ #include "orb.h" #include "cio.h" -int stsch(struct subchannel_id schid, struct schib *addr) +static inline int __stsch(struct subchannel_id schid, struct schib *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode = -EIO; @@ -26,13 +26,21 @@ int stsch(struct subchannel_id schid, struct schib *addr) : "+d" (ccode), "=m" (*addr) : "d" (reg1), "a" (addr) : "cc"); + return ccode; +} + +int stsch(struct subchannel_id schid, struct schib *addr) +{ + int ccode; + + ccode = __stsch(schid, addr); trace_s390_cio_stsch(schid, addr, ccode); return ccode; } EXPORT_SYMBOL(stsch); -int msch(struct subchannel_id schid, struct schib *addr) +static inline int __msch(struct subchannel_id schid, struct schib *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode = -EIO; @@ -46,12 +54,20 @@ int msch(struct subchannel_id schid, struct schib *addr) : "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc"); + return ccode; +} + +int msch(struct subchannel_id schid, struct schib *addr) +{ + int ccode; + + ccode = __msch(schid, addr); trace_s390_cio_msch(schid, addr, ccode); return ccode; } -int tsch(struct subchannel_id schid, struct irb *addr) +static inline int __tsch(struct subchannel_id schid, struct irb *addr) { register struct subchannel_id reg1 asm ("1") = schid; int ccode; @@ -63,12 +79,20 @@ int tsch(struct subchannel_id schid, struct irb *addr) : "=d" (ccode), "=m" (*addr) : "d" (reg1), "a" (addr) : "cc"); + return ccode; +} + +int tsch(struct subchannel_id schid, struct irb *addr) +{ + int ccode; + + ccode = __tsch(schid, addr); trace_s390_cio_tsch(schid, addr, ccode); return ccode; } -int ssch(struct subchannel_id schid, union orb *addr) +static inline int __ssch(struct subchannel_id schid, union orb *addr) { register struct subchannel_id reg1 asm("1") = schid; int ccode = -EIO; @@ -82,13 +106,21 @@ int ssch(struct subchannel_id schid, union orb *addr) : "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc", "memory"); + return ccode; +} + +int ssch(struct subchannel_id schid, union orb *addr) +{ + int ccode; + + ccode = __ssch(schid, addr); trace_s390_cio_ssch(schid, addr, ccode); return ccode; } EXPORT_SYMBOL(ssch); -int csch(struct subchannel_id schid) +static inline int __csch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -100,6 +132,14 @@ int csch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int csch(struct subchannel_id schid) +{ + int ccode; + + ccode = __csch(schid); trace_s390_cio_csch(schid, ccode); return ccode; @@ -140,7 +180,7 @@ int chsc(void *chsc_area) } EXPORT_SYMBOL(chsc); -int rchp(struct chp_id chpid) +static inline int __rchp(struct chp_id chpid) { register struct chp_id reg1 asm ("1") 
= chpid; int ccode; @@ -151,12 +191,20 @@ int rchp(struct chp_id chpid) " ipm %0\n" " srl %0,28" : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int rchp(struct chp_id chpid) +{ + int ccode; + + ccode = __rchp(chpid); trace_s390_cio_rchp(chpid, ccode); return ccode; } -int rsch(struct subchannel_id schid) +static inline int __rsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -168,12 +216,21 @@ int rsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc", "memory"); + + return ccode; +} + +int rsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __rsch(schid); trace_s390_cio_rsch(schid, ccode); return ccode; } -int hsch(struct subchannel_id schid) +static inline int __hsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -185,12 +242,20 @@ int hsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int hsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __hsch(schid); trace_s390_cio_hsch(schid, ccode); return ccode; } -int xsch(struct subchannel_id schid) +static inline int __xsch(struct subchannel_id schid) { register struct subchannel_id reg1 asm("1") = schid; int ccode; @@ -202,6 +267,14 @@ int xsch(struct subchannel_id schid) : "=d" (ccode) : "d" (reg1) : "cc"); + return ccode; +} + +int xsch(struct subchannel_id schid) +{ + int ccode; + + ccode = __xsch(schid); trace_s390_cio_xsch(schid, ccode); return ccode; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 327255da115a..4feb27215ab6 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -169,6 +169,19 @@ static int ap_configuration_available(void) return test_facility(12); } +static inline struct ap_queue_status +__pqap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + *info = reg2; + return reg1; +} + /** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number @@ -179,17 +192,15 @@ static int ap_configuration_available(void) static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, unsigned long *info) { - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; + struct ap_queue_status aqs; + unsigned long _info; if (test_facility(15)) - reg0 |= 1UL << 23; /* set APFT T bit*/ - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + qid |= 1UL << 23; /* set APFT T bit*/ + aqs = __pqap_tapq(qid, &_info); if (info) - *info = reg2; - return reg1; + *info = _info; + return aqs; } /** @@ -237,14 +248,12 @@ ap_queue_interruption_control(ap_qid_t qid, void *ind) * * Returns 0 on success, or -EOPNOTSUPP. 
*/ -static inline int ap_query_configuration(void) +static inline int __ap_query_configuration(void) { register unsigned long reg0 asm ("0") = 0x04000000UL; register unsigned long reg1 asm ("1") = -EINVAL; register void *reg2 asm ("2") = (void *) ap_configuration; - if (!ap_configuration) - return -EOPNOTSUPP; asm volatile( ".long 0xb2af0000\n" /* PQAP(QCI) */ "0: la %1,0\n" @@ -257,6 +266,13 @@ static inline int ap_query_configuration(void) return reg1; } +static inline int ap_query_configuration(void) +{ + if (!ap_configuration) + return -EOPNOTSUPP; + return __ap_query_configuration(); +} + /** * ap_init_configuration(): Allocate and query configuration array. */ @@ -346,6 +362,26 @@ static int ap_queue_enable_interruption(struct ap_device *ap_dev, void *ind) } } +static inline struct ap_queue_status +__nqap(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) +{ + typedef struct { char _[length]; } msgblock; + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) + : "cc"); + return reg1; +} + /** * __ap_send(): Send message to adjunct processor queue. * @qid: The AP queue number @@ -363,24 +399,9 @@ static inline struct ap_queue_status __ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length, unsigned int special) { - typedef struct { char _[length]; } msgblock; - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; - if (special == 1) - reg0 |= 0x400000UL; - - asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5), "m" (*(msgblock *) msg) - : "cc" ); - return reg1; + qid |= 0x400000UL; + return __nqap(qid, psmid, msg, length); } int ap_send(ap_qid_t qid, unsigned long long psmid, void *msg, size_t length) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 081d0f258d4c..54643d1f5af4 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -250,6 +250,14 @@ VMLINUX_SYMBOL(__end_init_task) = .; /* + * Allow architectures to handle ro_after_init data on their + * own by defining an empty RO_AFTER_INIT_DATA. + */ +#ifndef RO_AFTER_INIT_DATA +#define RO_AFTER_INIT_DATA *(.data..ro_after_init) +#endif + +/* * Read only Data */ #define RO_DATA_SECTION(align) \ @@ -257,7 +265,7 @@ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ - *(.data..ro_after_init) /* Read only after init */ \ + RO_AFTER_INIT_DATA /* Read only after init */ \ *(__vermagic) /* Kernel version magic */ \ . 
= ALIGN(8); \ VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index e9b0b9ab07e5..27bfc0b631a9 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -267,6 +267,10 @@ static inline int bitmap_equal(const unsigned long *src1, { if (small_const_nbits(nbits)) return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); +#ifdef CONFIG_S390 + else if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0) + return !memcmp(src1, src2, nbits / 8); +#endif else return __bitmap_equal(src1, src2, nbits); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index addfe4accc07..cc2a99e9cbc8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1022,7 +1022,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)) +#if (defined(CONFIG_X86_64) || defined(CONFIG_S390)) && \ + ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \ + defined(CONFIG_CMA)) static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 7eaa000c9258..88a2a3ba4212 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -320,21 +320,29 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; * * Returns the result of the CP IUCV call. */ -static inline int iucv_call_b2f0(int command, union iucv_param *parm) +static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { register unsigned long reg0 asm ("0"); register unsigned long reg1 asm ("1"); int ccode; reg0 = command; - reg1 = virt_to_phys(parm); + reg1 = (unsigned long)parm; asm volatile( " .long 0xb2f01000\n" " ipm %0\n" " srl %0,28\n" : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) : "m" (*parm) : "cc"); - return (ccode == 1) ? parm->ctrl.iprcode : ccode; + return ccode; +} + +static inline int iucv_call_b2f0(int command, union iucv_param *parm) +{ + int ccode; + + ccode = __iucv_call_b2f0(command, parm); + return ccode == 1 ? parm->ctrl.iprcode : ccode; } /** @@ -345,16 +353,12 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) * Returns the maximum number of connections or -EPERM is IUCV is not * available. */ -static int iucv_query_maxconn(void) +static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { register unsigned long reg0 asm ("0"); register unsigned long reg1 asm ("1"); - void *param; int ccode; - param = kzalloc(sizeof(union iucv_param), GFP_KERNEL|GFP_DMA); - if (!param) - return -ENOMEM; reg0 = IUCV_QUERY; reg1 = (unsigned long) param; asm volatile ( " .long 0xb2f01000\n" " ipm %0\n" " srl %0,28\n" : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + *max_pathid = reg1; + return ccode; +} + +static int iucv_query_maxconn(void) +{ + unsigned long max_pathid; + void *param; + int ccode; + + param = kzalloc(sizeof(union iucv_param), GFP_KERNEL | GFP_DMA); + if (!param) + return -ENOMEM; + ccode = __iucv_query_maxconn(param, &max_pathid); if (ccode == 0) - iucv_max_pathid = reg1; + iucv_max_pathid = max_pathid; kfree(param); return ccode ? -EPERM : 0; }
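A pattern repeated throughout this merge (cio/ioasm.c, crypto/ap_bus.c and the iucv.c hunk above) is moving each piece of inline assembly into a tiny single-purpose helper. The caller then consists of ordinary C statements that the compiler can break into clean basic blocks, which is what the newly enabled kcov instrumentation needs. A sketch of the idea only; the rdtsc below is x86-64 purely for illustration, where the s390 code wraps instructions such as stsch, ssch or the 0xb2f0 IUCV opcode:

#include <stdio.h>

/* Step 1: confine the inline assembly to one minimal helper. */
static inline unsigned long __read_tsc(void)
{
	unsigned int lo, hi;

	asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long)hi << 32) | lo;
}

/*
 * Step 2: keep tracing and any follow-up logic in the caller, which is
 * now plain C that instrumentation can split into basic blocks.
 */
static unsigned long read_tsc_traced(void)
{
	unsigned long tsc = __read_tsc();

	printf("tsc=%lu\n", tsc);	/* stands in for the trace_*() hooks */
	return tsc;
}

int main(void)
{
	read_tsc_traced();
	return 0;
}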