789 files changed, 17744 insertions, 14464 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index e8c956098424..572b228c44c7 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -17,11 +17,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		((v)->counter = (i))
-#define atomic64_set(v,i)	((v)->counter = (i))
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
+#define atomic64_set(v,i)	WRITE_ONCE((v)->counter, (i))
 
 /*
  * To get proper branch prediction for the main line, we must branch
diff --git a/arch/alpha/include/asm/word-at-a-time.h b/arch/alpha/include/asm/word-at-a-time.h
index 6b340d0f1521..902e6ab00a06 100644
--- a/arch/alpha/include/asm/word-at-a-time.h
+++ b/arch/alpha/include/asm/word-at-a-time.h
@@ -52,4 +52,6 @@ static inline unsigned long find_zero(unsigned long bits)
 #endif
 }
 
+#define zero_bytemask(mask) ((2ul << (find_zero(mask) * 8)) - 1)
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 0086b472bc2b..f2f949671798 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -37,6 +37,9 @@
 
 #define MCL_CURRENT	 8192		/* lock all currently mapped pages */
 #define MCL_FUTURE	16384		/* lock all additions to address space */
+#define MCL_ONFAULT	32768		/* lock all pages that are faulted in */
+
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
 
 #define MADV_NORMAL	0		/* no further special treatment */
 #define MADV_RANDOM	1		/* expect random page references */
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 78c0621d5819..2c2ac3f3ff80 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -76,6 +76,10 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	def_bool y
+	depends on ARC_MMU_V4
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
@@ -190,6 +194,16 @@ config NR_CPUS
 	range 2 4096
 	default "4"
 
+config ARC_SMP_HALT_ON_RESET
+	bool "Enable Halt-on-reset boot mode"
+	default y if ARC_UBOOT_SUPPORT
+	help
+	  In SMP configuration cores can be configured as Halt-on-reset
+	  or they could all start at same time. For Halt-on-reset, non
+	  masters are parked until Master kicks them so they can start of
+	  at designated entry point. For other case, all jump to common
+	  entry point and spin wait for Master's signal.
+
 endif	#SMP
 
 menuconfig ARC_CACHE
@@ -278,6 +292,8 @@ choice
 	default ARC_MMU_V2 if ARC_CPU_750D
 	default ARC_MMU_V4 if ARC_CPU_HS
 
+if ISA_ARCOMPACT
+
 config ARC_MMU_V1
 	bool "MMU v1"
 	help
@@ -297,6 +313,8 @@ config ARC_MMU_V3
 	  Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
 	  Shared Address Spaces (SASID)
 
+endif
+
 config ARC_MMU_V4
 	bool "MMU v4"
 	depends on ISA_ARCV2
@@ -428,6 +446,28 @@ config LINUX_LINK_BASE
 	  Linux needs to be scooted a bit.
 	  If you don't know what the above means, leave this setting alone.
 
+config HIGHMEM
+	bool "High Memory Support"
+	help
+	  With ARC 2G:2G address split, only upper 2G is directly addressable by
+	  kernel. Enable this to potentially allow access to rest of 2G and PAE
+	  in future
+
+config ARC_HAS_PAE40
+	bool "Support for the 40-bit Physical Address Extension"
+	default n
+	depends on ISA_ARCV2
+	select HIGHMEM
+	help
+	  Enable access to physical memory beyond 4G, only supported on
+	  ARC cores with 40 bit Physical Addressing support
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool ARC_HAS_PAE40
+
+config ARCH_DMA_ADDR_T_64BIT
+	bool
+
 config ARC_CURR_IN_REG
 	bool "Dedicate Register r25 for current_task pointer"
 	default y
diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi
index a5e2726a067e..420dcfde289f 100644
--- a/arch/arc/boot/dts/axc001.dtsi
+++ b/arch/arc/boot/dts/axc001.dtsi
@@ -95,6 +95,6 @@
 		#size-cells = <1>;
 		ranges = <0x00000000 0x80000000 0x40000000>;
 		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	/* 512MiB */
+		reg = <0x80000000 0x20000000>;	/* 512MiB */
 	};
 };
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 846481f37eef..f90fadf7f94e 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -98,6 +98,6 @@
 		#size-cells = <1>;
 		ranges = <0x00000000 0x80000000 0x40000000>;
 		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	/* 512MiB */
+		reg = <0x80000000 0x20000000>;	/* 512MiB */
 	};
 };
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 2f0b33257db2..06a9f294a2e6 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -121,6 +121,6 @@
 		#size-cells = <1>;
 		ranges = <0x00000000 0x80000000 0x40000000>;
 		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	/* 512MiB */
+		reg = <0x80000000 0x20000000>;	/* 512MiB */
 	};
 };
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index 911f069e0540..b0eb0e7fe21d 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -11,8 +11,16 @@
 
 / {
 	compatible = "snps,nsim_hs";
+	#address-cells = <2>;
+	#size-cells = <2>;
 	interrupt-parent = <&core_intc>;
 
+	memory {
+		device_type = "memory";
+		reg = <0x0 0x80000000 0x0 0x40000000	/* 1 GB low mem */
+		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
+	};
+
 	chosen {
 		bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8";
 	};
@@ -26,8 +34,8 @@
 		#address-cells = <1>;
 		#size-cells = <1>;
 
-		/* child and parent address space 1:1 mapped */
-		ranges;
+		/* only perip space at end of low mem accessible */
+		ranges = <0x80000000 0x0 0x80000000 0x80000000>;
 
 		core_intc: core-interrupt-controller {
 			compatible = "snps,archs-intc";
diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi
index a870bdd5e404..296d371a335c 100644
--- a/arch/arc/boot/dts/skeleton.dtsi
+++ b/arch/arc/boot/dts/skeleton.dtsi
@@ -32,6 +32,6 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x00000000 0x10000000>;	/* 256M */
+		reg = <0x80000000 0x10000000>;	/* 256M */
 	};
 };
diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi
index 9393fd902f0d..84226bd48baf 100644
--- a/arch/arc/boot/dts/vdk_axc003.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003.dtsi
@@ -56,6 +56,6 @@
 		#size-cells = <1>;
 		ranges = <0x00000000 0x80000000 0x40000000>;
 		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	/* 512MiB */
+		reg = <0x80000000 0x20000000>;	/* 512MiB */
 	};
 };
diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
index 9bee8ed09eb0..31f0fb5fc91d 100644
--- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi
+++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi
@@ -71,6 +71,6 @@
 		#size-cells = <1>;
 		ranges = <0x00000000 0x80000000 0x40000000>;
 		device_type = "memory";
-		reg = <0x00000000 0x20000000>;	/* 512MiB */
+		reg = <0x80000000 0x20000000>;	/* 512MiB */
 	};
 };
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 562dac6a7f78..c92c0ef1e9d2 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -89,7 +89,6 @@ CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 83a6d8d5cc58..cfac24e0e7b6 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -95,7 +95,6 @@ CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index f1e1c84e0dda..9922a118a15a 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -96,7 +96,6 @@ CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS=y
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 7611b10a2d23..0b10ef2a4372 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -48,4 +48,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index d8023bc8d1ad..7fac7d85ed6a 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -120,7 +120,7 @@
 
 /* gcc builtin sr needs reg param to be long immediate */
 #define write_aux_reg(reg_immed, val)		\
-		__builtin_arc_sr((unsigned int)val, reg_immed)
+		__builtin_arc_sr((unsigned int)(val), reg_immed)
 
 #else
 
@@ -327,8 +327,8 @@ struct bcr_generic {
  */
 
 struct cpuinfo_arc_mmu {
-	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6;
-	unsigned int num_tlb:16, sets:12, ways:4;
+	unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
+	unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
 };
 
 struct cpuinfo_arc_cache {
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index c3ecda023e3a..7730d302cadb 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -17,11 +17,11 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#define atomic_read(v)  ((v)->counter)
+#define atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_ARC_STAR_9000923308
 
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 #ifndef CONFIG_SMP
 
  /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #else
 
@@ -125,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i)
 	unsigned long flags;
 
 	atomic_ops_lock(flags);
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 	atomic_ops_unlock(flags);
 }
 
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index e23ea6e7633a..abf06e81c929 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -65,6 +65,7 @@ extern int ioc_exists;
 #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG		0x1E
 #endif
+#define ARC_REG_IC_PTAG_HI	0x1F
 
 /* Bit val in IC_CTRL */
 #define IC_CTRL_CACHE_DISABLE   0x1
@@ -77,6 +78,7 @@ extern int ioc_exists;
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
 #define ARC_REG_DC_PTAG		0x5C
+#define ARC_REG_DC_PTAG_HI	0x5F
 
 /* Bit val in DC_CTRL */
 #define DC_CTRL_INV_MODE_FLUSH  0x40
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index 0992d3dbcc65..fbe3587c4f36 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -31,10 +31,10 @@
 
 void flush_cache_all(void);
 
-void flush_icache_range(unsigned long start, unsigned long end);
-void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len);
-void __inv_icache_page(unsigned long paddr, unsigned long vaddr);
-void __flush_dcache_page(unsigned long paddr, unsigned long vaddr);
+void flush_icache_range(unsigned long kstart, unsigned long kend);
+void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len);
+void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr);
+void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 415443c2a8c4..1aff3be91075 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -110,13 +110,12 @@
 
 .macro FAKE_RET_FROM_EXCPN
 
-	ld  r9, [sp, PT_status32]
-	bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK)
-	bset  r9, r9, STATUS_L_BIT
-	sr  r9, [erstatus]
-	mov r9, 55f
-	sr  r9, [eret]
-
+	lr	r9, [status32]
+	bclr	r9, r9, STATUS_AE_BIT
+	or	r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK)
+	sr	r9, [erstatus]
+	mov	r9, 55f
+	sr	r9, [eret]
 	rtie
 55:
 .endm
diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h
new file mode 100644
index 000000000000..b1585c96324a
--- /dev/null
+++ b/arch/arc/include/asm/highmem.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef CONFIG_HIGHMEM
+
+#include <uapi/asm/page.h>
+#include <asm/kmap_types.h>
+
+/* start after vmalloc area */
+#define FIXMAP_BASE		(PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE)
+#define FIXMAP_SIZE		PGDIR_SIZE	/* only 1 PGD worth */
+#define KM_TYPE_NR		((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS)
+#define FIXMAP_ADDR(nr)		(FIXMAP_BASE + ((nr) << PAGE_SHIFT))
+
+/* start after fixmap area */
+#define PKMAP_BASE		(FIXMAP_BASE + FIXMAP_SIZE)
+#define PKMAP_SIZE		PGDIR_SIZE
+#define LAST_PKMAP		(PKMAP_SIZE >> PAGE_SHIFT)
+#define LAST_PKMAP_MASK		(LAST_PKMAP - 1)
+#define PKMAP_ADDR(nr)		(PKMAP_BASE + ((nr) << PAGE_SHIFT))
+#define PKMAP_NR(virt)		(((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+
+#define kmap_prot		PAGE_KERNEL
+
+
+#include <asm/cacheflush.h>
+
+extern void *kmap(struct page *page);
+extern void *kmap_high(struct page *page);
+extern void *kmap_atomic(struct page *page);
+extern void __kunmap_atomic(void *kvaddr);
+extern void kunmap_high(struct page *page);
+
+extern void kmap_init(void);
+
+static inline void flush_cache_kmaps(void)
+{
+	flush_cache_all();
+}
+
+static inline void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+
+
+#endif
+
+#endif
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
new file mode 100644
index 000000000000..c5094de86403
--- /dev/null
+++ b/arch/arc/include/asm/hugepage.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#ifndef _ASM_ARC_HUGEPAGE_H
+#define _ASM_ARC_HUGEPAGE_H
+
+#include <linux/types.h>
+#include <asm-generic/pgtable-nopmd.h>
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkhuge(pmd)		pte_pmd(pte_mkhuge(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd)	pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
+
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_special(pmd)	pte_special(pmd_pte(pmd))
+
+#define mk_pmd(page, prot)	pte_pmd(mk_pte(page, prot))
+
+#define pmd_trans_huge(pmd)	(pmd_val(pmd) & _PAGE_HW_SZ)
+#define pmd_trans_splitting(pmd)	(pmd_trans_huge(pmd) && pmd_special(pmd))
+
+#define pfn_pmd(pfn, prot)	(__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+        /*
+         * open-coded pte_modify() with additional retaining of HW_SZ bit
+         * so that pmd_trans_huge() remains true for this PMD
+         */
+        return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot));
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+
+#define has_transparent_hugepage() 1
+
+/* Generic variants assume pgtable_t is struct page *, hence need for these */
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				unsigned long end);
+
+#endif
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index bc5103637326..4fd7d62a6e30 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -16,6 +16,7 @@
 #ifdef CONFIG_ISA_ARCOMPACT
 #define TIMER0_IRQ      3
 #define TIMER1_IRQ      4
+#define IPI_IRQ		(NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
 #else
 #define TIMER0_IRQ      16
 #define TIMER1_IRQ      17
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index aa805575c320..d8c608174617 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -23,11 +23,13 @@
 #define STATUS_E2_BIT		2	/* Int 2 enable */
 #define STATUS_A1_BIT		3	/* Int 1 active */
 #define STATUS_A2_BIT		4	/* Int 2 active */
+#define STATUS_AE_BIT		5	/* Exception active */
 
 #define STATUS_E1_MASK		(1<<STATUS_E1_BIT)
 #define STATUS_E2_MASK		(1<<STATUS_E2_BIT)
 #define STATUS_A1_MASK		(1<<STATUS_A1_BIT)
 #define STATUS_A2_MASK		(1<<STATUS_A2_BIT)
+#define STATUS_AE_MASK		(1<<STATUS_AE_BIT)
 #define STATUS_IE_MASK		(STATUS_E1_MASK | STATUS_E2_MASK)
 
 /* Other Interrupt Handling related Aux regs */
@@ -91,7 +93,19 @@ static inline void arch_local_irq_restore(unsigned long flags)
 /*
  * Unconditionally Enable IRQs
  */
-extern void arch_local_irq_enable(void);
+static inline void arch_local_irq_enable(void)
+{
+	unsigned long temp;
+
+	__asm__ __volatile__(
+	"	lr   %0, [status32]	\n"
+	"	or   %0, %0, %1		\n"
+	"	flag %0			\n"
+	: "=&r"(temp)
+	: "n"((STATUS_E1_MASK | STATUS_E2_MASK))
+	: "cc", "memory");
+}
+
 
 /*
  * Unconditionally Disable IRQs
diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h
new file mode 100644
index 000000000000..f0d7f6acea4e
--- /dev/null
+++ b/arch/arc/include/asm/kmap_types.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+/*
+ * We primarily need to define KM_TYPE_NR here but that in turn
+ * is a function of PGDIR_SIZE etc.
+ * To avoid circular deps issue, put everything in asm/highmem.h
+ */
+#endif
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index e8993a2be6c2..6ff657a904b6 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -23,11 +23,8 @@
  * @dt_compat:		Array of device tree 'compatible' strings
  * 			(XXX: although only 1st entry is looked at)
  * @init_early:		Very early callback [called from setup_arch()]
- * @init_irq:		setup external IRQ controllers [called from init_IRQ()]
- * @init_smp:		for each CPU (e.g. setup IPI)
+ * @init_cpu_smp:	for each CPU as it is coming up (SMP as well as UP)
  * 			[(M):init_IRQ(), (o):start_kernel_secondary()]
- * @init_time:		platform specific clocksource/clockevent registration
- * 			[called from time_init()]
  * @init_machine:	arch initcall level callback (e.g. populate static
  * 			platform devices or parse Devicetree)
  * @init_late:		Late initcall level callback
@@ -36,13 +33,10 @@
 struct machine_desc {
 	const char		*name;
 	const char		**dt_compat;
-
 	void			(*init_early)(void);
-	void			(*init_irq)(void);
 #ifdef CONFIG_SMP
-	void			(*init_smp)(unsigned int);
+	void			(*init_cpu_smp)(unsigned int);
 #endif
-	void			(*init_time)(void);
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
 
diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h
index 52c11f0bb0e5..46f4e5351b2a 100644
--- a/arch/arc/include/asm/mcip.h
+++ b/arch/arc/include/asm/mcip.h
@@ -86,9 +86,6 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param,
 	__mcip_cmd(cmd, param);
 }
 
-extern void mcip_init_early_smp(void);
-extern void mcip_init_smp(unsigned int cpu);
-
 #endif
 
 #endif
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 0f9c3eb5327e..b144d7ca7d20 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -24,6 +24,7 @@
 #if (CONFIG_ARC_MMU_VER < 4)
 #define ARC_REG_TLBPD0		0x405
 #define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBPD1HI	0	/* Dummy: allows code sharing with ARC700 */
 #define ARC_REG_TLBINDEX	0x407
 #define ARC_REG_TLBCOMMAND	0x408
 #define ARC_REG_PID		0x409
@@ -31,6 +32,7 @@
 #else
 #define ARC_REG_TLBPD0		0x460
 #define ARC_REG_TLBPD1		0x461
+#define ARC_REG_TLBPD1HI	0x463
 #define ARC_REG_TLBINDEX	0x464
 #define ARC_REG_TLBCOMMAND	0x465
 #define ARC_REG_PID		0x468
@@ -83,6 +85,11 @@ void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 void read_decode_mmu_bcr(void);
 
+static inline int is_pae40_enabled(void)
+{
+	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index 9c8aa41e45c2..429957f1c236 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -43,7 +43,6 @@ typedef struct {
 typedef struct {
 	unsigned long pgprot;
 } pgprot_t;
-typedef unsigned long pgtable_t;
 
 #define pte_val(x)      ((x).pte)
 #define pgd_val(x)      ((x).pgd)
@@ -57,20 +56,26 @@ typedef unsigned long pgtable_t;
 
 #else /* !STRICT_MM_TYPECHECKS */
 
+#ifdef CONFIG_ARC_HAS_PAE40
+typedef unsigned long long pte_t;
+#else
 typedef unsigned long pte_t;
+#endif
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
-typedef unsigned long pgtable_t;
 
 #define pte_val(x)	(x)
 #define pgd_val(x)	(x)
 #define pgprot_val(x)	(x)
 #define __pte(x)	(x)
+#define __pgd(x)	(x)
 #define __pgprot(x)	(x)
 #define pte_pgprot(x)	(x)
 
 #endif
 
+typedef pte_t * pgtable_t;
+
 #define ARCH_PFN_OFFSET     (CONFIG_LINUX_LINK_BASE >> PAGE_SHIFT)
 
 #define pfn_valid(pfn)      (((pfn) - ARCH_PFN_OFFSET) < max_mapnr)
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 81208bfd9dcb..86ed671286df 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
 
 static inline int __get_order_pgd(void)
 {
-	return get_order(PTRS_PER_PGD * 4);
+	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline int __get_order_pte(void)
 {
-	return get_order(PTRS_PER_PTE * 4);
+	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -107,10 +107,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pgtable_t pte_pg;
 	struct page *page;
 
-	pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
+	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
 	if (!pte_pg)
 		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
+	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
 	page = virt_to_page(pte_pg);
 	if (!pgtable_page_ctor(page)) {
 		__free_page(page);
@@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
 	pgtable_page_dtor(virt_to_page(ptep));
-	free_pages(ptep, __get_order_pte());
+	free_pages((unsigned long)ptep, __get_order_pte());
 }
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
 #define check_pgt_cache()   do { } while (0)
-#define pmd_pgtable(pmd) pmd_page_vaddr(pmd)
+#define pmd_pgtable(pmd)	((pgtable_t) pmd_page_vaddr(pmd))
 
 #endif /* _ASM_ARC_PGALLOC_H */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1281718802f7..57af2f05ae84 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -38,6 +38,7 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm-generic/pgtable-nopmd.h>
+#include <linux/const.h>
 
 /**************************************************************************
  * Page Table Flags
@@ -60,7 +61,8 @@
 #define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
 #define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_DIRTY         (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_SPECIAL       (1<<7)
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
@@ -71,7 +73,8 @@
 #define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
-#define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_DIRTY         (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_SPECIAL       (1<<6)
 
 #if (CONFIG_ARC_MMU_VER >= 4)
 #define _PAGE_WTHRU         (1<<7)	/* Page cache mode write-thru (H) */
@@ -81,32 +84,33 @@
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
 
 #if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_SZ            (1<<10)	/* Page Size indicator (H) */
+#define _PAGE_HW_SZ         (1<<10)	/* Page Size indicator (H): 0 normal, 1 super */
 #endif
 
 #define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
 					   usable for shared TLB entries (H) */
+
+#define _PAGE_UNUSED_BIT    (1<<12)
 #endif
 
 /* vmalloc permissions */
 #define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
 			_PAGE_GLOBAL | _PAGE_PRESENT)
 
-#ifdef CONFIG_ARC_CACHE_PAGES
-#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE
-#else
-#define _PAGE_DEF_CACHEABLE (0)
+#ifndef CONFIG_ARC_CACHE_PAGES
+#undef _PAGE_CACHEABLE
+#define _PAGE_CACHEABLE 0
 #endif
 
-/* Helper for every "user" page
- * -kernel can R/W/X
- * -by default cached, unless config otherwise
- * -present in memory
- */
-#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE)
+#ifndef _PAGE_HW_SZ
+#define _PAGE_HW_SZ	0
+#endif
+
+/* Defaults for every user page */
+#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED)
+#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 
 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
@@ -122,15 +126,20 @@
  * user vaddr space - visible in all addr spaces, but kernel mode only
  * Thus Global, all-kernel-access, no-user-access, cached
  */
-#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE)
+#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
 
 /* ioremap */
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
 /* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#else
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
+#endif
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
@@ -191,26 +200,22 @@
 
 /* Optimal Sizing of Pg Tbl - based on MMU page size */
 #if defined(CONFIG_ARC_PAGE_SIZE_8K)
-#define BITS_FOR_PTE	8
+#define BITS_FOR_PTE	8		/* 11:8:13 */
 #elif defined(CONFIG_ARC_PAGE_SIZE_16K)
-#define BITS_FOR_PTE	8
+#define BITS_FOR_PTE	8		/* 10:8:14 */
 #elif defined(CONFIG_ARC_PAGE_SIZE_4K)
-#define BITS_FOR_PTE	9
+#define BITS_FOR_PTE	9		/* 11:9:12 */
 #endif
 
 #define BITS_FOR_PGD	(32 - BITS_FOR_PTE - BITS_IN_PAGE)
 
-#define PGDIR_SHIFT	(BITS_FOR_PTE + BITS_IN_PAGE)
+#define PGDIR_SHIFT	(32 - BITS_FOR_PGD)
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)	/* vaddr span, not PDG sz */
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#ifdef __ASSEMBLY__
-#define	PTRS_PER_PTE	(1 << BITS_FOR_PTE)
-#define	PTRS_PER_PGD	(1 << BITS_FOR_PGD)
-#else
-#define	PTRS_PER_PTE	(1UL << BITS_FOR_PTE)
-#define	PTRS_PER_PGD	(1UL << BITS_FOR_PGD)
-#endif
+#define	PTRS_PER_PTE	_BITUL(BITS_FOR_PTE)
+#define	PTRS_PER_PGD	_BITUL(BITS_FOR_PGD)
+
 /*
  * Number of entries a user land program use.
  * TASK_SIZE is the maximum vaddr that can be used by a userland program.
@@ -270,15 +275,10 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 		(unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \
 				PAGE_SHIFT)))
 
-#define mk_pte(page, pgprot)						\
-({									\
-	pte_t pte;							\
-	pte_val(pte) = __pa(page_address(page)) + pgprot_val(pgprot);	\
-	pte;								\
-})
-
+#define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)	(__pte(((pte_t)(pfn) << PAGE_SHIFT) | \
+				 pgprot_val(prot)))
 #define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
 /*
@@ -295,23 +295,26 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 /* Zoo of pte_xxx function */
 #define pte_read(pte)		(pte_val(pte) & _PAGE_READ)
 #define pte_write(pte)		(pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte)		(pte_val(pte) & _PAGE_MODIFIED)
+#define pte_dirty(pte)		(pte_val(pte) & _PAGE_DIRTY)
 #define pte_young(pte)		(pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte)	(0)
+#define pte_special(pte)	(pte_val(pte) & _PAGE_SPECIAL)
 
 #define PTE_BIT_FUNC(fn, op) \
 	static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
 
+PTE_BIT_FUNC(mknotpresent,	&= ~(_PAGE_PRESENT));
 PTE_BIT_FUNC(wrprotect,	&= ~(_PAGE_WRITE));
 PTE_BIT_FUNC(mkwrite,	|= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_MODIFIED));
-PTE_BIT_FUNC(mkdirty,	|= (_PAGE_MODIFIED));
+PTE_BIT_FUNC(mkclean,	&= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty,	|= (_PAGE_DIRTY));
 PTE_BIT_FUNC(mkold,	&= ~(_PAGE_ACCESSED));
 PTE_BIT_FUNC(mkyoung,	|= (_PAGE_ACCESSED));
 PTE_BIT_FUNC(exprotect,	&= ~(_PAGE_EXECUTE));
 PTE_BIT_FUNC(mkexec,	|= (_PAGE_EXECUTE));
+PTE_BIT_FUNC(mkspecial,	|= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge,	|= (_PAGE_HW_SZ));
 
-static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+#define __HAVE_ARCH_PTE_SPECIAL
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
@@ -357,7 +360,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 #define pgd_offset_fast(mm, addr)	pgd_offset(mm, addr)
 #endif
 
-extern void paging_init(void);
 extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		      pte_t *ptep);
@@ -383,6 +385,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
  * remap a physical page `pfn' of size `size' with page protection `prot'
  * into virtual address `from'
  */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
 #include <asm-generic/pgtable.h>
 
 /* to cope with aliasing VIPT cache */
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index ee682d8e0213..44545354e9e8 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -114,7 +114,12 @@ extern unsigned int get_wchan(struct task_struct *p);
  * -----------------------------------------------------------------------------
  */
 #define VMALLOC_START	0x70000000
-#define VMALLOC_SIZE	(PAGE_OFFSET - VMALLOC_START)
+
+/*
+ * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter
+ * See asm/highmem.h for details
+ */
+#define VMALLOC_SIZE	(PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4)
 #define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 
 #define USER_KERNEL_GUTTER    0x10000000
diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h
index 6e3ef5ba4f74..307846691be6 100644
--- a/arch/arc/include/asm/setup.h
+++ b/arch/arc/include/asm/setup.h
@@ -33,4 +33,11 @@ extern int root_mountflags, end_mem;
 void setup_processor(void);
 void __init setup_arch_memory(void);
 
+/* Helpers used in arc_*_mumbojumbo routines */
+#define IS_AVAIL1(v, s)		((v) ? s : "")
+#define IS_DISABLED_RUN(v)	((v) ? "" : "(disabled) ")
+#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
+#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
+#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
+
 #endif /* __ASMARC_SETUP_H */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 3845b9e94f69..133c867d15af 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -45,12 +45,19 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
  * struct plat_smp_ops	- SMP callbacks provided by platform to ARC SMP
  *
  * @info:		SoC SMP specific info for /proc/cpuinfo etc
+ * @init_early_smp:	A SMP specific h/w block can init itself
+ * 			Could be common across platforms so not covered by
+ * 			mach_desc->init_early()
+ * @init_irq_cpu:	Called for each core so SMP h/w block driver can do
+ * 			any needed setup per cpu (e.g. IPI request)
  * @cpu_kick:		For Master to kickstart a cpu (optionally at a PC)
  * @ipi_send:		To send IPI to a @cpu
  * @ips_clear:		To clear IPI received at @irq
  */
 struct plat_smp_ops {
 	const char 	*info;
+	void		(*init_early_smp)(void);
+	void		(*init_irq_cpu)(int cpu);
 	void		(*cpu_kick)(int cpu, unsigned long pc);
 	void		(*ipi_send)(int cpu);
 	void		(*ipi_clear)(int irq);
diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index 71c7b2e4b874..1fe9c8c80280 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
+void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			       unsigned long end);
 
 #ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
@@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma,
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
+#define flush_pmd_tlb_range(vma, s, e)	local_flush_pmd_tlb_range(vma, s, e)
 #else
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 							 unsigned long end);
@@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
 #endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 9d129a2a1351..059aff38f10a 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -9,6 +9,8 @@
 #ifndef _UAPI__ASM_ARC_PAGE_H
 #define _UAPI__ASM_ARC_PAGE_H
 
+#include <linux/const.h>
+
 /* PAGE_SHIFT determines the page size */
 #if defined(CONFIG_ARC_PAGE_SIZE_16K)
 #define PAGE_SHIFT 14
@@ -25,13 +27,8 @@
 #define PAGE_SHIFT 13
 #endif
 
-#ifdef __ASSEMBLY__
-#define PAGE_SIZE	(1 << PAGE_SHIFT)
-#define PAGE_OFFSET	(0x80000000)
-#else
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)	/* Default 8K */
-#define PAGE_OFFSET	(0x80000000UL)		/* Kernel starts at 2G onwards */
-#endif
+#define PAGE_SIZE	_BITUL(PAGE_SHIFT)	/* Default 8K */
+#define PAGE_OFFSET	_AC(0x80000000, UL)	/* Kernel starts at 2G onwrds */
 
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 8fa76567e402..445e63a10754 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -24,7 +24,7 @@
 	.align 4
 
 # Initial 16 slots are Exception Vectors
-VECTOR	stext			; Restart Vector (jump to entry point)
+VECTOR	res_service		; Reset Vector
 VECTOR	mem_service		; Mem exception
 VECTOR	instr_service		; Instrn Error
 VECTOR	EV_MachineCheck		; Fatal Machine check
diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S
index 15d457b4403a..59f52035b4ea 100644
--- a/arch/arc/kernel/entry-compact.S
+++ b/arch/arc/kernel/entry-compact.S
@@ -86,7 +86,7 @@
  */
 
 ; ********* Critical System Events **********************
-VECTOR   res_service             ; 0x0, Restart Vector  (0x0)
+VECTOR   res_service             ; 0x0, Reset Vector	(0x0)
 VECTOR   mem_service             ; 0x8, Mem exception   (0x1)
 VECTOR   instr_service           ; 0x10, Instrn Error   (0x2)
 
@@ -155,13 +155,9 @@ int2_saved_reg:
 ; ---------------------------------------------
 	.section .text, "ax",@progbits
 
-res_service:		; processor restart
-	flag    0x1     ; not implemented
-	nop
-	nop
 
-reserved:		; processor restart
-	rtie            ; jump to processor initializations
+reserved:
+	flag 1		; Unexpected event, halt
 
 ;##################### Interrupt Handling ##############################
 
@@ -175,12 +171,25 @@ ENTRY(handle_interrupt_level2)
 
 	;------------------------------------------------------
 	; if L2 IRQ interrupted a L1 ISR, disable preemption
+	;
+	; This is to avoid a potential L1-L2-L1 scenario
+	;  -L1 IRQ taken
+	;  -L2 interrupts L1 (before L1 ISR could run)
+	;  -preemption off IRQ, user task in syscall picked to run
+	;  -RTIE to userspace
+	;	Returns from L2 context fine
+	;	But both L1 and L2 re-enabled, so another L1 can be taken
+	;	while prev L1 is still unserviced
+	;
 	;------------------------------------------------------
 
+	; L2 interrupting L1 implies both L2 and L1 active
+	; However both A2 and A1 are NOT set in STATUS32, thus
+	; need to check STATUS32_L2 to determine if L1 was active
+
 	ld r9, [sp, PT_status32]        ; get statu32_l2 (saved in pt_regs)
 	bbit0 r9, STATUS_A1_BIT, 1f     ; L1 not active when L2 IRQ, so normal
 
-	; A1 is set in status32_l2
 	; bump thread_info->preempt_count (Disable preemption)
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
@@ -320,11 +329,10 @@ END(call_do_page_fault)
 	; Note that we use realtime STATUS32 (not pt_regs->status32) to
 	; decide that.
 
-	; if Returning from Exception
-	btst   r10, STATUS_AE_BIT
-	bnz    .Lexcep_ret
+	and.f	0, r10, (STATUS_A1_MASK|STATUS_A2_MASK)
+	bz	.Lexcep_or_pure_K_ret
 
-	; Not Exception so maybe Interrupts (Level 1 or 2)
+	; Returning from Interrupts (Level 1 or 2)
 
 #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS
 
@@ -365,8 +373,7 @@ END(call_do_page_fault)
 	st      r9, [r10, THREAD_INFO_PREEMPT_COUNT]
 
 149:
-	;return from level 2
-	INTERRUPT_EPILOGUE 2
+	INTERRUPT_EPILOGUE 2	; return from level 2 interrupt
 debug_marker_l2:
 	rtie
 
@@ -374,15 +381,11 @@ not_level2_interrupt:
 
 #endif
 
-	bbit0  r10, STATUS_A1_BIT, .Lpure_k_mode_ret
-
-	;return from level 1
-	INTERRUPT_EPILOGUE 1
+	INTERRUPT_EPILOGUE 1	; return from level 1 interrupt
 debug_marker_l1:
 	rtie
 
-.Lexcep_ret:
-.Lpure_k_mode_ret:
+.Lexcep_or_pure_K_ret:
 
 	;this case is for syscalls or Exceptions or pure kernel mode
 
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 812f95e6ae69..689dd867fdff 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -50,28 +50,37 @@
 .endm
 
 	.section .init.text, "ax",@progbits
-	.type stext, @function
-	.globl stext
-stext:
-	;-------------------------------------------------------------------
-	; Don't clobber r0-r2 yet. It might have bootloader provided info
-	;-------------------------------------------------------------------
+
+;----------------------------------------------------------------
+; Default Reset Handler (jumped into from Reset vector)
+; - Don't clobber r0,r1,r2 as they might have u-boot provided args
+; - Platforms can override this weak version if needed
+;----------------------------------------------------------------
+WEAK(res_service)
+	j	stext
+END(res_service)
+
+;----------------------------------------------------------------
+; Kernel Entry point
+;----------------------------------------------------------------
+ENTRY(stext)
 
 	CPU_EARLY_SETUP
 
 #ifdef CONFIG_SMP
-	; Ensure Boot (Master) proceeds. Others wait in platform dependent way
-	;	IDENTITY Reg [ 3  2  1  0 ]
-	;	(cpu-id)             ^^^	=> Zero for UP ARC700
-	;					=> #Core-ID if SMP (Master 0)
-	; Note that non-boot CPUs might not land here if halt-on-reset and
-	; instead breath life from @first_lines_of_secondary, but we still
-	; need to make sure only boot cpu takes this path.
 	GET_CPU_ID  r5
 	cmp	r5, 0
-	mov.ne	r0, r5
-	jne	arc_platform_smp_wait_to_boot
+	mov.nz	r0, r5
+#ifdef CONFIG_ARC_SMP_HALT_ON_RESET
+	; Non-Master can proceed as system would be booted sufficiently
+	jnz	first_lines_of_secondary
+#else
+	; Non-Masters wait for Master to boot enough and bring them up
+	jnz	arc_platform_smp_wait_to_boot
 #endif
+	; Master falls thru
+#endif
+
 	; Clear BSS before updating any globals
 	; XXX: use ZOL here
 	mov	r5, __bss_start
@@ -102,18 +111,14 @@ stext:
 	GET_TSK_STACK_BASE r9, sp	; r9 = tsk, sp = stack base(output)
 
 	j	start_kernel	; "C" entry point
+END(stext)
 
 #ifdef CONFIG_SMP
 ;----------------------------------------------------------------
 ;     First lines of code run by secondary before jumping to 'C'
 ;----------------------------------------------------------------
 	.section .text, "ax",@progbits
-	.type first_lines_of_secondary, @function
-	.globl first_lines_of_secondary
-
-first_lines_of_secondary:
-
-	CPU_EARLY_SETUP
+ENTRY(first_lines_of_secondary)
 
 	; setup per-cpu idle task as "current" on this CPU
 	ld	r0, [@secondary_idle_tsk]
@@ -126,5 +131,5 @@ first_lines_of_secondary:
 	GET_TSK_STACK_BASE r0, sp
 
 	j	start_kernel_secondary
-
+END(first_lines_of_secondary)
 #endif
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 039fac30b5c1..06bcedf19b62 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -79,17 +79,16 @@ static struct irq_chip onchip_intc = {
 static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
 			       irq_hw_number_t hw)
 {
-	/*
-	 * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core
-	 *      code doesn't own it (like TIMER0). ISS IDU / ezchip define it
-	 *      in platform header which can't be included here as it goes
-	 *      against multi-platform image philisophy
-	 */
-	if (irq == TIMER0_IRQ)
+	switch (irq) {
+	case TIMER0_IRQ:
+#ifdef CONFIG_SMP
+	case IPI_IRQ:
+#endif
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
-	else
+		break;
+	default:
 		irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq);
-
+	}
 	return 0;
 }
 
@@ -148,78 +147,15 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
 
 void arch_local_irq_enable(void)
 {
-
 	unsigned long flags = arch_local_save_flags();
 
-	/* Allow both L1 and L2 at the onset */
-	flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-
-	/* Called from hard ISR (between irq_enter and irq_exit) */
-	if (in_irq()) {
-
-		/* If in L2 ISR, don't re-enable any further IRQs as this can
-		 * cause IRQ priorities to get upside down. e.g. it could allow
-		 * L1 be taken while in L2 hard ISR which is wrong not only in
-		 * theory, it can also cause the dreaded L1-L2-L1 scenario
-		 */
-		if (flags & STATUS_A2_MASK)
-			flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK);
-
-		/* Even if in L1 ISR, allowe Higher prio L2 IRQs */
-		else if (flags & STATUS_A1_MASK)
-			flags &= ~(STATUS_E1_MASK);
-	}
-
-	/* called from soft IRQ, ideally we want to re-enable all levels */
-
-	else if (in_softirq()) {
-
-		/* However if this is case of L1 interrupted by L2,
-		 * re-enabling both may cause whaco L1-L2-L1 scenario
-		 * because ARC700 allows level 1 to interrupt an active L2 ISR
-		 * Thus we disable both
-		 * However some code, executing in soft ISR wants some IRQs
-		 * to be enabled so we re-enable L2 only
-		 *
-		 * How do we determine L1 intr by L2
-		 *  -A2 is set (means in L2 ISR)
-		 *  -E1 is set in this ISR's pt_regs->status32 which is
-		 *      saved copy of status32_l2 when l2 ISR happened
-		 */
-		struct pt_regs *pt = get_irq_regs();
-
-		if ((flags & STATUS_A2_MASK) && pt &&
-		    (pt->status32 & STATUS_A1_MASK)) {
-			/*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */
-			flags &= ~(STATUS_E1_MASK);
-		}
-	}
+	if (flags & STATUS_A2_MASK)
+		flags |= STATUS_E2_MASK;
+	else if (flags & STATUS_A1_MASK)
+		flags |= STATUS_E1_MASK;
 
 	arch_local_irq_restore(flags);
 }
 
-#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */
-
-/*
- * Simpler version for only 1 level of interrupt
- * Here we only Worry about Level 1 Bits
- */
-void arch_local_irq_enable(void)
-{
-	unsigned long flags;
-
-	/*
-	 * ARC IDE Drivers tries to re-enable interrupts from hard-isr
-	 * context which is simply wrong
-	 */
-	if (in_irq()) {
-		WARN_ONCE(1, "IRQ enabled from hard-isr");
-		return;
-	}
-
-	flags = arch_local_save_flags();
-	flags |= (STATUS_E1_MASK | STATUS_E2_MASK);
-	arch_local_irq_restore(flags);
-}
-#endif
 EXPORT_SYMBOL(arch_local_irq_enable);
+#endif
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 2989a7bcf8a8..2ee226546c6a 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <asm/mach_desc.h>
+#include <asm/smp.h>
 
 /*
  * Late Interrupt system init called from start_kernel for Boot CPU only
@@ -19,17 +20,20 @@
  */
 void __init init_IRQ(void)
 {
-	/* Any external intc can be setup here */
-	if (machine_desc->init_irq)
-		machine_desc->init_irq();
-
-	/* process the entire interrupt tree in one go */
+	/*
+	 * process the entire interrupt tree in one go
+	 * Any external intc will be setup provided DT chains them
+	 * properly
+	 */
 	irqchip_init();
 
 #ifdef CONFIG_SMP
-	/* Master CPU can initialize it's side of IPI */
-	if (machine_desc->init_smp)
-		machine_desc->init_smp(smp_processor_id());
+	/* a SMP H/w block could do IPI IRQ request here */
+	if (plat_smp_ops.init_irq_cpu)
+		plat_smp_ops.init_irq_cpu(smp_processor_id());
+
+	if (machine_desc->init_cpu_smp)
+		machine_desc->init_cpu_smp(smp_processor_id());
 #endif
 }
 
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 4ffd1855f1bd..74a9b074ac3e 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -12,20 +12,14 @@
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <asm/mcip.h>
+#include <asm/setup.h>
 
 static char smp_cpuinfo_buf[128];
 static int idu_detected;
 
 static DEFINE_RAW_SPINLOCK(mcip_lock);
 
-/*
- * Any SMP specific init any CPU does when it comes up.
- * Here we setup the CPU to enable Inter-Processor-Interrupts
- * Called for each CPU
- * -Master      : init_IRQ()
- * -Other(s)    : start_kernel_secondary()
- */
-void mcip_init_smp(unsigned int cpu)
+static void mcip_setup_per_cpu(int cpu)
 {
 	smp_ipi_irq_setup(cpu, IPI_IRQ);
 }
@@ -96,34 +90,8 @@ static void mcip_ipi_clear(int irq)
 #endif
 }
 
-volatile int wake_flag;
-
-static void mcip_wakeup_cpu(int cpu, unsigned long pc)
-{
-	BUG_ON(cpu == 0);
-	wake_flag = cpu;
-}
-
-void arc_platform_smp_wait_to_boot(int cpu)
+static void mcip_probe_n_setup(void)
 {
-	while (wake_flag != cpu)
-		;
-
-	wake_flag = 0;
-	__asm__ __volatile__("j @first_lines_of_secondary	\n");
-}
-
-struct plat_smp_ops plat_smp_ops = {
-	.info		= smp_cpuinfo_buf,
-	.cpu_kick	= mcip_wakeup_cpu,
-	.ipi_send	= mcip_ipi_send,
-	.ipi_clear	= mcip_ipi_clear,
-};
-
-void mcip_init_early_smp(void)
-{
-#define IS_AVAIL1(var, str)    ((var) ? str : "")
-
 	struct mcip_bcr {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 		unsigned int pad3:8,
@@ -161,6 +129,14 @@ void mcip_init_early_smp(void)
 		panic("kernel trying to use non-existent GRTC\n");
 }
 
+struct plat_smp_ops plat_smp_ops = {
+	.info		= smp_cpuinfo_buf,
+	.init_early_smp	= mcip_probe_n_setup,
+	.init_irq_cpu	= mcip_setup_per_cpu,
+	.ipi_send	= mcip_ipi_send,
+	.ipi_clear	= mcip_ipi_clear,
+};
+
 /***************************************************************************
  * ARCv2 Interrupt Distribution Unit (IDU)
  *
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index cabde9dc0696..c33e77c0ad3e 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -160,10 +160,6 @@ static const struct cpuinfo_data arc_cpu_tbl[] = {
 	{ {0x00, NULL		} }
 };
 
-#define IS_AVAIL1(v, s)		((v) ? s : "")
-#define IS_USED_RUN(v)		((v) ? "" : "(not used) ")
-#define IS_USED_CFG(cfg)	IS_USED_RUN(IS_ENABLED(cfg))
-#define IS_AVAIL2(v, s, cfg)	IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
@@ -415,8 +411,9 @@ void __init setup_arch(char **cmdline_p)
 	if (machine_desc->init_early)
 		machine_desc->init_early();
 
-	setup_processor();
 	smp_init_cpus();
+
+	setup_processor();
 	setup_arch_memory();
 
 	/* copy flat DT out of .init and then unflatten it */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index be13d12420ba..580587805fa3 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -42,8 +42,13 @@ void __init smp_prepare_boot_cpu(void)
 }
 
 /*
- * Initialise the CPU possible map early - this describes the CPUs
- * which may be present or become present in the system.
+ * Called from setup_arch() before calling setup_processor()
+ *
+ * - Initialise the CPU possible map early - this describes the CPUs
+ *   which may be present or become present in the system.
+ * - Call early smp init hook. This can initialize a specific multi-core
+ *   IP which is say common to several platforms (hence not part of
+ *   platform specific int_early() hook)
  */
 void __init smp_init_cpus(void)
 {
@@ -51,6 +56,9 @@ void __init smp_init_cpus(void)
 
 	for (i = 0; i < NR_CPUS; i++)
 		set_cpu_possible(i, true);
+
+	if (plat_smp_ops.init_early_smp)
+		plat_smp_ops.init_early_smp();
 }
 
 /* called from init ( ) =>  process 1 */
@@ -72,35 +80,29 @@ void __init smp_cpus_done(unsigned int max_cpus)
 }
 
 /*
- * After power-up, a non Master CPU needs to wait for Master to kick start it
- *
- * The default implementation halts
- *
- * This relies on platform specific support allowing Master to directly set
- * this CPU's PC (to be @first_lines_of_secondary() and kick start it.
- *
- * In lack of such h/w assist, platforms can override this function
- *   - make this function busy-spin on a token, eventually set by Master
- *     (from arc_platform_smp_wakeup_cpu())
- *   - Once token is available, jump to @first_lines_of_secondary
- *     (using inline asm).
- *
- * Alert: can NOT use stack here as it has not been determined/setup for CPU.
- *        If it turns out to be elaborate, it's better to code it in assembly
- *
+ * Default smp boot helper for Run-on-reset case where all cores start off
+ * together. Non-masters need to wait for Master to start running.
+ * This is implemented using a flag in memory, which Non-masters spin-wait on.
+ * Master sets it to cpu-id of core to "ungate" it.
  */
-void __weak arc_platform_smp_wait_to_boot(int cpu)
+static volatile int wake_flag;
+
+static void arc_default_smp_cpu_kick(int cpu, unsigned long pc)
 {
-	/*
-	 * As a hack for debugging - since debugger will single-step over the
-	 * FLAG insn - wrap the halt itself it in a self loop
-	 */
-	__asm__ __volatile__(
-	"1:		\n"
-	"	flag 1	\n"
-	"	b 1b	\n");
+	BUG_ON(cpu == 0);
+	wake_flag = cpu;
+}
+
+void arc_platform_smp_wait_to_boot(int cpu)
+{
+	while (wake_flag != cpu)
+		;
+
+	wake_flag = 0;
+	__asm__ __volatile__("j @first_lines_of_secondary	\n");
 }
 
+
 const char *arc_platform_smp_cpuinfo(void)
 {
 	return plat_smp_ops.info ? : "";
@@ -129,8 +131,12 @@ void start_kernel_secondary(void)
 
 	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
-	if (machine_desc->init_smp)
-		machine_desc->init_smp(cpu);
+	/* Some SMP H/w setup - for each cpu */
+	if (plat_smp_ops.init_irq_cpu)
+		plat_smp_ops.init_irq_cpu(cpu);
+
+	if (machine_desc->init_cpu_smp)
+		machine_desc->init_cpu_smp(cpu);
 
 	arc_local_timer_setup();
 
@@ -161,6 +167,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (plat_smp_ops.cpu_kick)
 		plat_smp_ops.cpu_kick(cpu,
 				(unsigned long)first_lines_of_secondary);
+	else
+		arc_default_smp_cpu_kick(cpu, (unsigned long)NULL);
 
 	/* wait for 1 sec after kicking the secondary */
 	wait_till = jiffies + HZ;
diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c
index 4294761a2b3e..dfad287f1db1 100644
--- a/arch/arc/kernel/time.c
+++ b/arch/arc/kernel/time.c
@@ -285,7 +285,4 @@ void __init time_init(void)
 
 	/* sets up the periodic event timer */
 	arc_local_timer_setup();
-
-	if (machine_desc->init_time)
-		machine_desc->init_time();
 }
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index dd35bde39f69..894e696bddaa 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -12,7 +12,7 @@
 #include <asm/thread_info.h>
 
 OUTPUT_ARCH(arc)
-ENTRY(_stext)
+ENTRY(res_service)
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 jiffies = jiffies_64 + 4;
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
index 7beb941556c3..3703a4969349 100644
--- a/arch/arc/mm/Makefile
+++ b/arch/arc/mm/Makefile
@@ -8,3 +8,4 @@
 
 obj-y	:= extable.o ioremap.o dma.o fault.o init.o
 obj-y	+= tlb.o tlbex.o cache.o mmap.o
+obj-$(CONFIG_HIGHMEM)	+= highmem.o
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 0d1a6e96839f..ff7ff6cbb811 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -25,7 +25,7 @@ static int l2_line_sz;
 int ioc_exists;
 volatile int slc_enable = 1, ioc_enable = 1;
 
-void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
+void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
 			       unsigned long sz, const int cacheop);
 
 void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
@@ -37,7 +37,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 	int n = 0;
 	struct cpuinfo_arc_cache *p;
 
-#define IS_USED_RUN(v)		((v) ? "" : "(disabled) ")
 #define PR_CACHE(p, cfg, str)						\
 	if (!(p)->ver)							\
 		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
@@ -47,7 +46,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 			(p)->sz_k, (p)->assoc, (p)->line_len,		\
 			(p)->vipt ? "VIPT" : "PIPT",			\
 			(p)->alias ? " aliasing" : "",			\
-			IS_ENABLED(cfg) ? "" : " (not used)");
+			IS_USED_CFG(cfg));
 
 	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
 	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
@@ -63,7 +62,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	if (ioc_exists)
 		n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
-				IS_USED_RUN(ioc_enable));
+				IS_DISABLED_RUN(ioc_enable));
 
 	return buf;
 }
@@ -217,7 +216,7 @@ slc_chk:
  */
 
 static inline
-void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr,
+void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
 {
 	unsigned int aux_cmd;
@@ -254,8 +253,12 @@ void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr,
 	}
 }
 
+/*
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
+ */
 static inline
-void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
+void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
 {
 	unsigned int aux_cmd, aux_tag;
@@ -290,6 +293,16 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
 	if (full_page)
 		write_aux_reg(aux_tag, paddr);
 
+	/*
+	 * This is technically for MMU v4, using the MMU v3 programming model
+	 * Special work for HS38 aliasing I-cache configuratino with PAE40
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 * Note that PTAG_HI is hoisted outside the line loop
+	 */
+	if (is_pae40_enabled() && op == OP_INV_IC)
+		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
@@ -302,14 +315,20 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr,
 }
 
 /*
- * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
- * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
+ *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ *    respectively, similar to MMU v3 programming model, hence
+ *    __cache_line_loop_v3() is used)
  *
- * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
- * specified in PTAG (similar to MMU v3)
+ * If PAE40 is enabled, independent of aliasing considerations, the higher bits
+ * needs to be written into PTAG_HI
  */
 static inline
-void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr,
+void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int cacheop)
 {
 	unsigned int aux_cmd;
@@ -336,6 +355,22 @@ void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr,
 
 	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
+	/*
+	 * For HS38 PAE40 configuration
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and needs to be written before the lower 32 bits)
+	 */
+	if (is_pae40_enabled()) {
+		if (cacheop == OP_INV_IC)
+			/*
+			 * Non aliasing I-cache in HS38,
+			 * aliasing I-cache handled in __cache_line_loop_v3()
+			 */
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
 		paddr += L1_CACHE_BYTES;
@@ -413,7 +448,7 @@ static inline void __dc_entire_op(const int op)
 /*
  * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
  */
-static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
+static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
 				unsigned long sz, const int op)
 {
 	unsigned long flags;
@@ -446,7 +481,7 @@ static inline void __ic_entire_inv(void)
 }
 
 static inline void
-__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
+__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz)
 {
 	unsigned long flags;
@@ -463,7 +498,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
 #else
 
 struct ic_inv_args {
-	unsigned long paddr, vaddr;
+	phys_addr_t paddr, vaddr;
 	int sz;
 };
 
@@ -474,7 +509,7 @@ static void __ic_line_inv_vaddr_helper(void *info)
         __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
 }
 
-static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 				unsigned long sz)
 {
 	struct ic_inv_args ic_inv = {
@@ -495,7 +530,7 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
 
-noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
+noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
 	/*
@@ -585,7 +620,7 @@ void flush_dcache_page(struct page *page)
 	} else if (page_mapped(page)) {
 
 		/* kernel reading from page with U-mapping */
-		unsigned long paddr = (unsigned long)page_address(page);
+		phys_addr_t paddr = (unsigned long)page_address(page);
 		unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
 
 		if (addr_not_cache_congruent(paddr, vaddr))
@@ -733,14 +768,14 @@ EXPORT_SYMBOL(flush_icache_range);
  *    builtin kernel page will not have any virtual mappings.
  *    kprobe on loadable module will be kernel vaddr.
  */
-void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
+void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
 {
 	__dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
 	__ic_line_inv_vaddr(paddr, vaddr, len);
 }
 
 /* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
+void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
 {
 	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
 }
@@ -749,7 +784,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
  * wrapper to clearout kernel or userspace mappings of a page
  * For kernel mappings @vaddr == @paddr
  */
-void __flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
 {
 	__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
 }
@@ -807,8 +842,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long u_vaddr, struct vm_area_struct *vma)
 {
-	unsigned long kfrom = (unsigned long)page_address(from);
-	unsigned long kto = (unsigned long)page_address(to);
+	void *kfrom = kmap_atomic(from);
+	void *kto = kmap_atomic(to);
 	int clean_src_k_mappings = 0;
 
 	/*
@@ -818,13 +853,16 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 *
 	 * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
 	 * equally valid for SRC page as well
+	 *
+	 * For !VIPT cache, all of this gets compiled out as
+	 * addr_not_cache_congruent() is 0
 	 */
 	if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
-		__flush_dcache_page(kfrom, u_vaddr);
+		__flush_dcache_page((unsigned long)kfrom, u_vaddr);
 		clean_src_k_mappings = 1;
 	}
 
-	copy_page((void *)kto, (void *)kfrom);
+	copy_page(kto, kfrom);
 
 	/*
 	 * Mark DST page K-mapping as dirty for a later finalization by
@@ -841,11 +879,14 @@ void copy_user_highpage(struct page *to, struct page *from,
 	 * sync the kernel mapping back to physical page
 	 */
 	if (clean_src_k_mappings) {
-		__flush_dcache_page(kfrom, kfrom);
+		__flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
 		set_bit(PG_dc_clean, &from->flags);
 	} else {
 		clear_bit(PG_dc_clean, &from->flags);
 	}
+
+	kunmap_atomic(kto);
+	kunmap_atomic(kfrom);
 }
 
 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index d948e4e9d89c..af63f4a13e60 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -18,7 +18,14 @@
 #include <asm/pgalloc.h>
 #include <asm/mmu.h>
 
-static int handle_vmalloc_fault(unsigned long address)
+/*
+ * kernel virtual address is required to implement vmalloc/pkmap/fixmap
+ * Refer to asm/processor.h for System Memory Map
+ *
+ * It simply copies the PMD entry (pointer to 2nd level page table or hugepage)
+ * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared
+ */
+noinline static int handle_kernel_vaddr_fault(unsigned long address)
 {
 	/*
 	 * Synchronize this task's top level page-table
@@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
 	 * only copy the information from the master page table,
 	 * nothing more.
 	 */
-	if (address >= VMALLOC_START && address <= VMALLOC_END) {
-		ret = handle_vmalloc_fault(address);
+	if (address >= VMALLOC_START) {
+		ret = handle_kernel_vaddr_fault(address);
 		if (unlikely(ret))
 			goto bad_area_nosemaphore;
 		else
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
new file mode 100644
index 000000000000..065ee6bfa82a
--- /dev/null
+++ b/arch/arc/mm/highmem.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+/*
+ * HIGHMEM API:
+ *
+ * kmap() API provides sleep semantics hence refered to as "permanent maps"
+ * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor
+ * for book-keeping
+ *
+ * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides
+ * shortlived ala "temporary mappings" which historically were implemented as
+ * fixmaps (compile time addr etc). Their book-keeping is done per cpu.
+ *
+ *	Both these facts combined (preemption disabled and per-cpu allocation)
+ *	means the total number of concurrent fixmaps will be limited to max
+ *	such allocations in a single control path. Thus KM_TYPE_NR (another
+ *	historic relic) is a small'ish number which caps max percpu fixmaps
+ *
+ * ARC HIGHMEM Details
+ *
+ * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module)
+ *   is now shared between vmalloc and kmap (non overlapping though)
+ *
+ * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD
+ *   This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means
+ *   2M of kvaddr space for typical config (8K page and 11:8:13 traversal split)
+ *
+ * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE
+ *   slots across NR_CPUS would be more than sufficient (generic code defines
+ *   KM_TYPE_NR as 20).
+ *
+ * - pkmap being preemptible, in theory could do with more than 256 concurrent
+ *   mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse
+ *   the PGD and only works with a single page table @pkmap_page_table, hence
+ *   sets the limit
+ */
+
+extern pte_t * pkmap_page_table;
+static pte_t * fixmap_page_table;
+
+void *kmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	return kmap_high(page);
+}
+
+void *kmap_atomic(struct page *page)
+{
+	int idx, cpu_idx;
+	unsigned long vaddr;
+
+	preempt_disable();
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	cpu_idx = kmap_atomic_idx_push();
+	idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
+	vaddr = FIXMAP_ADDR(idx);
+
+	set_pte_at(&init_mm, vaddr, fixmap_page_table + idx,
+		   mk_pte(page, kmap_prot));
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kv)
+{
+	unsigned long kvaddr = (unsigned long)kv;
+
+	if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) {
+
+		/*
+		 * Because preemption is disabled, this vaddr can be associated
+		 * with the current allocated index.
+		 * But in case of multiple live kmap_atomic(), it still relies on
+		 * callers to unmap in right order.
+		 */
+		int cpu_idx = kmap_atomic_idx();
+		int idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
+
+		WARN_ON(kvaddr != FIXMAP_ADDR(idx));
+
+		pte_clear(&init_mm, kvaddr, fixmap_page_table + idx);
+		local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
+
+		kmap_atomic_idx_pop();
+	}
+
+	pagefault_enable();
+	preempt_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr)
+{
+	pgd_t *pgd_k;
+	pud_t *pud_k;
+	pmd_t *pmd_k;
+	pte_t *pte_k;
+
+	pgd_k = pgd_offset_k(kvaddr);
+	pud_k = pud_offset(pgd_k, kvaddr);
+	pmd_k = pmd_offset(pud_k, kvaddr);
+
+	pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+	pmd_populate_kernel(&init_mm, pmd_k, pte_k);
+	return pte_k;
+}
+
+void kmap_init(void)
+{
+	/* Due to recursive include hell, we can't do this in processor.h */
+	BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE));
+
+	BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE);
+	pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE);
+
+	BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE);
+	fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE);
+}
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index d44eedd8c322..a9305b5a2cd4 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -15,6 +15,7 @@
 #endif
 #include <linux/swap.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
@@ -24,16 +25,22 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
 char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
 EXPORT_SYMBOL(empty_zero_page);
 
-/* Default tot mem from .config */
-static unsigned long arc_mem_sz = 0x20000000;  /* some default */
+static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE;
+static unsigned long low_mem_sz;
+
+#ifdef CONFIG_HIGHMEM
+static unsigned long min_high_pfn;
+static u64 high_mem_start;
+static u64 high_mem_sz;
+#endif
 
 /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */
 static int __init setup_mem_sz(char *str)
 {
-	arc_mem_sz = memparse(str, NULL) & PAGE_MASK;
+	low_mem_sz = memparse(str, NULL) & PAGE_MASK;
 
 	/* early console might not be setup yet - it will show up later */
-	pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz));
+	pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz));
 
 	return 0;
 }
@@ -41,8 +48,22 @@ early_param("mem", setup_mem_sz);
 
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-	arc_mem_sz = size & PAGE_MASK;
-	pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz));
+	int in_use = 0;
+
+	if (!low_mem_sz) {
+		BUG_ON(base != low_mem_start);
+		low_mem_sz = size;
+		in_use = 1;
+	} else {
+#ifdef CONFIG_HIGHMEM
+		high_mem_start = base;
+		high_mem_sz = size;
+		in_use = 1;
+#endif
+	}
+
+	pr_info("Memory @ %llx [%lldM] %s\n",
+		base, TO_MB(size), !in_use ? "Not used":"");
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -72,46 +93,62 @@ early_param("initrd", early_initrd);
 void __init setup_arch_memory(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES];
-	unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
+	unsigned long zones_holes[MAX_NR_ZONES];
 
 	init_mm.start_code = (unsigned long)_text;
 	init_mm.end_code = (unsigned long)_etext;
 	init_mm.end_data = (unsigned long)_edata;
 	init_mm.brk = (unsigned long)_end;
 
-	/*
-	 * We do it here, so that memory is correctly instantiated
-	 * even if "mem=xxx" cmline over-ride is given and/or
-	 * DT has memory node. Each causes an update to @arc_mem_sz
-	 * and we finally add memory one here
-	 */
-	memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz);
-
-	/*------------- externs in mm need setting up ---------------*/
-
 	/* first page of system - kernel .vector starts here */
 	min_low_pfn = ARCH_PFN_OFFSET;
 
-	/* Last usable page of low mem (no HIGHMEM yet for ARC port) */
-	max_low_pfn = max_pfn = PFN_DOWN(end_mem);
+	/* Last usable page of low mem */
+	max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz);
 
-	max_mapnr = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	min_high_pfn = PFN_DOWN(high_mem_start);
+	max_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
+#endif
+
+	max_mapnr = max_pfn - min_low_pfn;
 
-	/*------------- reserve kernel image -----------------------*/
-	memblock_reserve(CONFIG_LINUX_LINK_BASE,
-			 __pa(_end) - CONFIG_LINUX_LINK_BASE);
+	/*------------- bootmem allocator setup -----------------------*/
+
+	/*
+	 * seed the bootmem allocator after any DT memory node parsing or
+	 * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz
+	 *
+	 * Only low mem is added, otherwise we have crashes when allocating
+	 * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of
+	 * avail memory, ending in highmem with a > 32-bit address. However
+	 * it then tries to memset it with a truncaed 32-bit handle, causing
+	 * the crash
+	 */
+
+	memblock_add(low_mem_start, low_mem_sz);
+	memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
 
 #ifdef CONFIG_BLK_DEV_INITRD
-	/*------------- reserve initrd image -----------------------*/
 	if (initrd_start)
 		memblock_reserve(__pa(initrd_start), initrd_end - initrd_start);
 #endif
 
 	memblock_dump_all();
 
-	/*-------------- node setup --------------------------------*/
+	/*----------------- node/zones setup --------------------------*/
 	memset(zones_size, 0, sizeof(zones_size));
-	zones_size[ZONE_NORMAL] = max_mapnr;
+	memset(zones_holes, 0, sizeof(zones_holes));
+
+	zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
+	zones_holes[ZONE_NORMAL] = 0;
+
+#ifdef CONFIG_HIGHMEM
+	zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn;
+
+	/* This handles the peripheral address space hole */
+	zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn;
+#endif
 
 	/*
 	 * We can't use the helper free_area_init(zones[]) because it uses
@@ -122,9 +159,12 @@ void __init setup_arch_memory(void)
 	free_area_init_node(0,			/* node-id */
 			    zones_size,		/* num pages per zone */
 			    min_low_pfn,	/* first pfn of node */
-			    NULL);		/* NO holes */
+			    zones_holes);	/* holes */
 
-	high_memory = (void *)end_mem;
+#ifdef CONFIG_HIGHMEM
+	high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
+	kmap_init();
+#endif
 }
 
 /*
@@ -135,6 +175,14 @@ void __init setup_arch_memory(void)
  */
 void __init mem_init(void)
 {
+#ifdef CONFIG_HIGHMEM
+	unsigned long tmp;
+
+	reset_all_zones_managed_pages();
+	for (tmp = min_high_pfn; tmp < max_pfn; tmp++)
+		free_highmem_page(pfn_to_page(tmp));
+#endif
+
 	free_all_bootmem();
 	mem_init_print_info(NULL);
 }
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 2c7ce8bb7475..0ee739846847 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
@@ -182,7 +186,7 @@ static void utlb_invalidate(void)
 
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
 
@@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
 	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
 
@@ -240,22 +248,39 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
 
 noinline void local_flush_tlb_all(void)
 {
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
 	unsigned long flags;
 	unsigned int entry;
-	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+	int num_tlb = mmu->sets * mmu->ways;
 
 	local_irq_save(flags);
 
 	/* Load PD0 and PD1 with template for a Blank Entry */
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
-	for (entry = 0; entry < mmu->num_tlb; entry++) {
+	for (entry = 0; entry < num_tlb; entry++) {
 		/* write this entry to the TLB */
 		write_aux_reg(ARC_REG_TLBINDEX, entry);
 		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 	}
 
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+		const int stlb_idx = 0x800;
+
+		/* Blank sTLB entry */
+		write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ);
+
+		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
+			write_aux_reg(ARC_REG_TLBINDEX, entry);
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+		}
+	}
+
 	utlb_invalidate();
 
 	local_irq_restore(flags);
@@ -409,6 +434,15 @@ static inline void ipi_flush_tlb_range(void *arg)
 	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ipi_flush_pmd_tlb_range(void *arg)
+{
+	struct tlb_args *ta = arg;
+
+	local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+#endif
+
 static inline void ipi_flush_tlb_kernel_range(void *arg)
 {
 	struct tlb_args *ta = (struct tlb_args *)arg;
@@ -449,6 +483,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			 unsigned long end)
+{
+	struct tlb_args ta = {
+		.ta_vma = vma,
+		.ta_start = start,
+		.ta_end = end
+	};
+
+	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1);
+}
+#endif
+
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	struct tlb_args ta = {
@@ -463,11 +511,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 /*
  * Routine to create a TLB entry
  */
-void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
-	unsigned long pd0, pd1;
+	unsigned long pd0;
+	pte_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -499,9 +548,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address);
+	tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
 
-	address &= PAGE_MASK;
+	vaddr &= PAGE_MASK;
 
 	/* update this PTE credentials */
 	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
@@ -511,7 +560,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 	/* ASID for this task */
 	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
+	pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
 
 	/*
 	 * ARC MMU provides fully orthogonal access bits for K/U mode,
@@ -547,7 +596,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
-	unsigned long paddr = pte_val(*ptep) & PAGE_MASK;
+	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
 	struct page *page = pfn_to_page(pte_pfn(*ptep));
 
 	create_tlb(vma, vaddr, ptep);
@@ -580,6 +629,95 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 	}
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
+ * support.
+ *
+ * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a
+ * new bit "SZ" in TLB page desciptor to distinguish between them.
+ * Super Page size is configurable in hardware (4K to 16M), but fixed once
+ * RTL builds.
+ *
+ * The exact THP size a Linx configuration will support is a function of:
+ *  - MMU page size (typical 8K, RTL fixed)
+ *  - software page walker address split between PGD:PTE:PFN (typical
+ *    11:8:13, but can be changed with 1 line)
+ * So for above default, THP size supported is 8K * (2^8) = 2M
+ *
+ * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
+ * reduces to 1 level (as PTE is folded into PGD and canonically referred
+ * to as PMD).
+ * Thus THP PMD accessors are implemented in terms of PTE (just like sparc)
+ */
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd)
+{
+	pte_t pte = __pte(pmd_val(*pmd));
+	update_mmu_cache(vma, addr, &pte);
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+
+	pte_val(pgtable[0]) = 0;
+	pte_val(pgtable[1]) = 0;
+
+	return pgtable;
+}
+
+void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			       unsigned long end)
+{
+	unsigned int cpu;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) {
+		unsigned int asid = hw_pid(vma->vm_mm, cpu);
+
+		/* No need to loop here: this will always be for 1 Huge Page */
+		tlb_entry_erase(start | _PAGE_HW_SZ | asid);
+	}
+
+	local_irq_restore(flags);
+}
+
+#endif
+
 /* Read the Cache Build Confuration Registers, Decode them and save into
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
@@ -598,10 +736,10 @@ void read_decode_mmu_bcr(void)
 
 	struct bcr_mmu_3 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4,
+	unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
 		     u_itlb:4, u_dtlb:4;
 #else
-	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4,
+	unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
 		     ways:4, ver:8;
 #endif
 	} *mmu3;
@@ -622,7 +760,7 @@ void read_decode_mmu_bcr(void)
 
 	if (mmu->ver <= 2) {
 		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-		mmu->pg_sz_k = TO_KB(PAGE_SIZE);
+		mmu->pg_sz_k = TO_KB(0x2000);
 		mmu->sets = 1 << mmu2->sets;
 		mmu->ways = 1 << mmu2->ways;
 		mmu->u_dtlb = mmu2->u_dtlb;
@@ -634,6 +772,7 @@ void read_decode_mmu_bcr(void)
 		mmu->ways = 1 << mmu3->ways;
 		mmu->u_dtlb = mmu3->u_dtlb;
 		mmu->u_itlb = mmu3->u_itlb;
+		mmu->sasid = mmu3->sasid;
 	} else {
 		mmu4 = (struct bcr_mmu_4 *)&tmp;
 		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -642,9 +781,9 @@ void read_decode_mmu_bcr(void)
 		mmu->ways = mmu4->n_ways * 2;
 		mmu->u_dtlb = mmu4->u_dtlb * 4;
 		mmu->u_itlb = mmu4->u_itlb * 4;
+		mmu->sasid = mmu4->sasid;
+		mmu->pae = mmu4->pae;
 	}
-
-	mmu->num_tlb = mmu->sets * mmu->ways;
 }
 
 char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
@@ -655,14 +794,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 
 	if (p_mmu->s_pg_sz_m)
 		scnprintf(super_pg, 64, "%dM Super Page%s, ",
-			  p_mmu->s_pg_sz_m, " (not used)");
+			  p_mmu->s_pg_sz_m,
+			  IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
 		       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
-		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
+		       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
 		       p_mmu->u_dtlb, p_mmu->u_itlb,
-		       IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
+		       IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
@@ -690,6 +830,14 @@ void arc_mmu_init(void)
 	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE))
+		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
+		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
+
+	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+		panic("Hardware doesn't support PAE40\n");
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
@@ -725,15 +873,15 @@ void arc_mmu_init(void)
  *      the duplicate one.
  * -Knob to be verbose abt it.(TODO: hook them up to debugfs)
  */
-volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */
+volatile int dup_pd_silent; /* Be slient abt it or complain (default) */
 
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
 {
-	int set, way, n;
-	unsigned long flags, is_valid;
 	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-	unsigned int pd0[mmu->ways], pd1[mmu->ways];
+	unsigned int pd0[mmu->ways];
+	unsigned long flags;
+	int set;
 
 	local_irq_save(flags);
 
@@ -743,14 +891,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 	/* loop thru all sets of TLB */
 	for (set = 0; set < mmu->sets; set++) {
 
+		int is_valid, way;
+
 		/* read out all the ways of current set */
 		for (way = 0, is_valid = 0; way < mmu->ways; way++) {
 			write_aux_reg(ARC_REG_TLBINDEX,
 					  SET_WAY_TO_IDX(mmu, set, way));
 			write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
 			pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
-			pd1[way] = read_aux_reg(ARC_REG_TLBPD1);
 			is_valid |= pd0[way] & _PAGE_PRESENT;
+			pd0[way] &= PAGE_MASK;
 		}
 
 		/* If all the WAYS in SET are empty, skip to next SET */
@@ -759,30 +909,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
 		/* Scan the set for duplicate ways: needs a nested loop */
 		for (way = 0; way < mmu->ways - 1; way++) {
+
+			int n;
+
 			if (!pd0[way])
 				continue;
 
 			for (n = way + 1; n < mmu->ways; n++) {
-				if ((pd0[way] & PAGE_MASK) ==
-				    (pd0[n] & PAGE_MASK)) {
-
-					if (dup_pd_verbose) {
-						pr_info("Duplicate PD's @"
-							"[%d:%d]/[%d:%d]\n",
-						     set, way, set, n);
-						pr_info("TLBPD0[%u]: %08x\n",
-						     way, pd0[way]);
-					}
-
-					/*
-					 * clear entry @way and not @n. This is
-					 * critical to our optimised loop
-					 */
-					pd0[way] = pd1[way] = 0;
-					write_aux_reg(ARC_REG_TLBINDEX,
+				if (pd0[way] != pd0[n])
+					continue;
+
+				if (!dup_pd_silent)
+					pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n",
+						pd0[way], set, way, n);
+
+				/*
+				 * clear entry @way and not @n.
+				 * This is critical to our optimised loop
+				 */
+				pd0[way] = 0;
+				write_aux_reg(ARC_REG_TLBINDEX,
 						SET_WAY_TO_IDX(mmu, set, way));
-					__tlb_entry_erase();
-				}
+				__tlb_entry_erase();
 			}
 		}
 	}
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index f6f4c3cb505d..63860adc4814 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -205,20 +205,38 @@ ex_saved_reg1:
 #endif
 
 	lsr     r0, r2, PGDIR_SHIFT     ; Bits for indexing into PGD
-	ld.as   r1, [r1, r0]            ; PGD entry corresp to faulting addr
-	and.f   r1, r1, PAGE_MASK       ; Ignoring protection and other flags
-	;   contains Ptr to Page Table
-	bz.d    do_slow_path_pf         ; if no Page Table, do page fault
+	ld.as   r3, [r1, r0]            ; PGD entry corresp to faulting addr
+	tst	r3, r3
+	bz	do_slow_path_pf         ; if no Page Table, do page fault
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	and.f	0, r3, _PAGE_HW_SZ	; Is this Huge PMD (thp)
+	add2.nz	r1, r1, r0
+	bnz.d	2f		; YES: PGD == PMD has THP PTE: stop pgd walk
+	mov.nz	r0, r3
+
+#endif
+	and	r1, r3, PAGE_MASK
 
 	; Get the PTE entry: The idea is
 	; (1) x = addr >> PAGE_SHIFT 	-> masks page-off bits from @fault-addr
 	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
-	; (3) z = pgtbl[y]
-	; To avoid the multiply by in end, we do the -2, <<2 below
+	; (3) z = (pgtbl + y * 4)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
+#else
+#define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
+#endif
+
+	; multiply in step (3) above avoided by shifting lesser in step (1)
+	lsr     r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
+	and     r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
+	ld.aw   r0, [r1, r0]            ; r0: PTE (lower word only for PAE40)
+					; r1: PTE ptr
+
+2:
 
-	lsr     r0, r2, (PAGE_SHIFT - 2)
-	and     r0, r0, ( (PTRS_PER_PTE - 1) << 2)
-	ld.aw   r0, [r1, r0]            ; get PTE and PTE ptr for fault addr
 #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
 	and.f 0, r0, _PAGE_PRESENT
 	bz   1f
@@ -233,18 +251,23 @@ ex_saved_reg1:
 ;-----------------------------------------------------------------
 ; Convert Linux PTE entry into TLB entry
 ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+;    (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI])
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
-	and    r3, r0, PTE_BITS_RWX	;       r w x
-	lsl    r2, r3, 3		; r w x 0 0 0 (GLOBAL, kernel only)
+	and    r3, r0, PTE_BITS_RWX	;          r  w  x
+	lsl    r2, r3, 3		; Kr Kw Kx 0  0  0 (GLOBAL, kernel only)
 	and.f  0,  r0, _PAGE_GLOBAL
-	or.z   r2, r2, r3		; r w x r w x (!GLOBAL, user page)
+	or.z   r2, r2, r3		; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page)
 
 	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
 	or  r3, r3, r2
 
-	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
+	sr  r3, [ARC_REG_TLBPD1]    	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
+#ifdef	CONFIG_ARC_HAS_PAE40
+	ld	r3, [r1, 4]		; paddr[39..32]
+	sr	r3, [ARC_REG_TLBPD1HI]
+#endif
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
 
@@ -365,7 +388,7 @@ ENTRY(EV_TLBMissD)
 	lr      r3, [ecr]
 	or      r0, r0, _PAGE_ACCESSED        ; Accessed bit always
 	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
-	or.nz   r0, r0, _PAGE_MODIFIED        ; if Write, set Dirty bit as well
+	or.nz   r0, r0, _PAGE_DIRTY           ; if Write, set Dirty bit as well
 	st_s    r0, [r1]                      ; Write back PTE
 
 	CONV_PTE_TO_TLB
diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c
index 0a77b19e1df8..1b0f0f458a2b 100644
--- a/arch/arc/plat-axs10x/axs10x.c
+++ b/arch/arc/plat-axs10x/axs10x.c
@@ -455,11 +455,6 @@ static void __init axs103_early_init(void)
 	axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card");
 
 	axs10x_early_init();
-
-#ifdef CONFIG_ARC_MCIP
-	/* No Hardware init, but filling the smp ops callbacks */
-	mcip_init_early_smp();
-#endif
 }
 #endif
 
@@ -487,9 +482,6 @@ static const char *axs103_compat[] __initconst = {
 MACHINE_START(AXS103, "axs103")
 	.dt_compat	= axs103_compat,
 	.init_early	= axs103_early_init,
-#ifdef CONFIG_ARC_MCIP
-	.init_smp	= mcip_init_smp,
-#endif
 MACHINE_END
 
 /*
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
index d9e35b4a2f08..dde692812bc1 100644
--- a/arch/arc/plat-sim/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -30,8 +30,4 @@ static const char *simulation_compat[] __initconst = {
 
 MACHINE_START(SIMULATION, "simulation")
 	.dt_compat	= simulation_compat,
-#ifdef CONFIG_ARC_MCIP
-	.init_early	= mcip_init_early_smp,
-	.init_smp	= mcip_init_smp,
-#endif
 MACHINE_END
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 72ad724c67ae..f1ed1109f488 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -645,6 +645,7 @@ config ARCH_SHMOBILE_LEGACY
 
 config ARCH_RPC
 	bool "RiscPC"
+	depends on MMU
 	select ARCH_ACORN
 	select ARCH_MAY_HAVE_PC_FDC
 	select ARCH_SPARSEMEM_ENABLE
@@ -819,6 +820,7 @@ config ARCH_VIRT
 	bool "Dummy Virtual Machine" if ARCH_MULTI_V7
 	select ARM_AMBA
 	select ARM_GIC
+	select ARM_GIC_V3
 	select ARM_PSCI
 	select HAVE_ARM_ARCH_TIMER
 
@@ -1410,7 +1412,6 @@ config HAVE_ARM_ARCH_TIMER
 
 config HAVE_ARM_TWD
 	bool
-	depends on SMP
 	select CLKSRC_OF if OF
 	help
 	  This options enables support for the ARM timer and watchdog unit
@@ -1470,6 +1471,8 @@ choice
 
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
+	config VMSPLIT_3G_OPT
+		bool "3G/1G user/kernel split (for full 1G low memory)"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
 	config VMSPLIT_1G
@@ -1481,6 +1484,7 @@ config PAGE_OFFSET
 	default PHYS_OFFSET if !MMU
 	default 0x40000000 if VMSPLIT_1G
 	default 0x80000000 if VMSPLIT_2G
+	default 0xB0000000 if VMSPLIT_3G_OPT
 	default 0xC0000000
 
 config NR_CPUS
@@ -1695,8 +1699,9 @@ config HIGHMEM
 	  If unsure, say n.
 
 config HIGHPTE
-	bool "Allocate 2nd-level pagetables from highmem"
+	bool "Allocate 2nd-level pagetables from highmem" if EXPERT
 	depends on HIGHMEM
+	default y
 	help
 	  The VM uses one page of physical memory for each page table.
 	  For systems with a lot of processes, this can use a lot of
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 233159d2eaab..bb8fa023d574 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -578,7 +578,7 @@ dtb-$(CONFIG_MACH_SUN4I) += \
 	sun4i-a10-hackberry.dtb \
 	sun4i-a10-hyundai-a7hd.dtb \
 	sun4i-a10-inet97fv2.dtb \
-	sun4i-a10-itead-iteaduino-plus.dts \
+	sun4i-a10-itead-iteaduino-plus.dtb \
 	sun4i-a10-jesurun-q5.dtb \
 	sun4i-a10-marsboard.dtb \
 	sun4i-a10-mini-xplus.dtb \
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index 0447c04a40cc..d83ff9c9701e 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -591,6 +591,7 @@
 			cpts_clock_mult = <0x80000000>;
 			cpts_clock_shift = <29>;
 			ranges;
+			syscon = <&scm_conf>;
 
 			davinci_mdio: mdio@4a101000 {
 				compatible = "ti,am4372-mdio","ti,davinci_mdio";
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 568adf5efde0..d55e3ea89fda 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -402,11 +402,12 @@
 				/* SMPS9 unused */
 
 				ldo1_reg: ldo1 {
-					/* VDD_SD  */
+					/* VDD_SD / VDDSHV8  */
 					regulator-name = "ldo1";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <3300000>;
 					regulator-boot-on;
+					regulator-always-on;
 				};
 
 				ldo2_reg: ldo2 {
diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts
index 89f5a95954ed..4047621b137e 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -46,7 +46,7 @@
 
 / {
 	model = "Marvell Armada 385 Access Point Development Board";
-	compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada38x";
+	compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380";
 
 	chosen {
 		stdout-path = "serial1:115200n8";
diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi
index 63a48490e2f9..d4dbd28d348c 100644
--- a/arch/arm/boot/dts/berlin2q.dtsi
+++ b/arch/arm/boot/dts/berlin2q.dtsi
@@ -152,7 +152,7 @@
 		};
 
 		usb_phy2: phy@a2f400 {
-			compatible = "marvell,berlin2-usb-phy";
+			compatible = "marvell,berlin2cd-usb-phy";
 			reg = <0xa2f400 0x128>;
 			#phy-cells = <0>;
 			resets = <&chip_rst 0x104 14>;
@@ -170,7 +170,7 @@
 		};
 
 		usb_phy0: phy@b74000 {
-			compatible = "marvell,berlin2-usb-phy";
+			compatible = "marvell,berlin2cd-usb-phy";
 			reg = <0xb74000 0x128>;
 			#phy-cells = <0>;
 			resets = <&chip_rst 0x104 12>;
@@ -178,7 +178,7 @@
 		};
 
 		usb_phy1: phy@b78000 {
-			compatible = "marvell,berlin2-usb-phy";
+			compatible = "marvell,berlin2cd-usb-phy";
 			reg = <0xb78000 0x128>;
 			#phy-cells = <0>;
 			resets = <&chip_rst 0x104 13>;
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index e289c706d27d..8fedddc35999 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1448,6 +1448,7 @@
 				     <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
 				     <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
 			ranges;
+			syscon = <&scm_conf>;
 			status = "disabled";
 
 			davinci_mdio: mdio@48485000 {
diff --git a/arch/arm/boot/dts/emev2-kzm9d.dts b/arch/arm/boot/dts/emev2-kzm9d.dts
index 955c24ee4a8c..8c24975e8f9d 100644
--- a/arch/arm/boot/dts/emev2-kzm9d.dts
+++ b/arch/arm/boot/dts/emev2-kzm9d.dts
@@ -35,28 +35,28 @@
 
 		button@1 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			label = "DSW2-1";
 			linux,code = <KEY_1>;
 			gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>;
 		};
 		button@2 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			label = "DSW2-2";
 			linux,code = <KEY_2>;
 			gpios = <&gpio0 15 GPIO_ACTIVE_HIGH>;
 		};
 		button@3 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			label = "DSW2-3";
 			linux,code = <KEY_3>;
 			gpios = <&gpio0 16 GPIO_ACTIVE_HIGH>;
 		};
 		button@4 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			label = "DSW2-4";
 			linux,code = <KEY_4>;
 			gpios = <&gpio0 17 GPIO_ACTIVE_HIGH>;
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index ca0e3c15977f..294cfe40388d 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -98,6 +98,7 @@
 			opp-hz = /bits/ 64 <800000000>;
 			opp-microvolt = <1000000>;
 			clock-latency-ns = <200000>;
+			opp-suspend;
 		};
 		opp07 {
 			opp-hz = /bits/ 64 <900000000>;
diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts
index 15aea760c1da..c625e71217aa 100644
--- a/arch/arm/boot/dts/exynos5250-smdk5250.dts
+++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts
@@ -197,6 +197,7 @@
 				regulator-name = "P1.8V_LDO_OUT10";
 				regulator-min-microvolt = <1800000>;
 				regulator-max-microvolt = <1800000>;
+				regulator-always-on;
 			};
 
 			ldo11_reg: LDO11 {
diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts
index 8f4d76c5e11c..1b95da79293c 100644
--- a/arch/arm/boot/dts/exynos5420-peach-pit.dts
+++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts
@@ -915,6 +915,11 @@
 	};
 };
 
+&pmu_system_controller {
+	assigned-clocks = <&pmu_system_controller 0>;
+	assigned-clock-parents = <&clock CLK_FIN_PLL>;
+};
+
 &rtc {
 	status = "okay";
 	clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>;
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index df9aee92ecf4..1b3d6c769a3c 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -1117,7 +1117,7 @@
 		interrupt-parent = <&combiner>;
 		interrupts = <3 0>;
 		clock-names = "sysmmu", "master";
-		clocks = <&clock CLK_SMMU_FIMD1M0>, <&clock CLK_FIMD1>;
+		clocks = <&clock CLK_SMMU_FIMD1M1>, <&clock CLK_FIMD1>;
 		power-domains = <&disp_pd>;
 		#iommu-cells = <0>;
 	};
diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
index 79ffdfe712aa..3b43e57845ae 100644
--- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
+++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi
@@ -472,7 +472,6 @@
 	 */
 	pinctrl-0 = <&pwm0_out &pwm1_out &pwm2_out &pwm3_out>;
 	pinctrl-names = "default";
-	samsung,pwm-outputs = <0>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index 7d5b386b5ae6..8f40c7e549bd 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -878,6 +878,11 @@
 	};
 };
 
+&pmu_system_controller {
+	assigned-clocks = <&pmu_system_controller 0>;
+	assigned-clock-parents = <&clock CLK_FIN_PLL>;
+};
+
 &rtc {
 	status = "okay";
 	clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>;
diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts
index 66e47de5e826..96d7eede412e 100644
--- a/arch/arm/boot/dts/imx53-qsrb.dts
+++ b/arch/arm/boot/dts/imx53-qsrb.dts
@@ -36,7 +36,7 @@
 		pinctrl-0 = <&pinctrl_pmic>;
 		reg = <0x08>;
 		interrupt-parent = <&gpio5>;
-		interrupts = <23 0x8>;
+		interrupts = <23 IRQ_TYPE_LEVEL_HIGH>;
 		regulators {
 			sw1_reg: sw1a {
 				regulator-name = "SW1";
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index c3e3ca9362fb..cd170376eaca 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -15,6 +15,7 @@
 #include <dt-bindings/clock/imx5-clock.h>
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/interrupt-controller/irq.h>
 
 / {
 	aliases {
diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi
index 3373fd958e95..a50356243888 100644
--- a/arch/arm/boot/dts/imx6qdl-rex.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi
@@ -35,7 +35,6 @@
 			compatible = "regulator-fixed";
 			reg = <1>;
 			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_usbh1>;
 			regulator-name = "usbh1_vbus";
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
@@ -47,7 +46,6 @@
 			compatible = "regulator-fixed";
 			reg = <2>;
 			pinctrl-names = "default";
-			pinctrl-0 = <&pinctrl_usbotg>;
 			regulator-name = "usb_otg_vbus";
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index b738ce0f9d9b..6e444bb873f9 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -588,10 +588,10 @@
 				status = "disabled";
 			};
 
-			uart2: serial@30870000 {
+			uart2: serial@30890000 {
 				compatible = "fsl,imx7d-uart",
 					     "fsl,imx6q-uart";
-				reg = <0x30870000 0x10000>;
+				reg = <0x30890000 0x10000>;
 				interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX7D_UART2_ROOT_CLK>,
 					<&clks IMX7D_UART2_ROOT_CLK>;
diff --git a/arch/arm/boot/dts/kirkwood-net5big.dts b/arch/arm/boot/dts/kirkwood-net5big.dts
index 36155b749d9f..d2d44df9c8c0 100644
--- a/arch/arm/boot/dts/kirkwood-net5big.dts
+++ b/arch/arm/boot/dts/kirkwood-net5big.dts
@@ -86,6 +86,66 @@
 			 clock-frequency = <32768>;
 	       };
 	};
+
+	netxbig-leds {
+		blue-sata2 {
+			label = "netxbig:blue:sata2";
+			mode-addr = <5>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 7
+				    NETXBIG_LED_SATA 1
+				    NETXBIG_LED_TIMER1 3>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		red-sata2 {
+			label = "netxbig:red:sata2";
+			mode-addr = <5>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		blue-sata3 {
+			label = "netxbig:blue:sata3";
+			mode-addr = <6>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 7
+				    NETXBIG_LED_SATA 1
+				    NETXBIG_LED_TIMER1 3>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		red-sata3 {
+			label = "netxbig:red:sata3";
+			mode-addr = <6>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		blue-sata4 {
+			label = "netxbig:blue:sata4";
+			mode-addr = <7>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 7
+				    NETXBIG_LED_SATA 1
+				    NETXBIG_LED_TIMER1 3>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		red-sata4 {
+			label = "netxbig:red:sata4";
+			mode-addr = <7>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+	};
 };
 
 &mdio {
diff --git a/arch/arm/boot/dts/kirkwood-netxbig.dtsi b/arch/arm/boot/dts/kirkwood-netxbig.dtsi
index 1508b12147df..62515a8b99b9 100644
--- a/arch/arm/boot/dts/kirkwood-netxbig.dtsi
+++ b/arch/arm/boot/dts/kirkwood-netxbig.dtsi
@@ -13,6 +13,7 @@
  * warranty of any kind, whether express or implied.
 */
 
+#include <dt-bindings/leds/leds-netxbig.h>
 #include "kirkwood.dtsi"
 #include "kirkwood-6281.dtsi"
 
@@ -105,6 +106,85 @@
 			gpio = <&gpio0 16 GPIO_ACTIVE_HIGH>;
 		};
 	};
+
+	netxbig_gpio_ext: netxbig-gpio-ext {
+		compatible = "lacie,netxbig-gpio-ext";
+
+		addr-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH
+			      &gpio1 16 GPIO_ACTIVE_HIGH
+			      &gpio1 17 GPIO_ACTIVE_HIGH>;
+		data-gpios = <&gpio1 12 GPIO_ACTIVE_HIGH
+			      &gpio1 13 GPIO_ACTIVE_HIGH
+			      &gpio1 14 GPIO_ACTIVE_HIGH>;
+		enable-gpio = <&gpio0 29 GPIO_ACTIVE_HIGH>;
+	};
+
+	netxbig-leds {
+		compatible = "lacie,netxbig-leds";
+
+		gpio-ext = <&netxbig_gpio_ext>;
+
+		timers = <NETXBIG_LED_TIMER1 500 500
+			  NETXBIG_LED_TIMER2 500 1000>;
+
+		blue-power {
+			label = "netxbig:blue:power";
+			mode-addr = <0>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 1
+				    NETXBIG_LED_TIMER1 3
+				    NETXBIG_LED_TIMER2 7>;
+			bright-addr = <1>;
+			max-brightness = <7>;
+		};
+		red-power {
+			label = "netxbig:red:power";
+			mode-addr = <0>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <1>;
+			max-brightness = <7>;
+		};
+		blue-sata0 {
+			label = "netxbig:blue:sata0";
+			mode-addr = <3>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 7
+				    NETXBIG_LED_SATA 1
+				    NETXBIG_LED_TIMER1 3>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		red-sata0 {
+			label = "netxbig:red:sata0";
+			mode-addr = <3>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		blue-sata1 {
+			label = "netxbig:blue:sata1";
+			mode-addr = <4>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 7
+				    NETXBIG_LED_SATA 1
+				    NETXBIG_LED_TIMER1 3>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+		red-sata1 {
+			label = "netxbig:red:sata1";
+			mode-addr = <4>;
+			mode-val = <NETXBIG_LED_OFF 0
+				    NETXBIG_LED_ON 2
+				    NETXBIG_LED_TIMER1 4>;
+			bright-addr = <2>;
+			max-brightness = <7>;
+		};
+	};
 };
 
 &mdio {
diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 91146c318798..5b0430041ec6 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -12,7 +12,7 @@
 
 / {
 	model = "LogicPD Zoom DM3730 Torpedo Development Kit";
-	compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap36xx";
+	compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3630", "ti,omap3";
 
 	gpio_keys {
 		compatible = "gpio-keys";
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 548441384d2a..8c77c87660cd 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -67,7 +67,7 @@
 
 	timer@c1109940 {
 		compatible = "amlogic,meson6-timer";
-		reg = <0xc1109940 0x14>;
+		reg = <0xc1109940 0x18>;
 		interrupts = <0 10 1>;
 	};
 
@@ -80,36 +80,37 @@
 		wdt: watchdog@c1109900 {
 			compatible = "amlogic,meson6-wdt";
 			reg = <0xc1109900 0x8>;
+			interrupts = <0 0 1>;
 		};
 
 		uart_AO: serial@c81004c0 {
 			compatible = "amlogic,meson-uart";
-			reg = <0xc81004c0 0x14>;
+			reg = <0xc81004c0 0x18>;
 			interrupts = <0 90 1>;
 			clocks = <&clk81>;
 			status = "disabled";
 		};
 
-		uart_A: serial@c81084c0 {
+		uart_A: serial@c11084c0 {
 			compatible = "amlogic,meson-uart";
-			reg = <0xc81084c0 0x14>;
-			interrupts = <0 90 1>;
+			reg = <0xc11084c0 0x18>;
+			interrupts = <0 26 1>;
 			clocks = <&clk81>;
 			status = "disabled";
 		};
 
-		uart_B: serial@c81084dc {
+		uart_B: serial@c11084dc {
 			compatible = "amlogic,meson-uart";
-			reg = <0xc81084dc 0x14>;
-			interrupts = <0 90 1>;
+			reg = <0xc11084dc 0x18>;
+			interrupts = <0 75 1>;
 			clocks = <&clk81>;
 			status = "disabled";
 		};
 
-		uart_C: serial@c8108700 {
+		uart_C: serial@c1108700 {
 			compatible = "amlogic,meson-uart";
-			reg = <0xc8108700 0x14>;
-			interrupts = <0 90 1>;
+			reg = <0xc1108700 0x18>;
+			interrupts = <0 93 1>;
 			clocks = <&clk81>;
 			status = "disabled";
 		};
diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts
index 16e8ce350dda..bb339d1648e0 100644
--- a/arch/arm/boot/dts/omap3-evm-37xx.dts
+++ b/arch/arm/boot/dts/omap3-evm-37xx.dts
@@ -13,7 +13,7 @@
 
 / {
 	model = "TI OMAP37XX EVM (TMDSEVM3730)";
-	compatible = "ti,omap3-evm-37xx", "ti,omap36xx";
+	compatible = "ti,omap3-evm-37xx", "ti,omap3630", "ti,omap3";
 
 	memory {
 		device_type = "memory";
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index a0b2a79cbfbd..4624d0f2a754 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1627,6 +1627,7 @@
 				"mix.0", "mix.1",
 				"dvc.0", "dvc.1",
 				"clk_a", "clk_b", "clk_c", "clk_i";
+		power-domains = <&cpg_clocks>;
 
 		status = "disabled";
 
diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi
index 831525dd39a6..1666c8a6b143 100644
--- a/arch/arm/boot/dts/r8a7791.dtsi
+++ b/arch/arm/boot/dts/r8a7791.dtsi
@@ -1677,6 +1677,7 @@
 				"mix.0", "mix.1",
 				"dvc.0", "dvc.1",
 				"clk_a", "clk_b", "clk_c", "clk_i";
+		power-domains = <&cpg_clocks>;
 
 		status = "disabled";
 
diff --git a/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi b/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi
index b5334ecff13c..fec076eb7aef 100644
--- a/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi
@@ -90,7 +90,7 @@
 	regulators {
 		vccio_sd: LDO_REG4 {
 			regulator-name = "vccio_sd";
-			regulator-min-microvolt = <3300000>;
+			regulator-min-microvolt = <1800000>;
 			regulator-max-microvolt = <3300000>;
 			regulator-state-mem {
 				regulator-off-in-suspend;
@@ -116,7 +116,12 @@
 	cap-sd-highspeed;
 	card-detect-delay = <200>;
 	cd-gpios = <&gpio7 5 GPIO_ACTIVE_LOW>;
+	rockchip,default-sample-phase = <90>;
 	num-slots = <1>;
+	sd-uhs-sdr12;
+	sd-uhs-sdr25;
+	sd-uhs-sdr50;
+	sd-uhs-sdr104;
 	vmmc-supply = <&vcc33_sd>;
 	vqmmc-supply = <&vccio_sd>;
 };
diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 275c78ccc0f3..860cea0a7613 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -149,7 +149,9 @@
 	broken-cd;
 	bus-width = <8>;
 	cap-mmc-highspeed;
+	rockchip,default-sample-phase = <158>;
 	disable-wp;
+	mmc-hs200-1_8v;
 	mmc-pwrseq = <&emmc_pwrseq>;
 	non-removable;
 	num-slots = <1>;
@@ -355,6 +357,10 @@
 	num-slots = <1>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sdio0_clk &sdio0_cmd &sdio0_bus4>;
+	sd-uhs-sdr12;
+	sd-uhs-sdr25;
+	sd-uhs-sdr50;
+	sd-uhs-sdr104;
 	vmmc-supply = <&vcc33_sys>;
 	vqmmc-supply = <&vcc18_wl>;
 };
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 906e938fb6bf..4e7c6b7392af 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -222,8 +222,9 @@
 	sdmmc: dwmmc@ff0c0000 {
 		compatible = "rockchip,rk3288-dw-mshc";
 		clock-freq-min-max = <400000 150000000>;
-		clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>;
-		clock-names = "biu", "ciu";
+		clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>,
+			 <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
 		fifo-depth = <0x100>;
 		interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
 		reg = <0xff0c0000 0x4000>;
@@ -233,8 +234,9 @@
 	sdio0: dwmmc@ff0d0000 {
 		compatible = "rockchip,rk3288-dw-mshc";
 		clock-freq-min-max = <400000 150000000>;
-		clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>;
-		clock-names = "biu", "ciu";
+		clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>,
+			 <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
 		fifo-depth = <0x100>;
 		interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
 		reg = <0xff0d0000 0x4000>;
@@ -244,8 +246,9 @@
 	sdio1: dwmmc@ff0e0000 {
 		compatible = "rockchip,rk3288-dw-mshc";
 		clock-freq-min-max = <400000 150000000>;
-		clocks = <&cru HCLK_SDIO1>, <&cru SCLK_SDIO1>;
-		clock-names = "biu", "ciu";
+		clocks = <&cru HCLK_SDIO1>, <&cru SCLK_SDIO1>,
+			 <&cru SCLK_SDIO1_DRV>, <&cru SCLK_SDIO1_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
 		fifo-depth = <0x100>;
 		interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
 		reg = <0xff0e0000 0x4000>;
@@ -255,8 +258,9 @@
 	emmc: dwmmc@ff0f0000 {
 		compatible = "rockchip,rk3288-dw-mshc";
 		clock-freq-min-max = <400000 150000000>;
-		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>;
-		clock-names = "biu", "ciu";
+		clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>,
+			 <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>;
+		clock-names = "biu", "ciu", "ciu-drive", "ciu-sample";
 		fifo-depth = <0x100>;
 		interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
 		reg = <0xff0f0000 0x4000>;
diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 034cd48ae28b..cc05cde0f9a4 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -921,6 +921,20 @@
 				clocks = <&twi1_clk>;
 				status = "disabled";
 			};
+
+			pioA: pinctrl@fc038000 {
+				compatible = "atmel,sama5d2-pinctrl";
+				reg = <0xfc038000 0x600>;
+				interrupts = <18 IRQ_TYPE_LEVEL_HIGH 7>,
+					     <68 IRQ_TYPE_LEVEL_HIGH 7>,
+					     <69 IRQ_TYPE_LEVEL_HIGH 7>,
+					     <70 IRQ_TYPE_LEVEL_HIGH 7>;
+				interrupt-controller;
+				#interrupt-cells = <2>;
+				gpio-controller;
+				#gpio-cells = <2>;
+				clocks = <&pioA_clk>;
+			};
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ste-hrefv60plus.dtsi b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
index 810cda743b6d..9c2387b34d0c 100644
--- a/arch/arm/boot/dts/ste-hrefv60plus.dtsi
+++ b/arch/arm/boot/dts/ste-hrefv60plus.dtsi
@@ -56,7 +56,7 @@
 					/* VMMCI level-shifter enable */
 					default_hrefv60_cfg2 {
 						pins = "GPIO169_D22";
-						ste,config = <&gpio_out_lo>;
+						ste,config = <&gpio_out_hi>;
 					};
 					/* VMMCI level-shifter voltage select */
 					default_hrefv60_cfg3 {
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 32a5ccb14e7e..e80e42163883 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -47,35 +47,35 @@
 
 		button@1 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			linux,code = <2>;
 			label = "userpb";
 			gpios = <&gpio1 0 0x4>;
 		};
 		button@2 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			linux,code = <3>;
 			label = "extkb1";
 			gpios = <&gpio4 23 0x4>;
 		};
 		button@3 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			linux,code = <4>;
 			label = "extkb2";
 			gpios = <&gpio4 24 0x4>;
 		};
 		button@4 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			linux,code = <5>;
 			label = "extkb3";
 			gpios = <&gpio5 1 0x4>;
 		};
 		button@5 {
 			debounce_interval = <50>;
-			wakeup = <1>;
+			wakeup-source;
 			linux,code = <6>;
 			label = "extkb4";
 			gpios = <&gpio5 2 0x4>;
diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index ae0527754000..0c24fcb03577 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -610,5 +610,19 @@
 			clocks		= <&clk_sysin>;
 			st,pwm-num-chan = <4>;
 		};
+
+		rng10: rng@08a89000 {
+			compatible      = "st,rng";
+			reg		= <0x08a89000 0x1000>;
+			clocks          = <&clk_sysin>;
+			status		= "okay";
+		};
+
+		rng11: rng@08a8a000 {
+			compatible      = "st,rng";
+			reg		= <0x08a8a000 0x1000>;
+			clocks          = <&clk_sysin>;
+			status		= "okay";
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index d78a4815da8f..5e1e234e8c0a 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -174,6 +174,13 @@
 			reg = <0x40023800 0x400>;
 			clocks = <&clk_hse>;
 		};
+
+		rng: rng@50060800 {
+			compatible = "st,stm32-rng";
+			reg = <0x50060800 0x400>;
+			interrupts = <80>;
+			clocks = <&rcc 0 38>;
+		};
 	};
 };
 
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 2bebaa286f9a..391230c3dc93 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -107,7 +107,7 @@
 				720000	1200000
 				528000	1100000
 				312000	1000000
-				144000	900000
+				144000	1000000
 				>;
 			#cooling-cells = <2>;
 			cooling-min-level = <0>;
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 9d4f86e9c50a..d845bd1448b5 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -234,7 +234,9 @@
 		gpio-controller;
 		#interrupt-cells = <2>;
 		interrupt-controller;
+		/*
 		gpio-ranges = <&pinmux 0 0 246>;
+		*/
 	};
 
 	apbmisc@70000800 {
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 1e204a6de12c..819e2ae2cabe 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -258,7 +258,9 @@
 		gpio-controller;
 		#interrupt-cells = <2>;
 		interrupt-controller;
+		/*
 		gpio-ranges = <&pinmux 0 0 251>;
+		*/
 	};
 
 	apbdma: dma@0,60020000 {
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index e058709e6d98..969b828505ae 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -244,7 +244,9 @@
 		gpio-controller;
 		#interrupt-cells = <2>;
 		interrupt-controller;
+		/*
 		gpio-ranges = <&pinmux 0 0 224>;
+		*/
 	};
 
 	apbmisc@70000800 {
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index fe04fb5e155f..c6938ad1b543 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -349,7 +349,9 @@
 		gpio-controller;
 		#interrupt-cells = <2>;
 		interrupt-controller;
+		/*
 		gpio-ranges = <&pinmux 0 0 248>;
+		*/
 	};
 
 	apbmisc@70000800 {
diff --git a/arch/arm/boot/dts/twl4030.dtsi b/arch/arm/boot/dts/twl4030.dtsi
index 36ae9160b558..482b7aa37808 100644
--- a/arch/arm/boot/dts/twl4030.dtsi
+++ b/arch/arm/boot/dts/twl4030.dtsi
@@ -22,6 +22,8 @@
 	charger: bci {
 		compatible = "ti,twl4030-bci";
 		interrupts = <9>, <2>;
+		io-channels = <&twl4030_madc 11>;
+		io-channel-name = "vac";
 		bci3v1-supply = <&vusb3v1>;
 	};
 
diff --git a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts
index 33963acd7e8f..f80f772d99fb 100644
--- a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts
+++ b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts
@@ -85,7 +85,7 @@
 };
 
 &ethsc {
-	interrupts = <0 50 4>;
+	interrupts = <0 52 4>;
 };
 
 &serial0 {
diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi
index dc0457e40775..1a5220e05109 100644
--- a/arch/arm/boot/dts/zynq-7000.dtsi
+++ b/arch/arm/boot/dts/zynq-7000.dtsi
@@ -294,6 +294,11 @@
 		devcfg: devcfg@f8007000 {
 			compatible = "xlnx,zynq-devcfg-1.0";
 			reg = <0xf8007000 0x100>;
+			interrupt-parent = <&intc>;
+			interrupts = <0 8 4>;
+			clocks = <&clkc 12>;
+			clock-names = "ref_clk";
+			syscon = <&slcr>;
 		};
 
 		global_timer: timer@f8f00200 {
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 1ff2bfa2e183..13ba48c4b03b 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -166,7 +166,6 @@ CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_S3C=y
 CONFIG_MMC_SDHCI_S3C_DMA=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 CONFIG_MMC_DW_EXYNOS=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX77686=y
diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig
index 5997dbc69822..b2e340b272ee 100644
--- a/arch/arm/configs/hisi_defconfig
+++ b/arch/arm/configs/hisi_defconfig
@@ -69,7 +69,6 @@ CONFIG_NOP_USB_XCEIV=y
 CONFIG_MMC=y
 CONFIG_RTC_CLASS=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 CONFIG_MMC_DW_PLTFM=y
 CONFIG_RTC_DRV_PL031=y
 CONFIG_DMADEVICES=y
diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig
index 1c47f86c3970..b7e8cdab51f9 100644
--- a/arch/arm/configs/lpc18xx_defconfig
+++ b/arch/arm/configs/lpc18xx_defconfig
@@ -119,7 +119,6 @@ CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 CONFIG_MMC=y
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_PCA9532=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 3f15a5cae167..c5e1943e5427 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -246,7 +246,7 @@ CONFIG_GPIO_TWL4030=y
 CONFIG_GPIO_PALMAS=y
 CONFIG_W1=m
 CONFIG_HDQ_MASTER_OMAP=m
-CONFIG_BATTERY_BQ27x00=m
+CONFIG_BATTERY_BQ27XXX=m
 CONFIG_CHARGER_ISP1704=m
 CONFIG_CHARGER_TWL4030=m
 CONFIG_CHARGER_BQ2415X=m
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
new file mode 100644
index 000000000000..6607d976e07d
--- /dev/null
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -0,0 +1,188 @@
+/*
+ * arch/arm/include/asm/arch_gicv3.h
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_ARCH_GICV3_H
+#define __ASM_ARCH_GICV3_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/io.h>
+
+#define __ACCESS_CP15(CRn, Op1, CRm, Op2)	p15, Op1, %0, CRn, CRm, Op2
+#define __ACCESS_CP15_64(Op1, CRm)		p15, Op1, %Q0, %R0, CRm
+
+#define ICC_EOIR1			__ACCESS_CP15(c12, 0, c12, 1)
+#define ICC_DIR				__ACCESS_CP15(c12, 0, c11, 1)
+#define ICC_IAR1			__ACCESS_CP15(c12, 0, c12, 0)
+#define ICC_SGI1R			__ACCESS_CP15_64(0, c12)
+#define ICC_PMR				__ACCESS_CP15(c4, 0, c6, 0)
+#define ICC_CTLR			__ACCESS_CP15(c12, 0, c12, 4)
+#define ICC_SRE				__ACCESS_CP15(c12, 0, c12, 5)
+#define ICC_IGRPEN1			__ACCESS_CP15(c12, 0, c12, 7)
+
+#define ICC_HSRE			__ACCESS_CP15(c12, 4, c9, 5)
+
+#define ICH_VSEIR			__ACCESS_CP15(c12, 4, c9, 4)
+#define ICH_HCR				__ACCESS_CP15(c12, 4, c11, 0)
+#define ICH_VTR				__ACCESS_CP15(c12, 4, c11, 1)
+#define ICH_MISR			__ACCESS_CP15(c12, 4, c11, 2)
+#define ICH_EISR			__ACCESS_CP15(c12, 4, c11, 3)
+#define ICH_ELSR			__ACCESS_CP15(c12, 4, c11, 5)
+#define ICH_VMCR			__ACCESS_CP15(c12, 4, c11, 7)
+
+#define __LR0(x)			__ACCESS_CP15(c12, 4, c12, x)
+#define __LR8(x)			__ACCESS_CP15(c12, 4, c13, x)
+
+#define ICH_LR0				__LR0(0)
+#define ICH_LR1				__LR0(1)
+#define ICH_LR2				__LR0(2)
+#define ICH_LR3				__LR0(3)
+#define ICH_LR4				__LR0(4)
+#define ICH_LR5				__LR0(5)
+#define ICH_LR6				__LR0(6)
+#define ICH_LR7				__LR0(7)
+#define ICH_LR8				__LR8(0)
+#define ICH_LR9				__LR8(1)
+#define ICH_LR10			__LR8(2)
+#define ICH_LR11			__LR8(3)
+#define ICH_LR12			__LR8(4)
+#define ICH_LR13			__LR8(5)
+#define ICH_LR14			__LR8(6)
+#define ICH_LR15			__LR8(7)
+
+/* LR top half */
+#define __LRC0(x)			__ACCESS_CP15(c12, 4, c14, x)
+#define __LRC8(x)			__ACCESS_CP15(c12, 4, c15, x)
+
+#define ICH_LRC0			__LRC0(0)
+#define ICH_LRC1			__LRC0(1)
+#define ICH_LRC2			__LRC0(2)
+#define ICH_LRC3			__LRC0(3)
+#define ICH_LRC4			__LRC0(4)
+#define ICH_LRC5			__LRC0(5)
+#define ICH_LRC6			__LRC0(6)
+#define ICH_LRC7			__LRC0(7)
+#define ICH_LRC8			__LRC8(0)
+#define ICH_LRC9			__LRC8(1)
+#define ICH_LRC10			__LRC8(2)
+#define ICH_LRC11			__LRC8(3)
+#define ICH_LRC12			__LRC8(4)
+#define ICH_LRC13			__LRC8(5)
+#define ICH_LRC14			__LRC8(6)
+#define ICH_LRC15			__LRC8(7)
+
+#define __AP0Rx(x)			__ACCESS_CP15(c12, 4, c8, x)
+#define ICH_AP0R0			__AP0Rx(0)
+#define ICH_AP0R1			__AP0Rx(1)
+#define ICH_AP0R2			__AP0Rx(2)
+#define ICH_AP0R3			__AP0Rx(3)
+
+#define __AP1Rx(x)			__ACCESS_CP15(c12, 4, c9, x)
+#define ICH_AP1R0			__AP1Rx(0)
+#define ICH_AP1R1			__AP1Rx(1)
+#define ICH_AP1R2			__AP1Rx(2)
+#define ICH_AP1R3			__AP1Rx(3)
+
+/* Low-level accessors */
+
+static inline void gic_write_eoir(u32 irq)
+{
+	asm volatile("mcr " __stringify(ICC_EOIR1) : : "r" (irq));
+	isb();
+}
+
+static inline void gic_write_dir(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_DIR) : : "r" (val));
+	isb();
+}
+
+static inline u32 gic_read_iar(void)
+{
+	u32 irqstat;
+
+	asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+	return irqstat;
+}
+
+static inline void gic_write_pmr(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_PMR) : : "r" (val));
+}
+
+static inline void gic_write_ctlr(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_CTLR) : : "r" (val));
+	isb();
+}
+
+static inline void gic_write_grpen1(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_IGRPEN1) : : "r" (val));
+	isb();
+}
+
+static inline void gic_write_sgi1r(u64 val)
+{
+	asm volatile("mcrr " __stringify(ICC_SGI1R) : : "r" (val));
+}
+
+static inline u32 gic_read_sre(void)
+{
+	u32 val;
+
+	asm volatile("mrc " __stringify(ICC_SRE) : "=r" (val));
+	return val;
+}
+
+static inline void gic_write_sre(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_SRE) : : "r" (val));
+	isb();
+}
+
+/*
+ * Even in 32bit systems that use LPAE, there is no guarantee that the I/O
+ * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't
+ * make much sense.
+ * Moreover, 64bit I/O emulation is extremely difficult to implement on
+ * AArch32, since the syndrome register doesn't provide any information for
+ * them.
+ * Consequently, the following IO helpers use 32bit accesses.
+ *
+ * There are only two registers that need 64bit accesses in this driver:
+ * - GICD_IROUTERn, contain the affinity values associated to each interrupt.
+ *   The upper-word (aff3) will always be 0, so there is no need for a lock.
+ * - GICR_TYPER is an ID register and doesn't need atomicity.
+ */
+static inline void gic_write_irouter(u64 val, volatile void __iomem *addr)
+{
+	writel_relaxed((u32)val, addr);
+	writel_relaxed((u32)(val >> 32), addr + 4);
+}
+
+static inline u64 gic_read_typer(const volatile void __iomem *addr)
+{
+	u64 val;
+
+	val = readl_relaxed(addr);
+	val |= (u64)readl_relaxed(addr + 4) << 32;
+	return val;
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif /* !__ASM_ARCH_GICV3_H */
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index fe3ef397f5a4..9e10c4567eb4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -27,8 +27,8 @@
  * strex/ldrex monitor on some implementations. The reason we can use it for
  * atomic_set() is the clrex or dummy strex done on every exception return.
  */
-#define atomic_read(v)	ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)	(((v)->counter) = (i))
+#define atomic_read(v)	READ_ONCE((v)->counter)
+#define atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
@@ -210,8 +210,8 @@ ATOMIC_OP(xor, ^=, eor)
 
 #define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
 #define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
-#define atomic_inc_return(v)    (atomic_add_return(1, v))
-#define atomic_dec_return(v)    (atomic_sub_return(1, v))
+#define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
+#define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
 
 #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
@@ -442,11 +442,11 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 916a2744d5c6..97882f9bad12 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -39,6 +39,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 
 	switch (size) {
 #if __LINUX_ARM_ARCH__ >= 6
+#ifndef CONFIG_CPU_V6 /* MIN ARCH >= V6K */
 	case 1:
 		asm volatile("@	__xchg1\n"
 		"1:	ldrexb	%0, [%3]\n"
@@ -49,6 +50,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 			: "r" (x), "r" (ptr)
 			: "memory", "cc");
 		break;
+	case 2:
+		asm volatile("@	__xchg2\n"
+		"1:	ldrexh	%0, [%3]\n"
+		"	strexh	%1, %2, [%3]\n"
+		"	teq	%1, #0\n"
+		"	bne	1b"
+			: "=&r" (ret), "=&r" (tmp)
+			: "r" (x), "r" (ptr)
+			: "memory", "cc");
+		break;
+#endif
 	case 4:
 		asm volatile("@	__xchg4\n"
 		"1:	ldrex	%0, [%3]\n"
diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h
index 43908146a5cf..e6b70d9d084e 100644
--- a/arch/arm/include/asm/irqflags.h
+++ b/arch/arm/include/asm/irqflags.h
@@ -54,6 +54,14 @@ static inline void arch_local_irq_disable(void)
 
 #define local_fiq_enable()  __asm__("cpsie f	@ __stf" : : : "memory", "cc")
 #define local_fiq_disable() __asm__("cpsid f	@ __clf" : : : "memory", "cc")
+
+#ifndef CONFIG_CPU_V7M
+#define local_abt_enable()  __asm__("cpsie a	@ __sta" : : : "memory", "cc")
+#define local_abt_disable() __asm__("cpsid a	@ __cla" : : : "memory", "cc")
+#else
+#define local_abt_enable()	do { } while (0)
+#define local_abt_disable()	do { } while (0)
+#endif
 #else
 
 /*
@@ -136,6 +144,8 @@ static inline void arch_local_irq_disable(void)
 	: "memory", "cc");					\
 	})
 
+#define local_abt_enable()	do { } while (0)
+#define local_abt_disable()	do { } while (0)
 #endif
 
 /*
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d995821f1698..dc641ddf0784 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -218,4 +218,24 @@
 #define HSR_DABT_CM		(1U << 8)
 #define HSR_DABT_EA		(1U << 9)
 
+#define kvm_arm_exception_type	\
+	{0, "RESET" }, 		\
+	{1, "UNDEFINED" },	\
+	{2, "SOFTWARE" },	\
+	{3, "PREF_ABORT" },	\
+	{4, "DATA_ABORT" },	\
+	{5, "IRQ" },		\
+	{6, "FIQ" },		\
+	{7, "HVC" }
+
+#define HSRECN(x) { HSR_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
+	HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
+	HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
+	HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
+	HSRECN(DABT), HSRECN(DABT_HYP)
+
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c4072d9f32c7..6692982c9b57 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -126,7 +126,10 @@ struct kvm_vcpu_arch {
 	 * here.
 	 */
 
-	/* Don't run the guest on this vcpu */
+	/* vcpu power-off state */
+	bool power_off;
+
+	 /* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index cb3a40717edd..5c1ad11aa392 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -47,7 +47,7 @@ struct machine_desc {
 	unsigned		l2c_aux_val;	/* L2 cache aux value	*/
 	unsigned		l2c_aux_mask;	/* L2 cache aux mask	*/
 	void			(*l2c_write_sec)(unsigned long, unsigned);
-	struct smp_operations	*smp;		/* SMP operations	*/
+	const struct smp_operations	*smp;	/* SMP operations	*/
 	bool			(*smp_init)(void);
 	void			(*fixup)(struct tag *, char **);
 	void			(*dt_fixup)(void);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 98d58bb04ac5..c79b57bf71c4 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -76,10 +76,12 @@
  */
 #define XIP_VIRT_ADDR(physaddr)  (MODULES_VADDR + ((physaddr) & 0x000fffff))
 
+#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
 /*
  * Allow 16MB-aligned ioremap pages
  */
 #define IOREMAP_MAX_ORDER	24
+#endif
 
 #else /* CONFIG_MMU */
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f40354198bad..348caabb7625 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -43,7 +43,7 @@
  */
 #define VMALLOC_OFFSET		(8*1024*1024)
 #define VMALLOC_START		(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
-#define VMALLOC_END		0xff000000UL
+#define VMALLOC_END		0xff800000UL
 
 #define LIBRARY_TEXT_START	0x0c000000
 
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index ef356659b4f4..3d6dc8b460e4 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -112,7 +112,7 @@ struct smp_operations {
 
 struct of_cpu_method {
 	const char *method;
-	struct smp_operations *ops;
+	const struct smp_operations *ops;
 };
 
 #define CPU_METHOD_OF_DECLARE(name, _method, _ops)			\
@@ -122,6 +122,6 @@ struct of_cpu_method {
 /*
  * set platform specific SMP operations
  */
-extern void smp_set_ops(struct smp_operations *);
+extern void smp_set_ops(const struct smp_operations *);
 
 #endif /* ifndef __ASM_ARM_SMP_H */
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 7cba573c2cc9..7b84657fba35 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -21,13 +21,6 @@
  */
 #define __NR_syscalls  (392)
 
-/*
- * *NOTE*: This is a ghost syscall private to the kernel.  Only the
- * __kuser_cmpxchg code in entry-armv.S should be aware of its
- * existence.  Don't ever use this from user code.
- */
-#define __ARM_NR_cmpxchg		(__ARM_NR_BASE+0x00fff0)
-
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h
index 04ff8e7b37df..95251512e2c4 100644
--- a/arch/arm/include/asm/xen/hypervisor.h
+++ b/arch/arm/include/asm/xen/hypervisor.h
@@ -26,4 +26,14 @@ void __init xen_early_init(void);
 static inline void xen_early_init(void) { return; }
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void xen_arch_register_cpu(int num)
+{
+}
+
+static inline void xen_arch_unregister_cpu(int num)
+{
+}
+#endif
+
 #endif /* _ASM_ARM_XEN_HYPERVISOR_H */
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index efd562412850..0375c8caa061 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -35,11 +35,15 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	bool local = PFN_DOWN(dev_addr) == page_to_pfn(page);
-	/* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise
-	 * is a foreign page grant-mapped in dom0. If the page is local we
-	 * can safely call the native dma_ops function, otherwise we call
-	 * the xen specific function. */
+	bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+	 * multiple Xen page, it's not possible to have a mix of local and
+	 * foreign Xen page. So if the first xen_pfn == mfn the page is local
+	 * otherwise it's a foreign page grant-mapped in dom0. If the page is
+	 * local we can safely call the native dma_ops function, otherwise we
+	 * call the xen specific function.
+	 */
 	if (local)
 		__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
 	else
@@ -51,10 +55,14 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		struct dma_attrs *attrs)
 {
 	unsigned long pfn = PFN_DOWN(handle);
-	/* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will
-	 * always return false. If the page is local we can safely call the
-	 * native dma_ops function, otherwise we call the xen specific
-	 * function. */
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+	 * multiple Xen page, it's not possible to have a mix of local and
+	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
+	 * foreign mfn will always return false. If the page is local we can
+	 * safely call the native dma_ops function, otherwise we call the xen
+	 * specific function.
+	 */
 	if (pfn_valid(pfn)) {
 		if (__generic_dma_ops(hwdev)->unmap_page)
 			__generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 127956353b00..415dbc6e43fd 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -13,9 +13,6 @@
 
 #define phys_to_machine_mapping_valid(pfn) (1)
 
-#define pte_mfn	    pte_pfn
-#define mfn_pte	    pfn_pte
-
 /* Xen machine address */
 typedef struct xmaddr {
 	phys_addr_t maddr;
@@ -31,6 +28,17 @@ typedef struct xpaddr {
 
 #define INVALID_P2M_ENTRY      (~0UL)
 
+/*
+ * The pseudo-physical frame (pfn) used in all the helpers is always based
+ * on Xen page granularity (i.e 4KB).
+ *
+ * A Linux page may be split across multiple non-contiguous Xen page so we
+ * have to keep track with frame based on 4KB page granularity.
+ *
+ * PV drivers should never make a direct usage of those helpers (particularly
+ * pfn_to_gfn and gfn_to_pfn).
+ */
+
 unsigned long __pfn_to_mfn(unsigned long pfn);
 extern struct rb_root phys_to_mach;
 
@@ -67,8 +75,8 @@ static inline unsigned long bfn_to_pfn(unsigned long bfn)
 #define bfn_to_local_pfn(bfn)	bfn_to_pfn(bfn)
 
 /* VIRT <-> GUEST conversion */
-#define virt_to_gfn(v)		(pfn_to_gfn(virt_to_pfn(v)))
-#define gfn_to_virt(m)		(__va(gfn_to_pfn(m) << PAGE_SHIFT))
+#define virt_to_gfn(v)		(pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT))
+#define gfn_to_virt(m)		(__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT))
 
 /* Only used in PV code. But ARM guests are always HVM. */
 static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
@@ -107,8 +115,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 #define xen_unmap(cookie) iounmap((cookie))
 
 bool xen_arch_need_swiotlb(struct device *dev,
-			   unsigned long pfn,
-			   unsigned long bfn);
+			   phys_addr_t phys,
+			   dma_addr_t dev_addr);
 unsigned long xen_get_swiotlb_free_pages(unsigned int order);
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 11c54de9f8cf..65addcbf5b30 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -101,6 +101,7 @@ void __init arm_dt_init_cpu_maps(void)
 		if (of_property_read_u32(cpu, "reg", &hwid)) {
 			pr_debug(" * %s missing reg property\n",
 				     cpu->full_name);
+			of_node_put(cpu);
 			return;
 		}
 
@@ -108,8 +109,10 @@ void __init arm_dt_init_cpu_maps(void)
 		 * 8 MSBs must be set to 0 in the DT since the reg property
 		 * defines the MPIDR[23:0].
 		 */
-		if (hwid & ~MPIDR_HWID_BITMASK)
+		if (hwid & ~MPIDR_HWID_BITMASK) {
+			of_node_put(cpu);
 			return;
+		}
 
 		/*
 		 * Duplicate MPIDRs are a recipe for disaster.
@@ -119,9 +122,11 @@ void __init arm_dt_init_cpu_maps(void)
 		 * to avoid matching valid MPIDR[23:0] values.
 		 */
 		for (j = 0; j < cpuidx; j++)
-			if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg "
-						     "properties in the DT\n"))
+			if (WARN(tmp_map[j] == hwid,
+				 "Duplicate /cpu reg properties in the DT\n")) {
+				of_node_put(cpu);
 				return;
+			}
 
 		/*
 		 * Build a stashed array of MPIDR values. Numbering scheme
@@ -143,6 +148,7 @@ void __init arm_dt_init_cpu_maps(void)
 					       "max cores %u, capping them\n",
 					       cpuidx, nr_cpu_ids)) {
 			cpuidx = nr_cpu_ids;
+			of_node_put(cpu);
 			break;
 		}
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3e1c26eb32b4..3ce377f7251f 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -427,8 +427,7 @@ ENDPROC(__fiq_abt)
 	.endm
 
 	.macro	kuser_cmpxchg_check
-#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \
-    !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS)
 #ifndef CONFIG_MMU
 #warning "NPTL on non MMU needs fixing"
 #else
@@ -859,20 +858,7 @@ __kuser_helper_start:
 
 __kuser_cmpxchg64:				@ 0xffff0f60
 
-#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
-
-	/*
-	 * Poor you.  No fast solution possible...
-	 * The kernel itself must perform the operation.
-	 * A special ghost syscall is used for that (see traps.c).
-	 */
-	stmfd	sp!, {r7, lr}
-	ldr	r7, 1f			@ it's 20 bits
-	swi	__ARM_NR_cmpxchg64
-	ldmfd	sp!, {r7, pc}
-1:	.word	__ARM_NR_cmpxchg64
-
-#elif defined(CONFIG_CPU_32v6K)
+#if defined(CONFIG_CPU_32v6K)
 
 	stmfd	sp!, {r4, r5, r6, r7}
 	ldrd	r4, r5, [r0]			@ load old val
@@ -948,20 +934,7 @@ __kuser_memory_barrier:				@ 0xffff0fa0
 
 __kuser_cmpxchg:				@ 0xffff0fc0
 
-#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
-
-	/*
-	 * Poor you.  No fast solution possible...
-	 * The kernel itself must perform the operation.
-	 * A special ghost syscall is used for that (see traps.c).
-	 */
-	stmfd	sp!, {r7, lr}
-	ldr	r7, 1f			@ it's 20 bits
-	swi	__ARM_NR_cmpxchg
-	ldmfd	sp!, {r7, pc}
-1:	.word	__ARM_NR_cmpxchg
-
-#elif __LINUX_ARM_ARCH__ < 6
+#if __LINUX_ARM_ARCH__ < 6
 
 #ifdef CONFIG_MMU
 
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index dc7d0a95bd36..6284779d64ee 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -35,7 +35,6 @@
 #include <asm/cputype.h>
 #include <asm/current.h>
 #include <asm/hw_breakpoint.h>
-#include <asm/kdebug.h>
 #include <asm/traps.h>
 
 /* Breakpoint currently in use for each BRP. */
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index fd9eefce0a7b..9232caee7060 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -74,7 +74,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 void
 sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 {
-	struct pt_regs *thread_regs;
+	struct thread_info *ti;
 	int regno;
 
 	/* Just making sure... */
@@ -86,24 +86,17 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 		gdb_regs[regno] = 0;
 
 	/* Otherwise, we have only some registers from switch_to() */
-	thread_regs		= task_pt_regs(task);
-	gdb_regs[_R0]		= thread_regs->ARM_r0;
-	gdb_regs[_R1]		= thread_regs->ARM_r1;
-	gdb_regs[_R2]		= thread_regs->ARM_r2;
-	gdb_regs[_R3]		= thread_regs->ARM_r3;
-	gdb_regs[_R4]		= thread_regs->ARM_r4;
-	gdb_regs[_R5]		= thread_regs->ARM_r5;
-	gdb_regs[_R6]		= thread_regs->ARM_r6;
-	gdb_regs[_R7]		= thread_regs->ARM_r7;
-	gdb_regs[_R8]		= thread_regs->ARM_r8;
-	gdb_regs[_R9]		= thread_regs->ARM_r9;
-	gdb_regs[_R10]		= thread_regs->ARM_r10;
-	gdb_regs[_FP]		= thread_regs->ARM_fp;
-	gdb_regs[_IP]		= thread_regs->ARM_ip;
-	gdb_regs[_SPT]		= thread_regs->ARM_sp;
-	gdb_regs[_LR]		= thread_regs->ARM_lr;
-	gdb_regs[_PC]		= thread_regs->ARM_pc;
-	gdb_regs[_CPSR]		= thread_regs->ARM_cpsr;
+	ti			= task_thread_info(task);
+	gdb_regs[_R4]		= ti->cpu_context.r4;
+	gdb_regs[_R5]		= ti->cpu_context.r5;
+	gdb_regs[_R6]		= ti->cpu_context.r6;
+	gdb_regs[_R7]		= ti->cpu_context.r7;
+	gdb_regs[_R8]		= ti->cpu_context.r8;
+	gdb_regs[_R9]		= ti->cpu_context.r9;
+	gdb_regs[_R10]		= ti->cpu_context.sl;
+	gdb_regs[_FP]		= ti->cpu_context.fp;
+	gdb_regs[_SPT]		= ti->cpu_context.sp;
+	gdb_regs[_PC]		= ti->cpu_context.pc;
 }
 
 void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 48185a773852..b26361355dae 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -80,7 +80,7 @@ static DECLARE_COMPLETION(cpu_running);
 
 static struct smp_operations smp_ops;
 
-void __init smp_set_ops(struct smp_operations *ops)
+void __init smp_set_ops(const struct smp_operations *ops)
 {
 	if (ops)
 		smp_ops = *ops;
@@ -400,6 +400,7 @@ asmlinkage void secondary_start_kernel(void)
 
 	local_irq_enable();
 	local_fiq_enable();
+	local_abt_enable();
 
 	/*
 	 * OK, it's off to the idle thread for us
@@ -748,6 +749,15 @@ core_initcall(register_cpufreq_notifier);
 
 static void raise_nmi(cpumask_t *mask)
 {
+	/*
+	 * Generate the backtrace directly if we are running in a calling
+	 * context that is not preemptible by the backtrace IPI. Note
+	 * that nmi_cpu_backtrace() automatically removes the current cpu
+	 * from mask.
+	 */
+	if (cpumask_test_cpu(smp_processor_id(), mask) && irqs_disabled())
+		nmi_cpu_backtrace(NULL);
+
 	smp_cross_call(mask, IPI_CPU_BACKTRACE);
 }
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index e9035cda1485..1bfa7a7f5533 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -23,7 +23,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 
-#include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 
 /* set up by the platform code */
@@ -34,6 +33,8 @@ static unsigned long twd_timer_rate;
 static DEFINE_PER_CPU(bool, percpu_setup_called);
 
 static struct clock_event_device __percpu *twd_evt;
+static unsigned int twd_features =
+		CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
 static int twd_ppi;
 
 static int twd_shutdown(struct clock_event_device *clk)
@@ -294,8 +295,7 @@ static void twd_timer_setup(void)
 	writel_relaxed(0, twd_base + TWD_TIMER_CONTROL);
 
 	clk->name = "local_timer";
-	clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
-			CLOCK_EVT_FEAT_C3STOP;
+	clk->features = twd_features;
 	clk->rating = 350;
 	clk->set_state_shutdown = twd_shutdown;
 	clk->set_state_periodic = twd_set_periodic;
@@ -350,6 +350,8 @@ static int __init twd_local_timer_common_register(struct device_node *np)
 		goto out_irq;
 
 	twd_get_clock(np);
+	if (!of_property_read_bool(np, "always-on"))
+		twd_features |= CLOCK_EVT_FEAT_C3STOP;
 
 	/*
 	 * Immediately configure the timer on the boot CPU, unless we need
@@ -392,9 +394,6 @@ static void __init twd_local_timer_of_register(struct device_node *np)
 {
 	int err;
 
-	if (!is_smp() || !setup_max_cpus)
-		return;
-
 	twd_ppi = irq_of_parse_and_map(np, 0);
 	if (!twd_ppi) {
 		err = -EINVAL;
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index a66e37e211a9..97b22fa7cb3a 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -120,6 +120,6 @@ void __init time_init(void)
 #ifdef CONFIG_COMMON_CLK
 		of_clk_init(NULL);
 #endif
-		clocksource_of_init();
+		clocksource_probe();
 	}
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 969f9d9e665f..bc698383e822 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -625,58 +625,6 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		set_tls(regs->ARM_r0);
 		return 0;
 
-#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG
-	/*
-	 * Atomically store r1 in *r2 if *r2 is equal to r0 for user space.
-	 * Return zero in r0 if *MEM was changed or non-zero if no exchange
-	 * happened.  Also set the user C flag accordingly.
-	 * If access permissions have to be fixed up then non-zero is
-	 * returned and the operation has to be re-attempted.
-	 *
-	 * *NOTE*: This is a ghost syscall private to the kernel.  Only the
-	 * __kuser_cmpxchg code in entry-armv.S should be aware of its
-	 * existence.  Don't ever use this from user code.
-	 */
-	case NR(cmpxchg):
-	for (;;) {
-		extern void do_DataAbort(unsigned long addr, unsigned int fsr,
-					 struct pt_regs *regs);
-		unsigned long val;
-		unsigned long addr = regs->ARM_r2;
-		struct mm_struct *mm = current->mm;
-		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
-		spinlock_t *ptl;
-
-		regs->ARM_cpsr &= ~PSR_C_BIT;
-		down_read(&mm->mmap_sem);
-		pgd = pgd_offset(mm, addr);
-		if (!pgd_present(*pgd))
-			goto bad_access;
-		pmd = pmd_offset(pgd, addr);
-		if (!pmd_present(*pmd))
-			goto bad_access;
-		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
-		if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) {
-			pte_unmap_unlock(pte, ptl);
-			goto bad_access;
-		}
-		val = *(unsigned long *)addr;
-		val -= regs->ARM_r0;
-		if (val == 0) {
-			*(unsigned long *)addr = regs->ARM_r1;
-			regs->ARM_cpsr |= PSR_C_BIT;
-		}
-		pte_unmap_unlock(pte, ptl);
-		up_read(&mm->mmap_sem);
-		return val;
-
-		bad_access:
-		up_read(&mm->mmap_sem);
-		/* simulate a write access fault */
-		do_DataAbort(addr, 15 + (1 << 11), regs);
-	}
-#endif
-
 	default:
 		/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
 		   if not implemented, rather than raising SIGILL.  This
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 210eccadb69a..95a000515e43 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	depends on MMU && OF
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select ARM_GIC
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO
@@ -45,4 +46,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dc017adfddc8..eab83b2435b8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return kvm_timer_should_fire(vcpu);
 }
 
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_schedule(vcpu);
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+	kvm_timer_unschedule(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
@@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	if (vcpu->arch.pause)
+	if (vcpu->arch.power_off)
 		mp_state->mp_state = KVM_MP_STATE_STOPPED;
 	else
 		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
@@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 		break;
 	case KVM_MP_STATE_STOPPED:
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 		break;
 	default:
 		return -EINVAL;
@@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
  */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
+	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+		&& !v->arch.power_off && !v->arch.pause);
 }
 
 /* Just ensure a guest exit from a particular CPU */
@@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 	return vgic_initialized(kvm);
 }
 
-static void vcpu_pause(struct kvm_vcpu *vcpu)
+static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused;
+static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused;
+
+static void kvm_arm_halt_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.pause = true;
+	force_vm_exit(cpu_all_mask);
+}
+
+static void kvm_arm_resume_guest(struct kvm *kvm)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+
+		vcpu->arch.pause = false;
+		wake_up_interruptible(wq);
+	}
+}
+
+static void vcpu_sleep(struct kvm_vcpu *vcpu)
 {
 	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
 
-	wait_event_interruptible(*wq, !vcpu->arch.pause);
+	wait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+				       (!vcpu->arch.pause)));
 }
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		update_vttbr(vcpu->kvm);
 
-		if (vcpu->arch.pause)
-			vcpu_pause(vcpu);
+		if (vcpu->arch.power_off || vcpu->arch.pause)
+			vcpu_sleep(vcpu);
 
 		/*
 		 * Disarming the background timer must be done in a
@@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			run->exit_reason = KVM_EXIT_INTR;
 		}
 
-		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
+		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
+			vcpu->arch.power_off || vcpu->arch.pause) {
 			local_irq_enable();
+			kvm_timer_sync_hwstate(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			preempt_enable();
-			kvm_timer_sync_hwstate(vcpu);
 			continue;
 		}
 
@@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * guest time.
 		 */
 		kvm_guest_exit();
-		trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
+		/*
+		 * We must sync the timer state before the vgic state so that
+		 * the vgic can properly sample the updated state of the
+		 * interrupt line.
+		 */
+		kvm_timer_sync_hwstate(vcpu);
 
 		kvm_vgic_sync_hwstate(vcpu);
 
 		preempt_enable();
 
-		kvm_timer_sync_hwstate(vcpu);
-
 		ret = handle_exit(vcpu, run, ret);
 	}
 
@@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	vcpu_reset_hcr(vcpu);
 
 	/*
-	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 * Handle the "start in power-off" case.
 	 */
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
-		vcpu->arch.pause = true;
+		vcpu->arch.power_off = true;
 	else
-		vcpu->arch.pause = false;
+		vcpu->arch.power_off = false;
 
 	return 0;
 }
@@ -1080,7 +1124,7 @@ static int init_hyp_mode(void)
 	 */
 	err = kvm_timer_hyp_init();
 	if (err)
-		goto out_free_mappings;
+		goto out_free_context;
 
 #ifndef CONFIG_HOTPLUG_CPU
 	free_boot_hyp_pgd();
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index ad6f6424f1d1..0b556968a6da 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
 
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.pause = true;
+	vcpu->arch.power_off = true;
 }
 
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
@@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
-	if (!vcpu->arch.pause) {
+	if (!vcpu->arch.power_off) {
 		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
 			return PSCI_RET_ALREADY_ON;
 		else
@@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * the general puspose registers are undefined upon CPU_ON.
 	 */
 	*vcpu_reg(vcpu, 0) = context_id;
-	vcpu->arch.pause = false;
+	vcpu->arch.power_off = false;
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
@@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
 		if ((mpidr & target_affinity_mask) == target_affinity) {
 			matching_cpus++;
-			if (!tmp->arch.pause)
+			if (!tmp->arch.power_off)
 				return PSCI_0_2_AFFINITY_LEVEL_ON;
 		}
 	}
@@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 	 * re-initialized.
 	 */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
-		tmp->arch.pause = true;
+		tmp->arch.power_off = true;
 		kvm_vcpu_kick(tmp);
 	}
 
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 0ec35392d208..c25a88598eb0 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
 );
 
 TRACE_EVENT(kvm_exit,
-	TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
-	TP_ARGS(exit_reason, vcpu_pc),
+	TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+	TP_ARGS(idx, exit_reason, vcpu_pc),
 
 	TP_STRUCT__entry(
+		__field(	int,		idx		)
 		__field(	unsigned int,	exit_reason	)
 		__field(	unsigned long,	vcpu_pc		)
 	),
 
 	TP_fast_assign(
+		__entry->idx			= idx;
 		__entry->exit_reason		= exit_reason;
 		__entry->vcpu_pc		= vcpu_pc;
 	),
 
-	TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
+	TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
+		  __print_symbolic(__entry->idx, kvm_arm_exception_type),
 		  __entry->exit_reason,
+		  __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
 		  __entry->vcpu_pc)
 );
 
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 970d6c043774..e936352ccb00 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -9,6 +9,7 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/unwind.h>
 
 		.text
 
@@ -20,6 +21,8 @@
  */
 ENTRY(__clear_user_std)
 WEAK(arm_clear_user)
+UNWIND(.fnstart)
+UNWIND(.save {r1, lr})
 		stmfd	sp!, {r1, lr}
 		mov	r2, #0
 		cmp	r1, #4
@@ -44,6 +47,7 @@ WEAK(arm_clear_user)
 USER(		strnebt	r2, [r0])
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
+UNWIND(.fnend)
 ENDPROC(arm_clear_user)
 ENDPROC(__clear_user_std)
 
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 89a755b90db2..92673006e55c 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -102,6 +102,9 @@ config HAVE_AT91_SMD
 config HAVE_AT91_H32MX
 	bool
 
+config HAVE_AT91_GENERATED_CLK
+	bool
+
 config SOC_SAM_V4_V5
 	bool
 
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 1319c3c14327..84bd26535ae9 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -14,7 +14,7 @@ config ARCH_BCM_IPROC
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
 	select ARM_GLOBAL_TIMER
-
+	select COMMON_CLK_IPROC
 	select CLKSRC_MMIO
 	select ARCH_REQUIRE_GPIOLIB
 	select ARM_AMBA
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
index 9bdf54795f05..56978199c479 100644
--- a/arch/arm/mach-exynos/mcpm-exynos.c
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -20,6 +20,7 @@
 #include <asm/cputype.h>
 #include <asm/cp15.h>
 #include <asm/mcpm.h>
+#include <asm/smp_plat.h>
 
 #include "regs-pmu.h"
 #include "common.h"
@@ -70,7 +71,31 @@ static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster)
 		cluster >= EXYNOS5420_NR_CLUSTERS)
 		return -EINVAL;
 
-	exynos_cpu_power_up(cpunr);
+	if (!exynos_cpu_power_state(cpunr)) {
+		exynos_cpu_power_up(cpunr);
+
+		/*
+		 * This assumes the cluster number of the big cores(Cortex A15)
+		 * is 0 and the Little cores(Cortex A7) is 1.
+		 * When the system was booted from the Little core,
+		 * they should be reset during power up cpu.
+		 */
+		if (cluster &&
+		    cluster == MPIDR_AFFINITY_LEVEL(cpu_logical_map(0), 1)) {
+			/*
+			 * Before we reset the Little cores, we should wait
+			 * the SPARE2 register is set to 1 because the init
+			 * codes of the iROM will set the register after
+			 * initialization.
+			 */
+			while (!pmu_raw_readl(S5P_PMU_SPARE2))
+				udelay(10);
+
+			pmu_raw_writel(EXYNOS5420_KFC_CORE_RESET(cpu),
+					EXYNOS_SWRESET);
+		}
+	}
+
 	return 0;
 }
 
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 4a87e86dec45..7c21760f590f 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -200,15 +200,15 @@ no_clk:
 		args.args_count = 0;
 		child_domain = of_genpd_get_from_provider(&args);
 		if (IS_ERR(child_domain))
-			goto next_pd;
+			continue;
 
 		if (of_parse_phandle_with_args(np, "power-domains",
 					 "#power-domain-cells", 0, &args) != 0)
-			goto next_pd;
+			continue;
 
 		parent_domain = of_genpd_get_from_provider(&args);
 		if (IS_ERR(parent_domain))
-			goto next_pd;
+			continue;
 
 		if (pm_genpd_add_subdomain(parent_domain, child_domain))
 			pr_warn("%s failed to add subdomain: %s\n",
@@ -216,8 +216,6 @@ no_clk:
 		else
 			pr_info("%s has as child subdomain: %s.\n",
 				parent_domain->name, child_domain->name);
-next_pd:
-		of_node_put(np);
 	}
 
 	return 0;
diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
index b7614333d296..fba9068ed260 100644
--- a/arch/arm/mach-exynos/regs-pmu.h
+++ b/arch/arm/mach-exynos/regs-pmu.h
@@ -513,6 +513,12 @@ static inline unsigned int exynos_pmu_cpunr(unsigned int mpidr)
 #define SPREAD_ENABLE						0xF
 #define SPREAD_USE_STANDWFI					0xF
 
+#define EXYNOS5420_KFC_CORE_RESET0				BIT(8)
+#define EXYNOS5420_KFC_ETM_RESET0				BIT(20)
+
+#define EXYNOS5420_KFC_CORE_RESET(_nr)				\
+	((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr))
+
 #define EXYNOS5420_BB_CON1					0x0784
 #define EXYNOS5420_BB_SEL_EN					BIT(31)
 #define EXYNOS5420_BB_PMOS_EN					BIT(7)
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index e00eb39453a4..5a7e47ceec91 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -177,54 +177,57 @@ static struct irq_chip exynos_pmu_chip = {
 #endif
 };
 
-static int exynos_pmu_domain_xlate(struct irq_domain *domain,
-				   struct device_node *controller,
-				   const u32 *intspec,
-				   unsigned int intsize,
-				   unsigned long *out_hwirq,
-				   unsigned int *out_type)
+static int exynos_pmu_domain_translate(struct irq_domain *d,
+				       struct irq_fwspec *fwspec,
+				       unsigned long *hwirq,
+				       unsigned int *type)
 {
-	if (domain->of_node != controller)
-		return -EINVAL;	/* Shouldn't happen, really... */
-	if (intsize != 3)
-		return -EINVAL;	/* Not GIC compliant */
-	if (intspec[0] != 0)
-		return -EINVAL;	/* No PPI should point to this domain */
+	if (is_of_node(fwspec->fwnode)) {
+		if (fwspec->param_count != 3)
+			return -EINVAL;
 
-	*out_hwirq = intspec[1];
-	*out_type = intspec[2];
-	return 0;
+		/* No PPI should point to this domain */
+		if (fwspec->param[0] != 0)
+			return -EINVAL;
+
+		*hwirq = fwspec->param[1];
+		*type = fwspec->param[2];
+		return 0;
+	}
+
+	return -EINVAL;
 }
 
 static int exynos_pmu_domain_alloc(struct irq_domain *domain,
 				   unsigned int virq,
 				   unsigned int nr_irqs, void *data)
 {
-	struct of_phandle_args *args = data;
-	struct of_phandle_args parent_args;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
 	irq_hw_number_t hwirq;
 	int i;
 
-	if (args->args_count != 3)
+	if (fwspec->param_count != 3)
 		return -EINVAL;	/* Not GIC compliant */
-	if (args->args[0] != 0)
+	if (fwspec->param[0] != 0)
 		return -EINVAL;	/* No PPI should point to this domain */
 
-	hwirq = args->args[1];
+	hwirq = fwspec->param[1];
 
 	for (i = 0; i < nr_irqs; i++)
 		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
 					      &exynos_pmu_chip, NULL);
 
-	parent_args = *args;
-	parent_args.np = domain->parent->of_node;
-	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+	parent_fwspec = *fwspec;
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+					    &parent_fwspec);
 }
 
 static const struct irq_domain_ops exynos_pmu_domain_ops = {
-	.xlate	= exynos_pmu_domain_xlate,
-	.alloc	= exynos_pmu_domain_alloc,
-	.free	= irq_domain_free_irqs_common,
+	.translate	= exynos_pmu_domain_translate,
+	.alloc		= exynos_pmu_domain_alloc,
+	.free		= irq_domain_free_irqs_common,
 };
 
 static int __init exynos_pmu_irq_init(struct device_node *node,
diff --git a/arch/arm/mach-gemini/board-nas4220b.c b/arch/arm/mach-gemini/board-nas4220b.c
index ca8a25bb3521..18b12796acf9 100644
--- a/arch/arm/mach-gemini/board-nas4220b.c
+++ b/arch/arm/mach-gemini/board-nas4220b.c
@@ -18,7 +18,6 @@
 #include <linux/leds.h>
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
-#include <linux/mdio-gpio.h>
 #include <linux/io.h>
 
 #include <asm/setup.h>
diff --git a/arch/arm/mach-gemini/board-wbd111.c b/arch/arm/mach-gemini/board-wbd111.c
index 418188cd1712..14c56f3f0ec2 100644
--- a/arch/arm/mach-gemini/board-wbd111.c
+++ b/arch/arm/mach-gemini/board-wbd111.c
@@ -15,7 +15,6 @@
 #include <linux/input.h>
 #include <linux/skbuff.h>
 #include <linux/gpio_keys.h>
-#include <linux/mdio-gpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-gemini/board-wbd222.c b/arch/arm/mach-gemini/board-wbd222.c
index 266b265090cd..6070282ce243 100644
--- a/arch/arm/mach-gemini/board-wbd222.c
+++ b/arch/arm/mach-gemini/board-wbd222.c
@@ -15,7 +15,6 @@
 #include <linux/input.h>
 #include <linux/skbuff.h>
 #include <linux/gpio_keys.h>
-#include <linux/mdio-gpio.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 8c4467fad837..10bf7159b27d 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -181,40 +181,42 @@ static struct irq_chip imx_gpc_chip = {
 #endif
 };
 
-static int imx_gpc_domain_xlate(struct irq_domain *domain,
-				struct device_node *controller,
-				const u32 *intspec,
-				unsigned int intsize,
-				unsigned long *out_hwirq,
-				unsigned int *out_type)
+static int imx_gpc_domain_translate(struct irq_domain *d,
+				    struct irq_fwspec *fwspec,
+				    unsigned long *hwirq,
+				    unsigned int *type)
 {
-	if (domain->of_node != controller)
-		return -EINVAL;	/* Shouldn't happen, really... */
-	if (intsize != 3)
-		return -EINVAL;	/* Not GIC compliant */
-	if (intspec[0] != 0)
-		return -EINVAL;	/* No PPI should point to this domain */
+	if (is_of_node(fwspec->fwnode)) {
+		if (fwspec->param_count != 3)
+			return -EINVAL;
 
-	*out_hwirq = intspec[1];
-	*out_type = intspec[2];
-	return 0;
+		/* No PPI should point to this domain */
+		if (fwspec->param[0] != 0)
+			return -EINVAL;
+
+		*hwirq = fwspec->param[1];
+		*type = fwspec->param[2];
+		return 0;
+	}
+
+	return -EINVAL;
 }
 
 static int imx_gpc_domain_alloc(struct irq_domain *domain,
 				  unsigned int irq,
 				  unsigned int nr_irqs, void *data)
 {
-	struct of_phandle_args *args = data;
-	struct of_phandle_args parent_args;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
 	irq_hw_number_t hwirq;
 	int i;
 
-	if (args->args_count != 3)
+	if (fwspec->param_count != 3)
 		return -EINVAL;	/* Not GIC compliant */
-	if (args->args[0] != 0)
+	if (fwspec->param[0] != 0)
 		return -EINVAL;	/* No PPI should point to this domain */
 
-	hwirq = args->args[1];
+	hwirq = fwspec->param[1];
 	if (hwirq >= GPC_MAX_IRQS)
 		return -EINVAL;	/* Can't deal with this */
 
@@ -222,15 +224,16 @@ static int imx_gpc_domain_alloc(struct irq_domain *domain,
 		irq_domain_set_hwirq_and_chip(domain, irq + i, hwirq + i,
 					      &imx_gpc_chip, NULL);
 
-	parent_args = *args;
-	parent_args.np = domain->parent->of_node;
-	return irq_domain_alloc_irqs_parent(domain, irq, nr_irqs, &parent_args);
+	parent_fwspec = *fwspec;
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	return irq_domain_alloc_irqs_parent(domain, irq, nr_irqs,
+					    &parent_fwspec);
 }
 
 static const struct irq_domain_ops imx_gpc_domain_ops = {
-	.xlate	= imx_gpc_domain_xlate,
-	.alloc	= imx_gpc_domain_alloc,
-	.free	= irq_domain_free_irqs_common,
+	.translate	= imx_gpc_domain_translate,
+	.alloc		= imx_gpc_domain_alloc,
+	.free		= irq_domain_free_irqs_common,
 };
 
 static int __init imx_gpc_init(struct device_node *node,
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 9602cc12d2f1..3878494bd118 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -350,7 +350,7 @@ static void __init imx6q_opp_init(void)
 		return;
 	}
 
-	if (of_init_opp_table(cpu_dev)) {
+	if (dev_pm_opp_of_add_table(cpu_dev)) {
 		pr_warn("failed to init OPP table\n");
 		goto put_node;
 	}
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index c86a5a0aefac..e20fc4178b15 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -117,11 +117,4 @@ config MACH_KIRKWOOD
 	  Say 'Y' here if you want your kernel to support boards based
 	  on the Marvell Kirkwood device tree.
 
-config MACH_NETXBIG
-	bool "LaCie 2Big and 5Big Network v2"
-	depends on MACH_KIRKWOOD
-	help
-	  Say 'Y' here if you want your kernel to support the
-	  LaCie 2Big and 5Big Network v2
-
 endif
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index b4f01497ce0b..ecf9e0c3b107 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -13,4 +13,3 @@ endif
 
 obj-$(CONFIG_MACH_DOVE)		 += dove.o
 obj-$(CONFIG_MACH_KIRKWOOD)	 += kirkwood.o kirkwood-pm.o
-obj-$(CONFIG_MACH_NETXBIG)	 += netxbig.o
diff --git a/arch/arm/mach-mvebu/board.h b/arch/arm/mach-mvebu/board.h
deleted file mode 100644
index 98e32cc2ef3d..000000000000
--- a/arch/arm/mach-mvebu/board.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Board functions for Marvell System On Chip
- *
- * Copyright (C) 2014
- *
- * Andrew Lunn <andrew@lunn.ch>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __ARCH_MVEBU_BOARD_H
-#define __ARCH_MVEBU_BOARD_H
-
-#ifdef CONFIG_MACH_NETXBIG
-void netxbig_init(void);
-#else
-static inline void netxbig_init(void) {};
-#endif
-#endif
diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
index 925f75f54268..f9d8e1ea7183 100644
--- a/arch/arm/mach-mvebu/kirkwood.c
+++ b/arch/arm/mach-mvebu/kirkwood.c
@@ -25,7 +25,6 @@
 #include "kirkwood.h"
 #include "kirkwood-pm.h"
 #include "common.h"
-#include "board.h"
 
 static struct resource kirkwood_cpufreq_resources[] = {
 	[0] = {
@@ -180,9 +179,6 @@ static void __init kirkwood_dt_init(void)
 	kirkwood_pm_init();
 	kirkwood_dt_eth_fixup();
 
-	if (of_machine_is_compatible("lacie,netxbig"))
-		netxbig_init();
-
 	of_platform_populate(NULL, of_default_bus_match_table, auxdata, NULL);
 }
 
diff --git a/arch/arm/mach-mvebu/netxbig.c b/arch/arm/mach-mvebu/netxbig.c
deleted file mode 100644
index 94b11b6585a4..000000000000
--- a/arch/arm/mach-mvebu/netxbig.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * arch/arm/mach-mvbu/board-netxbig.c
- *
- * LaCie 2Big and 5Big Network v2 board setup
- *
- * Copyright (C) 2010 Simon Guinot <sguinot@lacie.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/leds-kirkwood-netxbig.h>
-#include "common.h"
-
-/*****************************************************************************
- * GPIO extension LEDs
- ****************************************************************************/
-
-/*
- * The LEDs are controlled by a CPLD and can be configured through a GPIO
- * extension bus:
- *
- * - address register : bit [0-2] -> GPIO [47-49]
- * - data register    : bit [0-2] -> GPIO [44-46]
- * - enable register  : GPIO 29
- */
-
-static int netxbig_v2_gpio_ext_addr[] = { 47, 48, 49 };
-static int netxbig_v2_gpio_ext_data[] = { 44, 45, 46 };
-
-static struct netxbig_gpio_ext netxbig_v2_gpio_ext = {
-	.addr		= netxbig_v2_gpio_ext_addr,
-	.num_addr	= ARRAY_SIZE(netxbig_v2_gpio_ext_addr),
-	.data		= netxbig_v2_gpio_ext_data,
-	.num_data	= ARRAY_SIZE(netxbig_v2_gpio_ext_data),
-	.enable		= 29,
-};
-
-/*
- * Address register selection:
- *
- * addr | register
- * ----------------------------
- *   0  | front LED
- *   1  | front LED brightness
- *   2  | SATA LED brightness
- *   3  | SATA0 LED
- *   4  | SATA1 LED
- *   5  | SATA2 LED
- *   6  | SATA3 LED
- *   7  | SATA4 LED
- *
- * Data register configuration:
- *
- * data | LED brightness
- * -------------------------------------------------
- *   0  | min (off)
- *   -  | -
- *   7  | max
- *
- * data | front LED mode
- * -------------------------------------------------
- *   0  | fix off
- *   1  | fix blue on
- *   2  | fix red on
- *   3  | blink blue on=1 sec and blue off=1 sec
- *   4  | blink red on=1 sec and red off=1 sec
- *   5  | blink blue on=2.5 sec and red on=0.5 sec
- *   6  | blink blue on=1 sec and red on=1 sec
- *   7  | blink blue on=0.5 sec and blue off=2.5 sec
- *
- * data | SATA LED mode
- * -------------------------------------------------
- *   0  | fix off
- *   1  | SATA activity blink
- *   2  | fix red on
- *   3  | blink blue on=1 sec and blue off=1 sec
- *   4  | blink red on=1 sec and red off=1 sec
- *   5  | blink blue on=2.5 sec and red on=0.5 sec
- *   6  | blink blue on=1 sec and red on=1 sec
- *   7  | fix blue on
- */
-
-static int netxbig_v2_red_mled[NETXBIG_LED_MODE_NUM] = {
-	[NETXBIG_LED_OFF]	= 0,
-	[NETXBIG_LED_ON]	= 2,
-	[NETXBIG_LED_SATA]	= NETXBIG_LED_INVALID_MODE,
-	[NETXBIG_LED_TIMER1]	= 4,
-	[NETXBIG_LED_TIMER2]	= NETXBIG_LED_INVALID_MODE,
-};
-
-static int netxbig_v2_blue_pwr_mled[NETXBIG_LED_MODE_NUM] = {
-	[NETXBIG_LED_OFF]	= 0,
-	[NETXBIG_LED_ON]	= 1,
-	[NETXBIG_LED_SATA]	= NETXBIG_LED_INVALID_MODE,
-	[NETXBIG_LED_TIMER1]	= 3,
-	[NETXBIG_LED_TIMER2]	= 7,
-};
-
-static int netxbig_v2_blue_sata_mled[NETXBIG_LED_MODE_NUM] = {
-	[NETXBIG_LED_OFF]	= 0,
-	[NETXBIG_LED_ON]	= 7,
-	[NETXBIG_LED_SATA]	= 1,
-	[NETXBIG_LED_TIMER1]	= 3,
-	[NETXBIG_LED_TIMER2]	= NETXBIG_LED_INVALID_MODE,
-};
-
-static struct netxbig_led_timer netxbig_v2_led_timer[] = {
-	[0] = {
-		.delay_on	= 500,
-		.delay_off	= 500,
-		.mode		= NETXBIG_LED_TIMER1,
-	},
-	[1] = {
-		.delay_on	= 500,
-		.delay_off	= 1000,
-		.mode		= NETXBIG_LED_TIMER2,
-	},
-};
-
-#define NETXBIG_LED(_name, maddr, mval, baddr)			\
-	{ .name		= _name,				\
-	  .mode_addr	= maddr,				\
-	  .mode_val	= mval,					\
-	  .bright_addr	= baddr }
-
-static struct netxbig_led net2big_v2_leds_ctrl[] = {
-	NETXBIG_LED("net2big-v2:blue:power", 0, netxbig_v2_blue_pwr_mled,  1),
-	NETXBIG_LED("net2big-v2:red:power",  0, netxbig_v2_red_mled,       1),
-	NETXBIG_LED("net2big-v2:blue:sata0", 3, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net2big-v2:red:sata0",  3, netxbig_v2_red_mled,       2),
-	NETXBIG_LED("net2big-v2:blue:sata1", 4, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net2big-v2:red:sata1",  4, netxbig_v2_red_mled,       2),
-};
-
-static struct netxbig_led_platform_data net2big_v2_leds_data = {
-	.gpio_ext	= &netxbig_v2_gpio_ext,
-	.timer		= netxbig_v2_led_timer,
-	.num_timer	= ARRAY_SIZE(netxbig_v2_led_timer),
-	.leds		= net2big_v2_leds_ctrl,
-	.num_leds	= ARRAY_SIZE(net2big_v2_leds_ctrl),
-};
-
-static struct netxbig_led net5big_v2_leds_ctrl[] = {
-	NETXBIG_LED("net5big-v2:blue:power", 0, netxbig_v2_blue_pwr_mled,  1),
-	NETXBIG_LED("net5big-v2:red:power",  0, netxbig_v2_red_mled,       1),
-	NETXBIG_LED("net5big-v2:blue:sata0", 3, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net5big-v2:red:sata0",  3, netxbig_v2_red_mled,       2),
-	NETXBIG_LED("net5big-v2:blue:sata1", 4, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net5big-v2:red:sata1",  4, netxbig_v2_red_mled,       2),
-	NETXBIG_LED("net5big-v2:blue:sata2", 5, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net5big-v2:red:sata2",  5, netxbig_v2_red_mled,       2),
-	NETXBIG_LED("net5big-v2:blue:sata3", 6, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net5big-v2:red:sata3",  6, netxbig_v2_red_mled,       2),
-	NETXBIG_LED("net5big-v2:blue:sata4", 7, netxbig_v2_blue_sata_mled, 2),
-	NETXBIG_LED("net5big-v2:red:sata4",  7, netxbig_v2_red_mled,       2),
-};
-
-static struct netxbig_led_platform_data net5big_v2_leds_data = {
-	.gpio_ext	= &netxbig_v2_gpio_ext,
-	.timer		= netxbig_v2_led_timer,
-	.num_timer	= ARRAY_SIZE(netxbig_v2_led_timer),
-	.leds		= net5big_v2_leds_ctrl,
-	.num_leds	= ARRAY_SIZE(net5big_v2_leds_ctrl),
-};
-
-static struct platform_device netxbig_v2_leds = {
-	.name		= "leds-netxbig",
-	.id		= -1,
-	.dev		= {
-		.platform_data	= &net2big_v2_leds_data,
-	},
-};
-
-void __init netxbig_init(void)
-{
-
-	if (of_machine_is_compatible("lacie,net5big_v2"))
-		netxbig_v2_leds.dev.platform_data = &net5big_v2_leds_data;
-	platform_device_register(&netxbig_v2_leds);
-}
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index b3a0dff67e3f..33d1460a5639 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -49,6 +49,7 @@ config SOC_OMAP5
 	select OMAP_INTERCONNECT
 	select OMAP_INTERCONNECT_BARRIER
 	select PM_OPP if PM
+	select ZONE_DMA if ARM_LPAE
 
 config SOC_AM33XX
 	bool "TI AM33XX"
@@ -78,6 +79,7 @@ config SOC_DRA7XX
 	select OMAP_INTERCONNECT
 	select OMAP_INTERCONNECT_BARRIER
 	select PM_OPP if PM
+	select ZONE_DMA if ARM_LPAE
 
 config ARCH_OMAP2PLUS
 	bool
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 6133eaac685d..fb219a30c10c 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -106,6 +106,7 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)")
 MACHINE_END
 
 static const char *const omap36xx_boards_compat[] __initconst = {
+	"ti,omap3630",
 	"ti,omap36xx",
 	NULL,
 };
@@ -243,6 +244,9 @@ static const char *const omap5_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(OMAP5_DT, "Generic OMAP5 (Flattened Device Tree)")
+#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
+	.dma_zone_size	= SZ_2G,
+#endif
 	.reserve	= omap_reserve,
 	.smp		= smp_ops(omap4_smp_ops),
 	.map_io		= omap5_map_io,
@@ -288,6 +292,9 @@ static const char *const dra74x_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)")
+#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
+	.dma_zone_size	= SZ_2G,
+#endif
 	.reserve	= omap_reserve,
 	.smp		= smp_ops(omap4_smp_ops),
 	.map_io		= dra7xx_map_io,
@@ -308,6 +315,9 @@ static const char *const dra72x_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)")
+#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
+	.dma_zone_size	= SZ_2G,
+#endif
 	.reserve	= omap_reserve,
 	.map_io		= dra7xx_map_io,
 	.init_early	= dra7xx_init_early,
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index e1d2e991d17a..db7e0bab3587 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -399,40 +399,42 @@ static struct irq_chip wakeupgen_chip = {
 #endif
 };
 
-static int wakeupgen_domain_xlate(struct irq_domain *domain,
-				  struct device_node *controller,
-				  const u32 *intspec,
-				  unsigned int intsize,
-				  unsigned long *out_hwirq,
-				  unsigned int *out_type)
+static int wakeupgen_domain_translate(struct irq_domain *d,
+				      struct irq_fwspec *fwspec,
+				      unsigned long *hwirq,
+				      unsigned int *type)
 {
-	if (domain->of_node != controller)
-		return -EINVAL;	/* Shouldn't happen, really... */
-	if (intsize != 3)
-		return -EINVAL;	/* Not GIC compliant */
-	if (intspec[0] != 0)
-		return -EINVAL;	/* No PPI should point to this domain */
+	if (is_of_node(fwspec->fwnode)) {
+		if (fwspec->param_count != 3)
+			return -EINVAL;
 
-	*out_hwirq = intspec[1];
-	*out_type = intspec[2];
-	return 0;
+		/* No PPI should point to this domain */
+		if (fwspec->param[0] != 0)
+			return -EINVAL;
+
+		*hwirq = fwspec->param[1];
+		*type = fwspec->param[2];
+		return 0;
+	}
+
+	return -EINVAL;
 }
 
 static int wakeupgen_domain_alloc(struct irq_domain *domain,
 				  unsigned int virq,
 				  unsigned int nr_irqs, void *data)
 {
-	struct of_phandle_args *args = data;
-	struct of_phandle_args parent_args;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
 	irq_hw_number_t hwirq;
 	int i;
 
-	if (args->args_count != 3)
+	if (fwspec->param_count != 3)
 		return -EINVAL;	/* Not GIC compliant */
-	if (args->args[0] != 0)
+	if (fwspec->param[0] != 0)
 		return -EINVAL;	/* No PPI should point to this domain */
 
-	hwirq = args->args[1];
+	hwirq = fwspec->param[1];
 	if (hwirq >= MAX_IRQS)
 		return -EINVAL;	/* Can't deal with this */
 
@@ -440,15 +442,16 @@ static int wakeupgen_domain_alloc(struct irq_domain *domain,
 		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
 					      &wakeupgen_chip, NULL);
 
-	parent_args = *args;
-	parent_args.np = domain->parent->of_node;
-	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args);
+	parent_fwspec = *fwspec;
+	parent_fwspec.fwnode = domain->parent->fwnode;
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+					    &parent_fwspec);
 }
 
 static const struct irq_domain_ops wakeupgen_domain_ops = {
-	.xlate	= wakeupgen_domain_xlate,
-	.alloc	= wakeupgen_domain_alloc,
-	.free	= irq_domain_free_irqs_common,
+	.translate	= wakeupgen_domain_translate,
+	.alloc		= wakeupgen_domain_alloc,
+	.free		= irq_domain_free_irqs_common,
 };
 
 /*
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index ea56397599c2..1dfe34654c43 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -559,7 +559,14 @@ static void pdata_quirks_check(struct pdata_init *quirks)
 
 void __init pdata_quirks_init(const struct of_device_id *omap_dt_match_table)
 {
-	omap_sdrc_init(NULL, NULL);
+	/*
+	 * We still need this for omap2420 and omap3 PM to work, others are
+	 * using drivers/misc/sram.c already.
+	 */
+	if (of_machine_is_compatible("ti,omap2420") ||
+	    of_machine_is_compatible("ti,omap3"))
+		omap_sdrc_init(NULL, NULL);
+
 	pdata_quirks_check(auxdata_quirks);
 	of_platform_populate(NULL, omap_dt_match_table,
 			     omap_auxdata_lookup, NULL);
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index a55655127ef2..bef41837bf7f 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -647,7 +647,7 @@ static OMAP_SYS_32K_TIMER_INIT(4, 1, "timer_32k_ck", "ti,timer-alwon",
 void __init omap4_local_timer_init(void)
 {
 	omap4_sync32k_timer_init();
-	clocksource_of_init();
+	clocksource_probe();
 }
 #else
 void __init omap4_local_timer_init(void)
@@ -663,7 +663,7 @@ void __init omap5_realtime_timer_init(void)
 	omap4_sync32k_timer_init();
 	realtime_counter_init();
 
-	clocksource_of_init();
+	clocksource_probe();
 }
 #endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */
 
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 06005d3f2ba3..20ce2d386f17 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -42,10 +42,6 @@
 #define PECR_IS(n)	((1 << ((n) * 2)) << 29)
 
 extern void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int));
-#ifdef CONFIG_PM
-
-#define ISRAM_START	0x5c000000
-#define ISRAM_SIZE	SZ_256K
 
 /*
  * NAND NFC: DFI bus arbitration subset
@@ -54,6 +50,11 @@ extern void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int));
 #define NDCR_ND_ARB_EN		(1 << 12)
 #define NDCR_ND_ARB_CNTL	(1 << 19)
 
+#ifdef CONFIG_PM
+
+#define ISRAM_START	0x5c000000
+#define ISRAM_SIZE	SZ_256K
+
 static void __iomem *sram;
 static unsigned long wakeup_src;
 
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index b6cf3b449428..251c7b9c5f9b 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -67,7 +67,7 @@ static void __init rockchip_timer_init(void)
 	}
 
 	of_clk_init(NULL);
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 static void __init rockchip_dt_init(void)
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index 65c426bc45f7..14bd9ae3f476 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -809,7 +809,7 @@ static const struct gpio_led_platform_data gpio_leds_pdata = {
 	.num_leds = ARRAY_SIZE(gpio_leds),
 };
 
-static struct s3c_hsotg_plat crag6410_hsotg_pdata;
+static struct dwc2_hsotg_plat crag6410_hsotg_pdata;
 
 static void __init crag6410_machine_init(void)
 {
@@ -835,7 +835,7 @@ static void __init crag6410_machine_init(void)
 	s3c_i2c0_set_platdata(&i2c0_pdata);
 	s3c_i2c1_set_platdata(&i2c1_pdata);
 	s3c_fb_set_platdata(&crag6410_lcd_pdata);
-	s3c_hsotg_set_platdata(&crag6410_hsotg_pdata);
+	dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata);
 
 	i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0));
 	i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1));
diff --git a/arch/arm/mach-s3c64xx/mach-smartq.c b/arch/arm/mach-s3c64xx/mach-smartq.c
index b3d13537a7f0..719843dca510 100644
--- a/arch/arm/mach-s3c64xx/mach-smartq.c
+++ b/arch/arm/mach-s3c64xx/mach-smartq.c
@@ -189,7 +189,7 @@ static struct s3c_hwmon_pdata smartq_hwmon_pdata __initdata = {
 	},
 };
 
-static struct s3c_hsotg_plat smartq_hsotg_pdata;
+static struct dwc2_hsotg_plat smartq_hsotg_pdata;
 
 static int __init smartq_lcd_setup_gpio(void)
 {
@@ -382,7 +382,7 @@ void __init smartq_map_io(void)
 void __init smartq_machine_init(void)
 {
 	s3c_i2c0_set_platdata(NULL);
-	s3c_hsotg_set_platdata(&smartq_hsotg_pdata);
+	dwc2_hsotg_set_platdata(&smartq_hsotg_pdata);
 	s3c_hwmon_set_platdata(&smartq_hwmon_pdata);
 	s3c_sdhci1_set_platdata(&smartq_internal_hsmmc_pdata);
 	s3c_sdhci2_set_platdata(&smartq_internal_hsmmc_pdata);
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index d590b88bd8a8..286c9bd676e1 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -628,7 +628,7 @@ static struct platform_pwm_backlight_data smdk6410_bl_data = {
 	.enable_gpio = -1,
 };
 
-static struct s3c_hsotg_plat smdk6410_hsotg_pdata;
+static struct dwc2_hsotg_plat smdk6410_hsotg_pdata;
 
 static void __init smdk6410_map_io(void)
 {
@@ -659,7 +659,7 @@ static void __init smdk6410_machine_init(void)
 	s3c_i2c0_set_platdata(NULL);
 	s3c_i2c1_set_platdata(NULL);
 	s3c_fb_set_platdata(&smdk6410_lcd_pdata);
-	s3c_hsotg_set_platdata(&smdk6410_hsotg_pdata);
+	dwc2_hsotg_set_platdata(&smdk6410_hsotg_pdata);
 
 	samsung_keypad_set_platdata(&smdk6410_keypad_data);
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 6bfa6407a27c..1e572a903f8e 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -97,7 +97,7 @@ static u32 __init r8a7779_read_mode_pins(void)
 static void __init r8a7779_init_time(void)
 {
 	r8a7779_clocks_init(r8a7779_read_mode_pins());
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 static const char *const r8a7779_compat_dt[] __initconst = {
diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c
index aa3339258d9c..9eccde3c7b13 100644
--- a/arch/arm/mach-shmobile/setup-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c
@@ -128,7 +128,7 @@ void __init rcar_gen2_timer_init(void)
 #endif /* CONFIG_ARM_ARCH_TIMER */
 
 	rcar_gen2_clocks_init(mode);
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 struct memory_reserve_config {
diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c
index b7afce6795f4..ca2f6a82a414 100644
--- a/arch/arm/mach-spear/spear13xx.c
+++ b/arch/arm/mach-spear/spear13xx.c
@@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void)
 	clk_put(pclk);
 
 	spear_setup_of_timer();
-	clocksource_of_init();
+	clocksource_probe();
 }
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 65bab2876343..223c9e99380d 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -46,7 +46,7 @@ static void __init sun6i_timer_init(void)
 	of_clk_init(NULL);
 	if (IS_ENABLED(CONFIG_RESET_CONTROLLER))
 		sun6i_reset_init();
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 35670b15f281..546338bbacf8 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -408,7 +408,7 @@ static const char * u300_board_compat[] = {
 DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)")
 	.map_io		= u300_map_io,
 	.init_irq	= u300_init_irq_dt,
-	.init_time	= clocksource_of_init,
+	.init_time	= clocksource_probe,
 	.init_machine	= u300_init_machine_dt,
 	.restart	= u300_restart,
 	.dt_compat      = u300_board_compat,
diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c
index 131996805690..68fe986ca42e 100644
--- a/arch/arm/mach-u300/dummyspichip.c
+++ b/arch/arm/mach-u300/dummyspichip.c
@@ -264,7 +264,6 @@ static const struct of_device_id pl022_dummy_dt_match[] = {
 static struct spi_driver pl022_dummy_driver = {
 	.driver = {
 		.name	= "spi-dummy",
-		.owner	= THIS_MODULE,
 		.of_match_table = pl022_dummy_dt_match,
 	},
 	.probe	= pl022_dummy_probe,
diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c
index ff28d8ad1ed7..8d2d233f8e6c 100644
--- a/arch/arm/mach-ux500/timer.c
+++ b/arch/arm/mach-ux500/timer.c
@@ -44,5 +44,5 @@ void __init ux500_timer_init(void)
 
 dt_fail:
 	clksrc_dbx500_prcmu_init(prcmu_timer_base);
-	clocksource_of_init();
+	clocksource_probe();
 }
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 5a6e4e20ca0a..6f39d03cc27e 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -154,7 +154,7 @@ static void __init zynq_timer_init(void)
 
 	zynq_clock_init();
 	of_clk_init(NULL);
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 static struct map_desc zynq_cortex_a9_scu_map __initdata = {
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index df7537f12469..c21941349b3e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -419,28 +419,24 @@ config CPU_THUMBONLY
 config CPU_32v3
 	bool
 	select CPU_USE_DOMAINS if MMU
-	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
 
 config CPU_32v4
 	bool
 	select CPU_USE_DOMAINS if MMU
-	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
 
 config CPU_32v4T
 	bool
 	select CPU_USE_DOMAINS if MMU
-	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
 
 config CPU_32v5
 	bool
 	select CPU_USE_DOMAINS if MMU
-	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select NEED_KUSER_HELPERS
 	select TLS_REG_EMUL if SMP || !MMU
 
@@ -805,14 +801,6 @@ config TLS_REG_EMUL
 	  a few prototypes like that in existence) and therefore access to
 	  that required register must be emulated.
 
-config NEEDS_SYSCALL_FOR_CMPXCHG
-	bool
-	select NEED_KUSER_HELPERS
-	help
-	  SMP on a pre-ARMv6 processor?  Well OK then.
-	  Forget about fast user space cmpxchg support.
-	  It is just not possible.
-
 config NEED_KUSER_HELPERS
 	bool
 
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 00b7f7de28a1..7d5f4c736a16 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -803,7 +803,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 			}
 		}
 	} else {
-		fault = probe_kernel_address(instrptr, instr);
+		fault = probe_kernel_address((void *)instrptr, instr);
 		instr = __mem_to_opcode_arm(instr);
 	}
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1a7815e5421b..ad4eb2d26e16 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1407,12 +1407,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
 	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
 
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 
 	if (!pages)
 		return -ENXIO;
 
+	if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
+		return -ENXIO;
+
+	pages += off;
+
 	do {
 		int ret = vm_insert_page(vma, uaddr, *pages++);
 		if (ret) {
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0d629b8f973f..daafcf121ce0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -593,6 +593,28 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 	arm_notify_die("", regs, &info, ifsr, 0);
 }
 
+/*
+ * Abort handler to be used only during first unmasking of asynchronous aborts
+ * on the boot CPU. This makes sure that the machine will not die if the
+ * firmware/bootloader left an imprecise abort pending for us to trip over.
+ */
+static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
+				      struct pt_regs *regs)
+{
+	pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
+		"first unmask, this is most likely caused by a "
+		"firmware/bootloader bug.\n", fsr);
+
+	return 0;
+}
+
+void __init early_abt_enable(void)
+{
+	fsr_info[22].fn = early_abort_handler;
+	local_abt_enable();
+	fsr_info[22].fn = do_bad;
+}
+
 #ifndef CONFIG_ARM_LPAE
 static int __init exceptions_init(void)
 {
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index cf08bdfbe0d6..05ec5e0df32d 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -24,5 +24,6 @@ static inline int fsr_fs(unsigned int fsr)
 
 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
 unsigned long search_exception_table(unsigned long addr);
+void early_abt_enable(void);
 
 #endif	/* __ARCH_ARM_FAULT_H */
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 7cd15143a507..4867f5daf82c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -38,6 +38,7 @@
 #include <asm/mach/pci.h>
 #include <asm/fixmap.h>
 
+#include "fault.h"
 #include "mm.h"
 #include "tcm.h"
 
@@ -1363,6 +1364,9 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
 	 */
 	local_flush_tlb_all();
 	flush_cache_all();
+
+	/* Enable asynchronous aborts */
+	early_abt_enable();
 }
 
 static void __init kmap_init(void)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 876060bcceeb..2f4b14cfddb4 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -125,7 +125,7 @@ static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
 }
 
 /*
- * Wrapper that handles both OABI and EABI and assures Thumb2 interworking
+ * Wrappers which handle both OABI and EABI and assures Thumb2 interworking
  * (where the assembly routines like __aeabi_uidiv could cause problems).
  */
 static u32 jit_udiv(u32 dividend, u32 divisor)
@@ -133,6 +133,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor)
 	return dividend / divisor;
 }
 
+static u32 jit_mod(u32 dividend, u32 divisor)
+{
+	return dividend % divisor;
+}
+
 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx)
 {
 	inst |= (cond << 28);
@@ -471,11 +476,17 @@ static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
 #endif
 }
 
-static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx,
+				int bpf_op)
 {
 #if __LINUX_ARM_ARCH__ == 7
 	if (elf_hwcap & HWCAP_IDIVA) {
-		emit(ARM_UDIV(rd, rm, rn), ctx);
+		if (bpf_op == BPF_DIV)
+			emit(ARM_UDIV(rd, rm, rn), ctx);
+		else {
+			emit(ARM_UDIV(ARM_R3, rm, rn), ctx);
+			emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx);
+		}
 		return;
 	}
 #endif
@@ -496,7 +507,8 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
 		emit(ARM_MOV_R(ARM_R0, rm), ctx);
 
 	ctx->seen |= SEEN_CALL;
-	emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
+	emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod,
+		   ctx);
 	emit_blx_r(ARM_R3, ctx);
 
 	if (rd != ARM_R0)
@@ -614,6 +626,7 @@ load_common:
 		case BPF_LD | BPF_B | BPF_IND:
 			load_order = 0;
 load_ind:
+			update_on_xread(ctx);
 			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
 			goto load_common;
 		case BPF_LDX | BPF_IMM:
@@ -697,13 +710,27 @@ load_ind:
 			if (k == 1)
 				break;
 			emit_mov_i(r_scratch, k, ctx);
-			emit_udiv(r_A, r_A, r_scratch, ctx);
+			emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV);
 			break;
 		case BPF_ALU | BPF_DIV | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_CMP_I(r_X, 0), ctx);
 			emit_err_ret(ARM_COND_EQ, ctx);
-			emit_udiv(r_A, r_A, r_X, ctx);
+			emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV);
+			break;
+		case BPF_ALU | BPF_MOD | BPF_K:
+			if (k == 1) {
+				emit_mov_i(r_A, 0, ctx);
+				break;
+			}
+			emit_mov_i(r_scratch, k, ctx);
+			emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD);
+			break;
+		case BPF_ALU | BPF_MOD | BPF_X:
+			update_on_xread(ctx);
+			emit(ARM_CMP_I(r_X, 0), ctx);
+			emit_err_ret(ARM_COND_EQ, ctx);
+			emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD);
 			break;
 		case BPF_ALU | BPF_OR | BPF_K:
 			/* A |= K */
@@ -1047,7 +1074,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	set_memory_ro((unsigned long)header, header->pages);
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = true;
+	fp->jited = 1;
 out:
 	kfree(ctx.offsets);
 	return;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index 4b17d5ab652a..c46fca2972f7 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -115,6 +115,8 @@
 
 #define ARM_INST_UMULL		0x00800090
 
+#define ARM_INST_MLS		0x00600090
+
 /*
  * Use a suitable undefined instruction to use for ARM/Thumb2 faulting.
  * We need to be careful not to conflict with those used by other modules
@@ -210,4 +212,7 @@
 #define ARM_UMULL(rd_lo, rd_hi, rn, rm)	(ARM_INST_UMULL | (rd_hi) << 16 \
 					 | (rd_lo) << 12 | (rm) << 8 | rn)
 
+#define ARM_MLS(rd, rn, rm, ra)	(ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \
+				 | (ra) << 12)
+
 #endif /* PFILTER_OPCODES_ARM_H */
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 2235081a04ee..8861c367d061 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -495,7 +495,7 @@ void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq)
 
 	d->netdev = &orion_ge00.dev;
 	for (i = 0; i < d->nr_chips; i++)
-		d->chip[i].host_dev = &orion_ge00_shared.dev;
+		d->chip[i].host_dev = &orion_ge_mvmdio.dev;
 	orion_switch_device.dev.platform_data = d;
 
 	platform_device_register(&orion_switch_device);
diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c
index 83c7d154bde0..82074625de5c 100644
--- a/arch/arm/plat-samsung/devs.c
+++ b/arch/arm/plat-samsung/devs.c
@@ -1042,11 +1042,11 @@ struct platform_device s3c_device_usb_hsotg = {
 	},
 };
 
-void __init s3c_hsotg_set_platdata(struct s3c_hsotg_plat *pd)
+void __init dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd)
 {
-	struct s3c_hsotg_plat *npd;
+	struct dwc2_hsotg_plat *npd;
 
-	npd = s3c_set_platdata(pd, sizeof(struct s3c_hsotg_plat),
+	npd = s3c_set_platdata(pd, sizeof(struct dwc2_hsotg_plat),
 			&s3c_device_usb_hsotg);
 
 	if (!npd->phy_init)
diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
index aedec81d1198..f6455273b2f8 100644
--- a/arch/arm/vdso/vdsomunge.c
+++ b/arch/arm/vdso/vdsomunge.c
@@ -45,7 +45,6 @@
  * it does.
  */
 
-#include <byteswap.h>
 #include <elf.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -59,6 +58,16 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#define swab16(x) \
+	((((x) & 0x00ff) << 8) | \
+	 (((x) & 0xff00) >> 8))
+
+#define swab32(x) \
+	((((x) & 0x000000ff) << 24) | \
+	 (((x) & 0x0000ff00) <<  8) | \
+	 (((x) & 0x00ff0000) >>  8) | \
+	 (((x) & 0xff000000) >> 24))
+
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define HOST_ORDER ELFDATA2LSB
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
@@ -104,17 +113,17 @@ static void cleanup(void)
 
 static Elf32_Word read_elf_word(Elf32_Word word, bool swap)
 {
-	return swap ? bswap_32(word) : word;
+	return swap ? swab32(word) : word;
 }
 
 static Elf32_Half read_elf_half(Elf32_Half half, bool swap)
 {
-	return swap ? bswap_16(half) : half;
+	return swap ? swab16(half) : half;
 }
 
 static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap)
 {
-	*dst = swap ? bswap_32(val) : val;
+	*dst = swap ? swab32(val) : val;
 }
 
 int main(int argc, char **argv)
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index eeeab074e154..fc7ea529f462 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -86,16 +86,25 @@ static void xen_percpu_init(void)
 	int err;
 	int cpu = get_cpu();
 
+	/* 
+	 * VCPUOP_register_vcpu_info cannot be called twice for the same
+	 * vcpu, so if vcpu_info is already registered, just get out. This
+	 * can happen with cpu-hotplug.
+	 */
+	if (per_cpu(xen_vcpu, cpu) != NULL)
+		goto after_register_vcpu_info;
+
 	pr_info("Xen: initializing cpu%d\n", cpu);
 	vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
 
-	info.mfn = __pa(vcpup) >> PAGE_SHIFT;
-	info.offset = offset_in_page(vcpup);
+	info.mfn = virt_to_gfn(vcpup);
+	info.offset = xen_offset_in_page(vcpup);
 
 	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 	BUG_ON(err);
 	per_cpu(xen_vcpu, cpu) = vcpup;
 
+after_register_vcpu_info:
 	enable_percpu_irq(xen_events_irq, 0);
 	put_cpu();
 }
@@ -124,6 +133,9 @@ static int xen_cpu_notification(struct notifier_block *self,
 	case CPU_STARTING:
 		xen_percpu_init();
 		break;
+	case CPU_DYING:
+		disable_percpu_irq(xen_events_irq);
+		break;
 	default:
 		break;
 	}
@@ -213,7 +225,7 @@ static int __init xen_guest_init(void)
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+	xatp.gpfn = virt_to_gfn(shared_info_page);
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();
 
@@ -284,7 +296,7 @@ void xen_arch_resume(void) { }
 void xen_arch_suspend(void) { }
 
 
-/* In the hypervisor.S file. */
+/* In the hypercall.S file. */
 EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
 EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 6dd911d1f0ac..7c34f7126b04 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -48,22 +48,22 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
 	size_t size, enum dma_data_direction dir, enum dma_cache_op op)
 {
 	struct gnttab_cache_flush cflush;
-	unsigned long pfn;
+	unsigned long xen_pfn;
 	size_t left = size;
 
-	pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
-	offset %= PAGE_SIZE;
+	xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
+	offset %= XEN_PAGE_SIZE;
 
 	do {
 		size_t len = left;
 	
 		/* buffers in highmem or foreign pages cannot cross page
 		 * boundaries */
-		if (len + offset > PAGE_SIZE)
-			len = PAGE_SIZE - offset;
+		if (len + offset > XEN_PAGE_SIZE)
+			len = XEN_PAGE_SIZE - offset;
 
 		cflush.op = 0;
-		cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
+		cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
 		cflush.offset = offset;
 		cflush.length = len;
 
@@ -79,7 +79,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
 			HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
 
 		offset = 0;
-		pfn++;
+		xen_pfn++;
 		left -= len;
 	} while (left);
 }
@@ -138,10 +138,29 @@ void __xen_dma_sync_single_for_device(struct device *hwdev,
 }
 
 bool xen_arch_need_swiotlb(struct device *dev,
-			   unsigned long pfn,
-			   unsigned long bfn)
+			   phys_addr_t phys,
+			   dma_addr_t dev_addr)
 {
-	return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev));
+	unsigned int xen_pfn = XEN_PFN_DOWN(phys);
+	unsigned int bfn = XEN_PFN_DOWN(dev_addr);
+
+	/*
+	 * The swiotlb buffer should be used if
+	 *	- Xen doesn't have the cache flush hypercall
+	 *	- The Linux page refers to foreign memory
+	 *	- The device doesn't support coherent DMA request
+	 *
+	 * The Linux page may be spanned acrros multiple Xen page, although
+	 * it's not possible to have a mix of local and foreign Xen page.
+	 * Furthermore, range_straddles_page_boundary is already checking
+	 * if buffer is physically contiguous in the host RAM.
+	 *
+	 * Therefore we only need to check the first Xen page to know if we
+	 * require a bounce buffer because the device doesn't support coherent
+	 * memory and we are not able to flush the cache.
+	 */
+	return (!hypercall_cflush && (xen_pfn != bfn) &&
+		!is_device_dma_coherent(dev));
 }
 
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 887596c67b12..0ed01f2d5ee4 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -93,8 +93,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 	for (i = 0; i < count; i++) {
 		if (map_ops[i].status)
 			continue;
-		set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT,
-				    map_ops[i].dev_bus_addr >> PAGE_SHIFT);
+		set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+				    map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT);
 	}
 
 	return 0;
@@ -108,7 +108,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 	int i;
 
 	for (i = 0; i < count; i++) {
-		set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT,
+		set_phys_to_machine(unmap_ops[i].host_addr >> XEN_PAGE_SHIFT,
 				    INVALID_P2M_ENTRY);
 	}
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07d1811aa03f..851fe11c6069 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -48,6 +48,7 @@ config ARM64
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -75,6 +76,7 @@ config ARM64
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_SYSCALL_TRACEPOINTS
+	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
@@ -169,10 +171,12 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
 	int
+	default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
 	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
 	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
 	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+	default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
+	default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
 
 source "init/Kconfig"
 
@@ -348,6 +352,33 @@ config ARM64_ERRATUM_843419
 
 	  If unsure, say Y.
 
+config CAVIUM_ERRATUM_22375
+	bool "Cavium erratum 22375, 24313"
+	default y
+	help
+	  Enable workaround for erratum 22375, 24313.
+
+	  This implements two gicv3-its errata workarounds for ThunderX. Both
+	  with small impact affecting only ITS table allocation.
+
+	    erratum 22375: only alloc 8MB table size
+	    erratum 24313: ignore memory access type
+
+	  The fixes are in ITS initialization and basically ignore memory access
+	  type and table size provided by the TYPER and BASER registers.
+
+	  If unsure, say Y.
+
+config CAVIUM_ERRATUM_23154
+	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+	default y
+	help
+	  The gicv3 of ThunderX requires a modified version for
+	  reading the IAR status to ensure data synchronization
+	  (access to icc_iar1_el1 is not sync'ed before and after).
+
+	  If unsure, say Y.
+
 endmenu
 
 
@@ -362,25 +393,37 @@ config ARM64_4K_PAGES
 	help
 	  This feature enables 4KB pages support.
 
+config ARM64_16K_PAGES
+	bool "16KB"
+	help
+	  The system will use 16KB pages support. AArch32 emulation
+	  requires applications compiled with 16K (or a multiple of 16K)
+	  aligned segments.
+
 config ARM64_64K_PAGES
 	bool "64KB"
 	help
 	  This feature enables 64KB pages support (4KB by default)
 	  allowing only two levels of page tables and faster TLB
-	  look-up. AArch32 emulation is not available when this feature
-	  is enabled.
+	  look-up. AArch32 emulation requires applications compiled
+	  with 64K aligned segments.
 
 endchoice
 
 choice
 	prompt "Virtual address space size"
 	default ARM64_VA_BITS_39 if ARM64_4K_PAGES
+	default ARM64_VA_BITS_47 if ARM64_16K_PAGES
 	default ARM64_VA_BITS_42 if ARM64_64K_PAGES
 	help
 	  Allows choosing one of multiple possible virtual address
 	  space sizes. The level of translation table is determined by
 	  a combination of page size and virtual address space size.
 
+config ARM64_VA_BITS_36
+	bool "36-bit" if EXPERT
+	depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_39
 	bool "39-bit"
 	depends on ARM64_4K_PAGES
@@ -389,6 +432,10 @@ config ARM64_VA_BITS_42
 	bool "42-bit"
 	depends on ARM64_64K_PAGES
 
+config ARM64_VA_BITS_47
+	bool "47-bit"
+	depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_48
 	bool "48-bit"
 
@@ -396,8 +443,10 @@ endchoice
 
 config ARM64_VA_BITS
 	int
+	default 36 if ARM64_VA_BITS_36
 	default 39 if ARM64_VA_BITS_39
 	default 42 if ARM64_VA_BITS_42
+	default 47 if ARM64_VA_BITS_47
 	default 48 if ARM64_VA_BITS_48
 
 config CPU_BIG_ENDIAN
@@ -427,15 +476,13 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
+	select GENERIC_IRQ_MIGRATION
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
 
 source kernel/Kconfig.preempt
-
-config HZ
-	int
-	default 100
+source kernel/Kconfig.hz
 
 config ARCH_HAS_HOLES_MEMORYMODEL
 	def_bool y if SPARSEMEM
@@ -454,12 +501,8 @@ config HAVE_ARCH_PFN_VALID
 	def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
 config HW_PERF_EVENTS
-	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS
-	default y
-	help
-	  Enable hardware performance counter support for perf events. If
-	  disabled, perf events will use software events only.
+	def_bool y
+	depends on ARM_PMU
 
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
@@ -468,7 +511,7 @@ config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
 config ARCH_WANT_HUGE_PMD_SHARE
-	def_bool y if !ARM64_64K_PAGES
+	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	def_bool y
@@ -505,7 +548,25 @@ config XEN
 config FORCE_MAX_ZONEORDER
 	int
 	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
 	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  We make sure that we can allocate upto a HugePage size for each configuration.
+	  Hence we have :
+		MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
+
+	  However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
+	  4M allocations matching the default size used by generic code.
 
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
@@ -680,7 +741,7 @@ source "fs/Kconfig.binfmt"
 
 config COMPAT
 	bool "Kernel support for 32-bit EL0"
-	depends on !ARM64_64K_PAGES || EXPERT
+	depends on ARM64_4K_PAGES || EXPERT
 	select COMPAT_BINFMT_ELF
 	select HAVE_UID16
 	select OLD_SIGSUSPEND3
@@ -691,9 +752,9 @@ config COMPAT
 	  the user helper functions, VFP support and the ptrace interface are
 	  handled appropriately by the kernel.
 
-	  If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
-	  will only be able to execute AArch32 binaries that were compiled with
-	  64k aligned segments.
+	  If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware
+	  that you will only be able to execute AArch32 binaries that were compiled
+	  with page size aligned segments.
 
 	  If you want to execute 32-bit userspace applications, say Y.
 
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index d6285ef9b5f9..c24d6adc0420 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -77,7 +77,7 @@ config DEBUG_RODATA
           If in doubt, say Y
 
 config DEBUG_ALIGN_RODATA
-	depends on DEBUG_RODATA && !ARM64_64K_PAGES
+	depends on DEBUG_RODATA && ARM64_4K_PAGES
 	bool "Align linker sections up to SECTION_SIZE"
 	help
 	  If this option is enabled, sections that may potentially be marked as
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f9914d7c1bb0..cd822d8454c0 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -42,7 +42,7 @@ endif
 CHECKFLAGS	+= -D__aarch64__
 
 ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
-CFLAGS_MODULE	+= -mcmodel=large
+KBUILD_CFLAGS_MODULE	+= -mcmodel=large
 endif
 
 # Default value
@@ -55,6 +55,13 @@ else
 TEXT_OFFSET := 0x00080000
 endif
 
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
+# in 32-bit arithmetic
+KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
+			(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+			+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
+			- (1 << (64 - 32 - 3)) )) )
+
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index d831bc2ac204..d6c9630a5c20 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -207,6 +207,17 @@
 				clock-output-names = "xge0clk";
 			};
 
+			xge1clk: xge1clk@1f62c000 {
+				compatible = "apm,xgene-device-clock";
+				status = "disabled";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f62c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				csr-mask = <0x3>;
+				clock-output-names = "xge1clk";
+			};
+
 			sataphy1clk: sataphy1clk@1f21c000 {
 				compatible = "apm,xgene-device-clock";
 				#clock-cells = <1>;
@@ -477,6 +488,16 @@
 				reg = <0x0 0x7c600000 0x0 0x200000>;
 				pmd-controller = <3>;
 			};
+
+			edacl3@7e600000 {
+				compatible = "apm,xgene-edac-l3";
+				reg = <0x0 0x7e600000 0x0 0x1000>;
+			};
+
+			edacsoc@7e930000 {
+				compatible = "apm,xgene-edac-soc-v1";
+				reg = <0x0 0x7e930000 0x0 0x1000>;
+			};
 		};
 
 		pcie0: pcie@1f2b0000 {
@@ -816,6 +837,23 @@
 			phy-connection-type = "xgmii";
 		};
 
+		xgenet1: ethernet@1f620000 {
+			compatible = "apm,xgene1-xgenet";
+			status = "disabled";
+			reg = <0x0 0x1f620000 0x0 0xd100>,
+			      <0x0 0x1f600000 0x0 0Xc300>,
+			      <0x0 0x18000000 0x0 0X8000>;
+			reg-names = "enet_csr", "ring_csr", "ring_cmd";
+			interrupts = <0x0 0x6C 0x4>,
+				     <0x0 0x6D 0x4>;
+			port-id = <1>;
+			dma-coherent;
+			clocks = <&xge1clk 0>;
+			/* mac address will be overwritten by the bootloader */
+			local-mac-address = [00 00 00 00 00 00];
+			phy-connection-type = "xgmii";
+		};
+
 		rng: rng@10520000 {
 			compatible = "apm,xgene-rng";
 			reg = <0x0 0x10520000 0x0 0x100>;
diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
index 637e046f0e36..3c386680357e 100644
--- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi
@@ -61,42 +61,42 @@
 
 				button@1 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <116>;
 					label = "POWER";
 					gpios = <&iofpga_gpio0 0 0x4>;
 				};
 				button@2 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <102>;
 					label = "HOME";
 					gpios = <&iofpga_gpio0 1 0x4>;
 				};
 				button@3 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <152>;
 					label = "RLOCK";
 					gpios = <&iofpga_gpio0 2 0x4>;
 				};
 				button@4 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <115>;
 					label = "VOL+";
 					gpios = <&iofpga_gpio0 3 0x4>;
 				};
 				button@5 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <114>;
 					label = "VOL-";
 					gpios = <&iofpga_gpio0 4 0x4>;
 				};
 				button@6 {
 					debounce_interval = <50>;
-					wakeup = <1>;
+					wakeup-source;
 					linux,code = <99>;
 					label = "NMI";
 					gpios = <&iofpga_gpio0 5 0x4>;
diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts
index c62751153a4f..734e1272b19f 100644
--- a/arch/arm64/boot/dts/arm/juno-r1.dts
+++ b/arch/arm64/boot/dts/arm/juno-r1.dts
@@ -91,17 +91,21 @@
 		};
 	};
 
-	pmu {
-		compatible = "arm,armv8-pmuv3";
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
 		interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&A57_0>,
+				     <&A57_1>;
+	};
+
+	pmu_a53 {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-affinity = <&A57_0>,
-				     <&A57_1>,
-				     <&A53_0>,
+		interrupt-affinity = <&A53_0>,
 				     <&A53_1>,
 				     <&A53_2>,
 				     <&A53_3>;
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index d7cbdd482a61..ffa05aeab3c7 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -91,17 +91,21 @@
 		};
 	};
 
-	pmu {
-		compatible = "arm,armv8-pmuv3";
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
 		interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&A57_0>,
+				     <&A57_1>;
+	};
+
+	pmu_a53 {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-affinity = <&A57_0>,
-				     <&A57_1>,
-				     <&A53_0>,
+		interrupt-affinity = <&A53_0>,
 				     <&A53_1>,
 				     <&A53_2>,
 				     <&A53_3>;
diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
new file mode 100644
index 000000000000..606dd5a05c2d
--- /dev/null
+++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi
@@ -0,0 +1,191 @@
+soc0: soc@000000000 {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	device_type = "soc";
+	compatible = "simple-bus";
+	ranges = <0x0 0x0 0x0 0x0 0x1 0x0>;
+	chip-id = <0>;
+
+	soc0_mdio0: mdio@803c0000 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "hisilicon,hns-mdio";
+		reg = <0x0 0x803c0000 0x0 0x10000
+		       0x0 0x80000000 0x0 0x10000>;
+
+		soc0_phy0: ethernet-phy@0 {
+			reg = <0x0>;
+			compatible = "ethernet-phy-ieee802.3-c22";
+		};
+		soc0_phy1: ethernet-phy@1 {
+			reg = <0x1>;
+			compatible = "ethernet-phy-ieee802.3-c22";
+		};
+	};
+
+	dsa: dsa@c7000000 {
+		compatible = "hisilicon,hns-dsaf-v1";
+		dsa_name = "dsaf0";
+		mode = "6port-16rss";
+		interrupt-parent = <&mbigen_dsa>;
+
+		reg = <0x0 0xC0000000 0x0 0x420000
+		       0x0 0xC2000000 0x0 0x300000
+		       0x0 0xc5000000 0x0 0x890000
+		       0x0 0xc7000000 0x0 0x60000
+		       >;
+
+		phy-handle = <0 0 0 0 &soc0_phy0 &soc0_phy1 0 0>;
+		interrupts = <
+			/* [14] ge fifo err 8 / xge 6**/
+			149 0x4 150 0x4 151 0x4 152 0x4
+			153 0x4 154 0x4  26 0x4 27 0x4
+			155 0x4 156 0x4 157 0x4 158 0x4 159 0x4 160 0x4
+			/* [12] rcb com 4*3**/
+			0x6 0x4 0x7 0x4 0x8 0x4 0x9 0x4
+			 16 0x4  17 0x4  18 0x4  19 0x4
+			 22 0x4  23 0x4  24 0x4  25 0x4
+			/* [8] ppe tnl 0-7***/
+			0x0 0x4 0x1 0x4 0x2 0x4 0x3 0x4
+			0x4 0x4 0x5 0x4 12 0x4 13 0x4
+			/* [21] dsaf event int 3+18**/
+			 128 0x4  129 0x4  130 0x4
+			0x83 0x4 0x84 0x4 0x85 0x4 0x86 0x4 0x87 0x4 0x88 0x4
+			0x89 0x4 0x8a 0x4 0x8b 0x4 0x8c 0x4 0x8d 0x4 0x8e 0x4
+			0x8f 0x4 0x90 0x4 0x91 0x4 0x92 0x4 0x93 0x4 0x94 0x4
+			/* [4] debug rcb 2*2*/
+			0xe 0x1 0xf 0x1 0x14 0x1 0x15 0x1
+			/* [256] sevice rcb 2*128*/
+			0x180 0x1 0x181 0x1 0x182 0x1 0x183 0x1
+			0x184 0x1 0x185 0x1 0x186 0x1 0x187 0x1
+			0x188 0x1 0x189 0x1 0x18a 0x1 0x18b 0x1
+			0x18c 0x1 0x18d 0x1 0x18e 0x1 0x18f 0x1
+			0x190 0x1 0x191 0x1 0x192 0x1 0x193 0x1
+			0x194 0x1 0x195 0x1 0x196 0x1 0x197 0x1
+			0x198 0x1 0x199 0x1 0x19a 0x1 0x19b 0x1
+			0x19c 0x1 0x19d 0x1 0x19e 0x1 0x19f 0x1
+			0x1a0 0x1 0x1a1 0x1 0x1a2 0x1 0x1a3 0x1
+			0x1a4 0x1 0x1a5 0x1 0x1a6 0x1 0x1a7 0x1
+			0x1a8 0x1 0x1a9 0x1 0x1aa 0x1 0x1ab 0x1
+			0x1ac 0x1 0x1ad 0x1 0x1ae 0x1 0x1af 0x1
+			0x1b0 0x1 0x1b1 0x1 0x1b2 0x1 0x1b3 0x1
+			0x1b4 0x1 0x1b5 0x1 0x1b6 0x1 0x1b7 0x1
+			0x1b8 0x1 0x1b9 0x1 0x1ba 0x1 0x1bb 0x1
+			0x1bc 0x1 0x1bd 0x1 0x1be 0x1 0x1bf 0x1
+			0x1c0 0x1 0x1c1 0x1 0x1c2 0x1 0x1c3 0x1
+			0x1c4 0x1 0x1c5 0x1 0x1c6 0x1 0x1c7 0x1
+			0x1c8 0x1 0x1c9 0x1 0x1ca 0x1 0x1cb 0x1
+			0x1cc 0x1 0x1cd 0x1 0x1ce 0x1 0x1cf 0x1
+			0x1d0 0x1 0x1d1 0x1 0x1d2 0x1 0x1d3 0x1
+			0x1d4 0x1 0x1d5 0x1 0x1d6 0x1 0x1d7 0x1
+			0x1d8 0x1 0x1d9 0x1 0x1da 0x1 0x1db 0x1
+			0x1dc 0x1 0x1dd 0x1 0x1de 0x1 0x1df 0x1
+			0x1e0 0x1 0x1e1 0x1 0x1e2 0x1 0x1e3 0x1
+			0x1e4 0x1 0x1e5 0x1 0x1e6 0x1 0x1e7 0x1
+			0x1e8 0x1 0x1e9 0x1 0x1ea 0x1 0x1eb 0x1
+			0x1ec 0x1 0x1ed 0x1 0x1ee 0x1 0x1ef 0x1
+			0x1f0 0x1 0x1f1 0x1 0x1f2 0x1 0x1f3 0x1
+			0x1f4 0x1 0x1f5 0x1 0x1f6 0x1 0x1f7 0x1
+			0x1f8 0x1 0x1f9 0x1 0x1fa 0x1 0x1fb 0x1
+			0x1fc 0x1 0x1fd 0x1 0x1fe 0x1 0x1ff 0x1
+			0x200 0x1 0x201 0x1 0x202 0x1 0x203 0x1
+			0x204 0x1 0x205 0x1 0x206 0x1 0x207 0x1
+			0x208 0x1 0x209 0x1 0x20a 0x1 0x20b 0x1
+			0x20c 0x1 0x20d 0x1 0x20e 0x1 0x20f 0x1
+			0x210 0x1 0x211 0x1 0x212 0x1 0x213 0x1
+			0x214 0x1 0x215 0x1 0x216 0x1 0x217 0x1
+			0x218 0x1 0x219 0x1 0x21a 0x1 0x21b 0x1
+			0x21c 0x1 0x21d 0x1 0x21e 0x1 0x21f 0x1
+			0x220 0x1 0x221 0x1 0x222 0x1 0x223 0x1
+			0x224 0x1 0x225 0x1 0x226 0x1 0x227 0x1
+			0x228 0x1 0x229 0x1 0x22a 0x1 0x22b 0x1
+			0x22c 0x1 0x22d 0x1 0x22e 0x1 0x22f 0x1
+			0x230 0x1 0x231 0x1 0x232 0x1 0x233 0x1
+			0x234 0x1 0x235 0x1 0x236 0x1 0x237 0x1
+			0x238 0x1 0x239 0x1 0x23a 0x1 0x23b 0x1
+			0x23c 0x1 0x23d 0x1 0x23e 0x1 0x23f 0x1
+			0x240 0x1 0x241 0x1 0x242 0x1 0x243 0x1
+			0x244 0x1 0x245 0x1 0x246 0x1 0x247 0x1
+			0x248 0x1 0x249 0x1 0x24a 0x1 0x24b 0x1
+			0x24c 0x1 0x24d 0x1 0x24e 0x1 0x24f 0x1
+			0x250 0x1 0x251 0x1 0x252 0x1 0x253 0x1
+			0x254 0x1 0x255 0x1 0x256 0x1 0x257 0x1
+			0x258 0x1 0x259 0x1 0x25a 0x1 0x25b 0x1
+			0x25c 0x1 0x25d 0x1 0x25e 0x1 0x25f 0x1
+			0x260 0x1 0x261 0x1 0x262 0x1 0x263 0x1
+			0x264 0x1 0x265 0x1 0x266 0x1 0x267 0x1
+			0x268 0x1 0x269 0x1 0x26a 0x1 0x26b 0x1
+			0x26c 0x1 0x26d 0x1 0x26e 0x1 0x26f 0x1
+			0x270 0x1 0x271 0x1 0x272 0x1 0x273 0x1
+			0x274 0x1 0x275 0x1 0x276 0x1 0x277 0x1
+			0x278 0x1 0x279 0x1 0x27a 0x1 0x27b 0x1
+			0x27c 0x1 0x27d 0x1 0x27e 0x1 0x27f 0x1>;
+		buf-size = <4096>;
+		desc-num = <1024>;
+		dma-coherent;
+	};
+
+	eth0: ethernet@0{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <0>;
+		local-mac-address = [00 00 00 01 00 58];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth1: ethernet@1{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <1>;
+		status = "disabled";
+		dma-coherent;
+	};
+	eth2: ethernet@2{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <2>;
+		local-mac-address = [00 00 00 01 00 5a];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth3: ethernet@3{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <3>;
+		local-mac-address = [00 00 00 01 00 5b];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth4: ethernet@4{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <4>;
+		local-mac-address = [00 00 00 01 00 5c];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth5: ethernet@5{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <5>;
+		local-mac-address = [00 00 00 01 00 5d];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth6: ethernet@6{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <6>;
+		local-mac-address = [00 00 00 01 00 5e];
+		status = "disabled";
+		dma-coherent;
+	};
+	eth7: ethernet@7{
+		compatible = "hisilicon,hns-nic-v1";
+		ae-name = "dsaf0";
+		port-id = <7>;
+		local-mac-address = [00 00 00 01 00 5f];
+		status = "disabled";
+		dma-coherent;
+	};
+};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 34d71dd86781..5f760347aee2 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -51,6 +51,7 @@ CONFIG_PCI=y
 CONFIG_PCI_MSI=y
 CONFIG_PCI_XGENE=y
 CONFIG_SMP=y
+CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_SAMSUNG=y
+CONFIG_SERIAL_SAMSUNG_UARTS_4=y
+CONFIG_SERIAL_SAMSUNG_UARTS=4
+CONFIG_SERIAL_SAMSUNG_CONSOLE=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SPI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_MMC_DW_PLTFM=y
+CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_SYSCON=y
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 208cec08a74f..caafd63b8092 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -12,7 +12,6 @@
 #ifndef _ASM_ACPI_H
 #define _ASM_ACPI_H
 
-#include <linux/irqchip/arm-gic-acpi.h>
 #include <linux/mm.h>
 #include <linux/psci.h>
 
@@ -92,4 +91,9 @@ static inline const char *acpi_get_enable_method(int cpu)
 {
 	return acpi_psci_present() ? "psci" : NULL;
 }
+
+#ifdef	CONFIG_ACPI_APEI
+pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
+#endif
+
 #endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
new file mode 100644
index 000000000000..030cdcb46c6b
--- /dev/null
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -0,0 +1,170 @@
+/*
+ * arch/arm64/include/asm/arch_gicv3.h
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_ARCH_GICV3_H
+#define __ASM_ARCH_GICV3_H
+
+#include <asm/sysreg.h>
+
+#define ICC_EOIR1_EL1			sys_reg(3, 0, 12, 12, 1)
+#define ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
+#define ICC_IAR1_EL1			sys_reg(3, 0, 12, 12, 0)
+#define ICC_SGI1R_EL1			sys_reg(3, 0, 12, 11, 5)
+#define ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+#define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
+
+#define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
+
+/*
+ * System register definitions
+ */
+#define ICH_VSEIR_EL2			sys_reg(3, 4, 12, 9, 4)
+#define ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
+#define ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
+#define ICH_MISR_EL2			sys_reg(3, 4, 12, 11, 2)
+#define ICH_EISR_EL2			sys_reg(3, 4, 12, 11, 3)
+#define ICH_ELSR_EL2			sys_reg(3, 4, 12, 11, 5)
+#define ICH_VMCR_EL2			sys_reg(3, 4, 12, 11, 7)
+
+#define __LR0_EL2(x)			sys_reg(3, 4, 12, 12, x)
+#define __LR8_EL2(x)			sys_reg(3, 4, 12, 13, x)
+
+#define ICH_LR0_EL2			__LR0_EL2(0)
+#define ICH_LR1_EL2			__LR0_EL2(1)
+#define ICH_LR2_EL2			__LR0_EL2(2)
+#define ICH_LR3_EL2			__LR0_EL2(3)
+#define ICH_LR4_EL2			__LR0_EL2(4)
+#define ICH_LR5_EL2			__LR0_EL2(5)
+#define ICH_LR6_EL2			__LR0_EL2(6)
+#define ICH_LR7_EL2			__LR0_EL2(7)
+#define ICH_LR8_EL2			__LR8_EL2(0)
+#define ICH_LR9_EL2			__LR8_EL2(1)
+#define ICH_LR10_EL2			__LR8_EL2(2)
+#define ICH_LR11_EL2			__LR8_EL2(3)
+#define ICH_LR12_EL2			__LR8_EL2(4)
+#define ICH_LR13_EL2			__LR8_EL2(5)
+#define ICH_LR14_EL2			__LR8_EL2(6)
+#define ICH_LR15_EL2			__LR8_EL2(7)
+
+#define __AP0Rx_EL2(x)			sys_reg(3, 4, 12, 8, x)
+#define ICH_AP0R0_EL2			__AP0Rx_EL2(0)
+#define ICH_AP0R1_EL2			__AP0Rx_EL2(1)
+#define ICH_AP0R2_EL2			__AP0Rx_EL2(2)
+#define ICH_AP0R3_EL2			__AP0Rx_EL2(3)
+
+#define __AP1Rx_EL2(x)			sys_reg(3, 4, 12, 9, x)
+#define ICH_AP1R0_EL2			__AP1Rx_EL2(0)
+#define ICH_AP1R1_EL2			__AP1Rx_EL2(1)
+#define ICH_AP1R2_EL2			__AP1Rx_EL2(2)
+#define ICH_AP1R3_EL2			__AP1Rx_EL2(3)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stringify.h>
+
+/*
+ * Low-level accessors
+ *
+ * These system registers are 32 bits, but we make sure that the compiler
+ * sets the GP register's most significant bits to 0 with an explicit cast.
+ */
+
+static inline void gic_write_eoir(u32 irq)
+{
+	asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" ((u64)irq));
+	isb();
+}
+
+static inline void gic_write_dir(u32 irq)
+{
+	asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" ((u64)irq));
+	isb();
+}
+
+static inline u64 gic_read_iar_common(void)
+{
+	u64 irqstat;
+
+	asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat));
+	return irqstat;
+}
+
+/*
+ * Cavium ThunderX erratum 23154
+ *
+ * The gicv3 of ThunderX requires a modified version for reading the
+ * IAR status to ensure data synchronization (access to icc_iar1_el1
+ * is not sync'ed before and after).
+ */
+static inline u64 gic_read_iar_cavium_thunderx(void)
+{
+	u64 irqstat;
+
+	asm volatile(
+		"nop;nop;nop;nop\n\t"
+		"nop;nop;nop;nop\n\t"
+		"mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t"
+		"nop;nop;nop;nop"
+		: "=r" (irqstat));
+	mb();
+
+	return irqstat;
+}
+
+static inline void gic_write_pmr(u32 val)
+{
+	asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" ((u64)val));
+}
+
+static inline void gic_write_ctlr(u32 val)
+{
+	asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" ((u64)val));
+	isb();
+}
+
+static inline void gic_write_grpen1(u32 val)
+{
+	asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" ((u64)val));
+	isb();
+}
+
+static inline void gic_write_sgi1r(u64 val)
+{
+	asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val));
+}
+
+static inline u32 gic_read_sre(void)
+{
+	u64 val;
+
+	asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val));
+	return val;
+}
+
+static inline void gic_write_sre(u32 val)
+{
+	asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((u64)val));
+	isb();
+}
+
+#define gic_read_typer(c)		readq_relaxed(c)
+#define gic_write_irouter(v, c)		writeq_relaxed(v, c)
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_ARCH_GICV3_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b51f2cc22ca9..12eff928ef8b 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -193,4 +193,15 @@ lr	.req	x30		// link register
 	str	\src, [\tmp, :lo12:\sym]
 	.endm
 
+/*
+ * Annotate a function as position independent, i.e., safe to be called before
+ * the kernel virtual mapping is activated.
+ */
+#define ENDPIPROC(x)			\
+	.globl	__pi_##x;		\
+	.type 	__pi_##x, %function;	\
+	.set	__pi_##x, x;		\
+	.size	__pi_##x, . - x;	\
+	ENDPROC(x)
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 35a67783cfa0..f3a3586a421c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -54,14 +54,43 @@
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)			READ_ONCE((v)->counter)
-#define atomic_set(v, i)		(((v)->counter) = (i))
+#define atomic_set(v, i)		WRITE_ONCE(((v)->counter), (i))
+
+#define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_add_return_acquire	atomic_add_return_acquire
+#define atomic_add_return_release	atomic_add_return_release
+#define atomic_add_return		atomic_add_return
+
+#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+#define atomic_sub_return_acquire	atomic_sub_return_acquire
+#define atomic_sub_return_release	atomic_sub_return_release
+#define atomic_sub_return		atomic_sub_return
+
+#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+
+#define atomic_xchg_relaxed(v, new)	xchg_relaxed(&((v)->counter), (new))
+#define atomic_xchg_acquire(v, new)	xchg_acquire(&((v)->counter), (new))
+#define atomic_xchg_release(v, new)	xchg_release(&((v)->counter), (new))
 #define atomic_xchg(v, new)		xchg(&((v)->counter), (new))
+
+#define atomic_cmpxchg_relaxed(v, old, new)				\
+	cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new)				\
+	cmpxchg_acquire(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_release(v, old, new)				\
+	cmpxchg_release(&((v)->counter), (old), (new))
 #define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
@@ -75,13 +104,39 @@
 #define ATOMIC64_INIT			ATOMIC_INIT
 #define atomic64_read			atomic_read
 #define atomic64_set			atomic_set
+
+#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+#define atomic64_add_return_acquire	atomic64_add_return_acquire
+#define atomic64_add_return_release	atomic64_add_return_release
+#define atomic64_add_return		atomic64_add_return
+
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+
+#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+#define atomic64_sub_return_acquire	atomic64_sub_return_acquire
+#define atomic64_sub_return_release	atomic64_sub_return_release
+#define atomic64_sub_return		atomic64_sub_return
+
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+
+#define atomic64_xchg_relaxed		atomic_xchg_relaxed
+#define atomic64_xchg_acquire		atomic_xchg_acquire
+#define atomic64_xchg_release		atomic_xchg_release
 #define atomic64_xchg			atomic_xchg
+
+#define atomic64_cmpxchg_relaxed	atomic_cmpxchg_relaxed
+#define atomic64_cmpxchg_acquire	atomic_cmpxchg_acquire
+#define atomic64_cmpxchg_release	atomic_cmpxchg_release
 #define atomic64_cmpxchg		atomic_cmpxchg
 
 #define atomic64_inc(v)			atomic64_add(1, (v))
 #define atomic64_dec(v)			atomic64_sub(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
 #define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index b3b5c4ae3800..74d0b8eb0799 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))				\
 }									\
 __LL_SC_EXPORT(atomic_##op);
 
-#define ATOMIC_OP_RETURN(op, asm_op)					\
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)		\
 __LL_SC_INLINE int							\
-__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))		\
+__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v))		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
-	asm volatile("// atomic_" #op "_return\n"			\
+	asm volatile("// atomic_" #op "_return" #name "\n"		\
 "	prfm	pstl1strm, %2\n"					\
-"1:	ldxr	%w0, %2\n"						\
+"1:	ld" #acq "xr	%w0, %2\n"					\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
-"	stlxr	%w1, %w0, %2\n"						\
-"	cbnz	%w1, 1b"						\
+"	st" #rel "xr	%w1, %w0, %2\n"					\
+"	cbnz	%w1, 1b\n"						\
+"	" #mb								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: "Ir" (i)							\
-	: "memory");							\
+	: cl);								\
 									\
-	smp_mb();							\
 	return result;							\
 }									\
-__LL_SC_EXPORT(atomic_##op##_return);
+__LL_SC_EXPORT(atomic_##op##_return##name);
+
+#define ATOMIC_OPS(...)							\
+	ATOMIC_OP(__VA_ARGS__)						\
+	ATOMIC_OP_RETURN(        , dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS_RLX(...)						\
+	ATOMIC_OPS(__VA_ARGS__)						\
+	ATOMIC_OP_RETURN(_relaxed,        ,  ,  ,         , __VA_ARGS__)\
+	ATOMIC_OP_RETURN(_acquire,        , a,  , "memory", __VA_ARGS__)\
+	ATOMIC_OP_RETURN(_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS_RLX(add, add)
+ATOMIC_OPS_RLX(sub, sub)
 
 ATOMIC_OP(and, and)
 ATOMIC_OP(andnot, bic)
 ATOMIC_OP(or, orr)
 ATOMIC_OP(xor, eor)
 
+#undef ATOMIC_OPS_RLX
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))			\
 }									\
 __LL_SC_EXPORT(atomic64_##op);
 
-#define ATOMIC64_OP_RETURN(op, asm_op)					\
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)		\
 __LL_SC_INLINE long							\
-__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))		\
+__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v))	\
 {									\
 	long result;							\
 	unsigned long tmp;						\
 									\
-	asm volatile("// atomic64_" #op "_return\n"			\
+	asm volatile("// atomic64_" #op "_return" #name "\n"		\
 "	prfm	pstl1strm, %2\n"					\
-"1:	ldxr	%0, %2\n"						\
+"1:	ld" #acq "xr	%0, %2\n"					\
 "	" #asm_op "	%0, %0, %3\n"					\
-"	stlxr	%w1, %0, %2\n"						\
-"	cbnz	%w1, 1b"						\
+"	st" #rel "xr	%w1, %0, %2\n"					\
+"	cbnz	%w1, 1b\n"						\
+"	" #mb								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: "Ir" (i)							\
-	: "memory");							\
+	: cl);								\
 									\
-	smp_mb();							\
 	return result;							\
 }									\
-__LL_SC_EXPORT(atomic64_##op##_return);
+__LL_SC_EXPORT(atomic64_##op##_return##name);
+
+#define ATOMIC64_OPS(...)						\
+	ATOMIC64_OP(__VA_ARGS__)					\
+	ATOMIC64_OP_RETURN(, dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC64_OPS(op, asm_op)					\
-	ATOMIC64_OP(op, asm_op)						\
-	ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS_RLX(...)						\
+	ATOMIC64_OPS(__VA_ARGS__)					\
+	ATOMIC64_OP_RETURN(_relaxed,,  ,  ,         , __VA_ARGS__)	\
+	ATOMIC64_OP_RETURN(_acquire,, a,  , "memory", __VA_ARGS__)	\
+	ATOMIC64_OP_RETURN(_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS_RLX(add, add)
+ATOMIC64_OPS_RLX(sub, sub)
 
 ATOMIC64_OP(and, and)
 ATOMIC64_OP(andnot, bic)
 ATOMIC64_OP(or, orr)
 ATOMIC64_OP(xor, eor)
 
+#undef ATOMIC64_OPS_RLX
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
-#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl)			\
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl)			\
 __LL_SC_INLINE unsigned long						\
 __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 				     unsigned long old,			\
@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 									\
 	asm volatile(							\
 	"	prfm	pstl1strm, %[v]\n"				\
-	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
+	"1:	ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n"		\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
 	"	cbnz	%" #w "[tmp], 2f\n"				\
 	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 }									\
 __LL_SC_EXPORT(__cmpxchg_case_##name);
 
-__CMPXCHG_CASE(w, b,    1,        ,  ,         )
-__CMPXCHG_CASE(w, h,    2,        ,  ,         )
-__CMPXCHG_CASE(w,  ,    4,        ,  ,         )
-__CMPXCHG_CASE( ,  ,    8,        ,  ,         )
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
-__CMPXCHG_CASE(w,  , mb_4, dmb ish, l, "memory")
-__CMPXCHG_CASE( ,  , mb_8, dmb ish, l, "memory")
+__CMPXCHG_CASE(w, b,     1,        ,  ,  ,         )
+__CMPXCHG_CASE(w, h,     2,        ,  ,  ,         )
+__CMPXCHG_CASE(w,  ,     4,        ,  ,  ,         )
+__CMPXCHG_CASE( ,  ,     8,        ,  ,  ,         )
+__CMPXCHG_CASE(w, b, acq_1,        , a,  , "memory")
+__CMPXCHG_CASE(w, h, acq_2,        , a,  , "memory")
+__CMPXCHG_CASE(w,  , acq_4,        , a,  , "memory")
+__CMPXCHG_CASE( ,  , acq_8,        , a,  , "memory")
+__CMPXCHG_CASE(w, b, rel_1,        ,  , l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,        ,  , l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,        ,  , l, "memory")
+__CMPXCHG_CASE( ,  , rel_8,        ,  , l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, dmb ish,  , l, "memory")
+__CMPXCHG_CASE( ,  ,  mb_8, dmb ish,  , l, "memory")
 
 #undef __CMPXCHG_CASE
 
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 55d740e63459..1fce7908e690 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
 	: "x30");
 }
 
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
+static inline int atomic_add_return##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC(add_return##name),				\
+	/* LSE atomics */						\
+	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
+	"	add	%w[i], %w[i], w30")				\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return w0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(add_return),
-	/* LSE atomics */
-	"	ldaddal	%w[i], w30, %[v]\n"
-	"	add	%w[i], %w[i], w30")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC_OP_ADD_RETURN(_relaxed,   )
+ATOMIC_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC_OP_ADD_RETURN(        , al, "memory")
 
-	return w0;
-}
+#undef ATOMIC_OP_ADD_RETURN
 
 static inline void atomic_and(int i, atomic_t *v)
 {
@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
 	: "x30");
 }
 
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(sub_return)
-	"	nop",
-	/* LSE atomics */
-	"	neg	%w[i], %w[i]\n"
-	"	ldaddal	%w[i], w30, %[v]\n"
-	"	add	%w[i], %w[i], w30")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
-
-	return w0;
+#define ATOMIC_OP_SUB_RETURN(name, mb, cl...)				\
+static inline int atomic_sub_return##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC(sub_return##name)				\
+	"	nop",							\
+	/* LSE atomics */						\
+	"	neg	%w[i], %w[i]\n"					\
+	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
+	"	add	%w[i], %w[i], w30")				\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return w0;							\
 }
 
+ATOMIC_OP_SUB_RETURN(_relaxed,   )
+ATOMIC_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC_OP_SUB_RETURN(        , al, "memory")
+
+#undef ATOMIC_OP_SUB_RETURN
 #undef __LL_SC_ATOMIC
 
 #define __LL_SC_ATOMIC64(op)	__LL_SC_CALL(atomic64_##op)
@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	: "x30");
 }
 
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)				\
+static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
+{									\
+	register long x0 asm ("x0") = i;				\
+	register atomic64_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC64(add_return##name),				\
+	/* LSE atomics */						\
+	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
+	"	add	%[i], %[i], x30")				\
+	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(add_return),
-	/* LSE atomics */
-	"	ldaddal	%[i], x30, %[v]\n"
-	"	add	%[i], %[i], x30")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC64_OP_ADD_RETURN(_relaxed,   )
+ATOMIC64_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC64_OP_ADD_RETURN(        , al, "memory")
 
-	return x0;
-}
+#undef ATOMIC64_OP_ADD_RETURN
 
 static inline void atomic64_and(long i, atomic64_t *v)
 {
@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	: "x30");
 }
 
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)				\
+static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
+{									\
+	register long x0 asm ("x0") = i;				\
+	register atomic64_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC64(sub_return##name)				\
+	"	nop",							\
+	/* LSE atomics */						\
+	"	neg	%[i], %[i]\n"					\
+	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
+	"	add	%[i], %[i], x30")				\
+	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(sub_return)
-	"	nop",
-	/* LSE atomics */
-	"	neg	%[i], %[i]\n"
-	"	ldaddal	%[i], x30, %[v]\n"
-	"	add	%[i], %[i], x30")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC64_OP_SUB_RETURN(_relaxed,   )
+ATOMIC64_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 
-	return x0;
-}
+#undef ATOMIC64_OP_SUB_RETURN
 
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	return x0;							\
 }
 
-__CMPXCHG_CASE(w, b,    1,   )
-__CMPXCHG_CASE(w, h,    2,   )
-__CMPXCHG_CASE(w,  ,    4,   )
-__CMPXCHG_CASE(x,  ,    8,   )
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w,  , mb_4, al, "memory")
-__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b,     1,   )
+__CMPXCHG_CASE(w, h,     2,   )
+__CMPXCHG_CASE(w,  ,     4,   )
+__CMPXCHG_CASE(x,  ,     8,   )
+__CMPXCHG_CASE(w, b, acq_1,  a, "memory")
+__CMPXCHG_CASE(w, h, acq_2,  a, "memory")
+__CMPXCHG_CASE(w,  , acq_4,  a, "memory")
+__CMPXCHG_CASE(x,  , acq_8,  a, "memory")
+__CMPXCHG_CASE(w, b, rel_1,  l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,  l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,  l, "memory")
+__CMPXCHG_CASE(x,  , rel_8,  l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, al, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, al, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, al, "memory")
+__CMPXCHG_CASE(x,  ,  mb_8, al, "memory")
 
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index bde449936e2f..5082b30bc2c0 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -18,7 +18,7 @@
 
 #include <asm/cachetype.h>
 
-#define L1_CACHE_SHIFT		6
+#define L1_CACHE_SHIFT		7
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
 /*
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c75b8d027eb1..54efedaf331f 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
+static inline void __local_flush_icache_all(void)
+{
+	asm("ic iallu");
+	dsb(nsh);
+	isb();
+}
+
 static inline void __flush_icache_all(void)
 {
 	asm("ic	ialluis");
diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index da2fc9e3cedd..f5588692f1d4 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -34,8 +34,8 @@
 
 #define CTR_L1IP(ctr)	(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
-#define ICACHEF_ALIASING	BIT(0)
-#define ICACHEF_AIVIVT		BIT(1)
+#define ICACHEF_ALIASING	0
+#define ICACHEF_AIVIVT		1
 
 extern unsigned long __icache_flags;
 
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 899e9f1d19e4..9ea611ea69df 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -25,154 +25,151 @@
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
-{
-	unsigned long ret, tmp;
-
-	switch (size) {
-	case 1:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxrb	%w0, %2\n"
-		"	stlxrb	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpalb	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 2:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxrh	%w0, %2\n"
-		"	stlxrh	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpalh	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 4:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxr	%w0, %2\n"
-		"	stlxr	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpal	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 8:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxr	%0, %2\n"
-		"	stlxr	%w1, %3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpal	%3, %0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	default:
-		BUILD_BUG();
-	}
-
-	return ret;
+/*
+ * We need separate acquire parameters for ll/sc and lse, since the full
+ * barrier case is generated as release+dmb for the former and
+ * acquire+release for the latter.
+ */
+#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl)	\
+static inline unsigned long __xchg_case_##name(unsigned long x,		\
+					       volatile void *ptr)	\
+{									\
+	unsigned long ret, tmp;						\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	prfm	pstl1strm, %2\n"				\
+	"1:	ld" #acq "xr" #sz "\t%" #w "0, %2\n"			\
+	"	st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n"		\
+	"	cbnz	%w1, 1b\n"					\
+	"	" #mb,							\
+	/* LSE atomics */						\
+	"	nop\n"							\
+	"	nop\n"							\
+	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
+	"	nop\n"							\
+	"	" #nop_lse)						\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "r" (x)							\
+	: cl);								\
+									\
+	return ret;							\
 }
 
-#define xchg(ptr,x) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
-	__ret; \
+__XCHG_CASE(w, b,     1,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     2,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     4,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_1,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_2,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_4,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_1,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_2,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_4,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_1, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_2, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_4, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_8, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
+#define __XCHG_GEN(sfx)							\
+static inline unsigned long __xchg##sfx(unsigned long x,		\
+					volatile void *ptr,		\
+					int size)			\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __xchg_case##sfx##_1(x, ptr);			\
+	case 2:								\
+		return __xchg_case##sfx##_2(x, ptr);			\
+	case 4:								\
+		return __xchg_case##sfx##_4(x, ptr);			\
+	case 8:								\
+		return __xchg_case##sfx##_8(x, ptr);			\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	unreachable();							\
+}
+
+__XCHG_GEN()
+__XCHG_GEN(_acq)
+__XCHG_GEN(_rel)
+__XCHG_GEN(_mb)
+
+#undef __XCHG_GEN
+
+#define __xchg_wrapper(sfx, ptr, x)					\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__ret = (__typeof__(*(ptr)))					\
+		__xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+	__ret;								\
 })
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_case_1(ptr, (u8)old, new);
-	case 2:
-		return __cmpxchg_case_2(ptr, (u16)old, new);
-	case 4:
-		return __cmpxchg_case_4(ptr, old, new);
-	case 8:
-		return __cmpxchg_case_8(ptr, old, new);
-	default:
-		BUILD_BUG();
-	}
-
-	unreachable();
+/* xchg */
+#define xchg_relaxed(...)	__xchg_wrapper(    , __VA_ARGS__)
+#define xchg_acquire(...)	__xchg_wrapper(_acq, __VA_ARGS__)
+#define xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
+#define xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
+
+#define __CMPXCHG_GEN(sfx)						\
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
+					   unsigned long old,		\
+					   unsigned long new,		\
+					   int size)			\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __cmpxchg_case##sfx##_1(ptr, (u8)old, new);	\
+	case 2:								\
+		return __cmpxchg_case##sfx##_2(ptr, (u16)old, new);	\
+	case 4:								\
+		return __cmpxchg_case##sfx##_4(ptr, old, new);		\
+	case 8:								\
+		return __cmpxchg_case##sfx##_8(ptr, old, new);		\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	unreachable();							\
 }
 
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
-					 unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_case_mb_1(ptr, (u8)old, new);
-	case 2:
-		return __cmpxchg_case_mb_2(ptr, (u16)old, new);
-	case 4:
-		return __cmpxchg_case_mb_4(ptr, old, new);
-	case 8:
-		return __cmpxchg_case_mb_8(ptr, old, new);
-	default:
-		BUILD_BUG();
-	}
-
-	unreachable();
-}
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+__CMPXCHG_GEN(_rel)
+__CMPXCHG_GEN(_mb)
 
-#define cmpxchg(ptr, o, n) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
-			     sizeof(*(ptr))); \
-	__ret; \
-})
+#undef __CMPXCHG_GEN
 
-#define cmpxchg_local(ptr, o, n) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__cmpxchg((ptr), (unsigned long)(o), \
-			  (unsigned long)(n), sizeof(*(ptr))); \
-	__ret; \
+#define __cmpxchg_wrapper(sfx, ptr, o, n)				\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__ret = (__typeof__(*(ptr)))					\
+		__cmpxchg##sfx((ptr), (unsigned long)(o),		\
+				(unsigned long)(n), sizeof(*(ptr)));	\
+	__ret;								\
 })
 
+/* cmpxchg */
+#define cmpxchg_relaxed(...)	__cmpxchg_wrapper(    , __VA_ARGS__)
+#define cmpxchg_acquire(...)	__cmpxchg_wrapper(_acq, __VA_ARGS__)
+#define cmpxchg_release(...)	__cmpxchg_wrapper(_rel, __VA_ARGS__)
+#define cmpxchg(...)		__cmpxchg_wrapper( _mb, __VA_ARGS__)
+#define cmpxchg_local		cmpxchg_relaxed
+
+/* cmpxchg64 */
+#define cmpxchg64_relaxed	cmpxchg_relaxed
+#define cmpxchg64_acquire	cmpxchg_acquire
+#define cmpxchg64_release	cmpxchg_release
+#define cmpxchg64		cmpxchg
+#define cmpxchg64_local		cmpxchg_local
+
+/* cmpxchg_double */
 #define system_has_cmpxchg_double()     1
 
 #define __cmpxchg_double_check(ptr1, ptr2)					\
@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret; \
 })
 
+/* this_cpu_cmpxchg */
 #define _protect_cmpxchg_local(pcp, o, n)			\
 ({								\
 	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret;								\
 })
 
-#define cmpxchg64(ptr,o,n)		cmpxchg((ptr),(o),(n))
-#define cmpxchg64_local(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
-
-#define cmpxchg64_relaxed(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
-
 #endif	/* __ASM_CMPXCHG_H */
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 8e797b2fcc01..b5e9cee4b5f8 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
 
+void __init init_cpu_features(struct cpuinfo_arm64 *info);
+void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+				 struct cpuinfo_arm64 *boot);
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 171570702bb8..11d5bb0fdd54 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,6 +10,7 @@
 #define __ASM_CPUFEATURE_H
 
 #include <asm/hwcap.h>
+#include <asm/sysreg.h>
 
 /*
  * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
@@ -27,18 +28,50 @@
 #define ARM64_HAS_SYSREG_GIC_CPUIF		3
 #define ARM64_HAS_PAN				4
 #define ARM64_HAS_LSE_ATOMICS			5
+#define ARM64_WORKAROUND_CAVIUM_23154		6
 
-#define ARM64_NCAPS				6
+#define ARM64_NCAPS				7
 
 #ifndef __ASSEMBLY__
 
 #include <linux/kernel.h>
 
+/* CPU feature register tracking */
+enum ftr_type {
+	FTR_EXACT,	/* Use a predefined safe value */
+	FTR_LOWER_SAFE,	/* Smaller value is safe */
+	FTR_HIGHER_SAFE,/* Bigger value is safe */
+};
+
+#define FTR_STRICT	true	/* SANITY check strict matching required */
+#define FTR_NONSTRICT	false	/* SANITY check ignored */
+
+struct arm64_ftr_bits {
+	bool		strict;	  /* CPU Sanity check: strict matching required ? */
+	enum ftr_type	type;
+	u8		shift;
+	u8		width;
+	s64		safe_val; /* safe value for discrete features */
+};
+
+/*
+ * @arm64_ftr_reg - Feature register
+ * @strict_mask		Bits which should match across all CPUs for sanity.
+ * @sys_val		Safe value across the CPUs (system view)
+ */
+struct arm64_ftr_reg {
+	u32			sys_id;
+	const char		*name;
+	u64			strict_mask;
+	u64			sys_val;
+	struct arm64_ftr_bits	*ftr_bits;
+};
+
 struct arm64_cpu_capabilities {
 	const char *desc;
 	u16 capability;
 	bool (*matches)(const struct arm64_cpu_capabilities *);
-	void (*enable)(void);
+	void (*enable)(void *);		/* Called on all active CPUs */
 	union {
 		struct {	/* To be used for erratum handling only */
 			u32 midr_model;
@@ -46,8 +79,11 @@ struct arm64_cpu_capabilities {
 		};
 
 		struct {	/* Feature register checking */
+			u32 sys_reg;
 			int field_pos;
 			int min_field_value;
+			int hwcap_type;
+			unsigned long hwcap;
 		};
 	};
 };
@@ -75,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num)
 		__set_bit(num, cpu_hwcaps);
 }
 
-static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
-								  int field)
+static inline int __attribute_const__
+cpuid_feature_extract_field_width(u64 features, int field, int width)
+{
+	return (s64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field)
+{
+	return cpuid_feature_extract_field_width(features, field, 4);
+}
+
+static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+{
+	return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
+}
+
+static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+{
+	return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+}
+
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 {
-	return (s64)(features << (64 - 4 - field)) >> (64 - 4);
+	return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+		cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
 }
 
+void __init setup_cpu_features(void);
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void check_local_cpu_errata(void);
-void check_local_cpu_features(void);
-bool cpu_supports_mixed_endian_el0(void);
-bool system_supports_mixed_endian_el0(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void verify_local_cpu_capabilities(void);
+#else
+static inline void verify_local_cpu_capabilities(void)
+{
+}
+#endif
+
+u64 read_system_reg(u32 id);
+
+static inline bool cpu_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+static inline bool system_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
+}
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ee6403df9fe4..1a5949364ed0 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -62,24 +62,18 @@
 	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
 	((partnum)		<< MIDR_PARTNUM_SHIFT))
 
-#define ARM_CPU_IMP_ARM		0x41
-#define ARM_CPU_IMP_APM		0x50
+#define ARM_CPU_IMP_ARM			0x41
+#define ARM_CPU_IMP_APM			0x50
+#define ARM_CPU_IMP_CAVIUM		0x43
 
-#define ARM_CPU_PART_AEM_V8	0xD0F
-#define ARM_CPU_PART_FOUNDATION	0xD00
-#define ARM_CPU_PART_CORTEX_A57	0xD07
-#define ARM_CPU_PART_CORTEX_A53	0xD03
+#define ARM_CPU_PART_AEM_V8		0xD0F
+#define ARM_CPU_PART_FOUNDATION		0xD00
+#define ARM_CPU_PART_CORTEX_A57		0xD07
+#define ARM_CPU_PART_CORTEX_A53		0xD03
 
-#define APM_CPU_PART_POTENZA	0x000
+#define APM_CPU_PART_POTENZA		0x000
 
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
-#define ID_AA64MMFR0_BIGENDEL0_MASK	(0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGENDEL0(mmfr0)	\
-	(((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGEND_SHIFT	8
-#define ID_AA64MMFR0_BIGEND_MASK	(0xf << ID_AA64MMFR0_BIGEND_SHIFT)
-#define ID_AA64MMFR0_BIGEND(mmfr0)	\
-	(((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
+#define CAVIUM_CPU_PART_THUNDERX	0x0A1
 
 #ifndef __ASSEMBLY__
 
@@ -112,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
 	return read_cpuid(CTR_EL0);
 }
-
-static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
-{
-	return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
-		(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
-}
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h
new file mode 100644
index 000000000000..65e0190e97c8
--- /dev/null
+++ b/arch/arm64/include/asm/dcc.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2014-2015 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * A call to __dcc_getchar() or __dcc_putchar() is typically followed by
+ * a call to __dcc_getstatus().  We want to make sure that the CPU does
+ * not speculative read the DCC status before executing the read or write
+ * instruction.  That's what the ISBs are for.
+ *
+ * The 'volatile' ensures that the compiler does not cache the status bits,
+ * and instead reads the DCC register every time.
+ */
+#ifndef __ASM_DCC_H
+#define __ASM_DCC_H
+
+#include <asm/barrier.h>
+
+static inline u32 __dcc_getstatus(void)
+{
+	u32 ret;
+
+	asm volatile("mrs %0, mdccsr_el0" : "=r" (ret));
+
+	return ret;
+}
+
+static inline char __dcc_getchar(void)
+{
+	char c;
+
+	asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c));
+	isb();
+
+	return c;
+}
+
+static inline void __dcc_putchar(char c)
+{
+	/*
+	 * The typecast is to make absolutely certain that 'c' is
+	 * zero-extended.
+	 */
+	asm volatile("msr dbgdtrtx_el0, %0"
+			: : "r" ((unsigned long)(unsigned char)c));
+	isb();
+}
+
+#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index cfdb34bedbcd..54d0ead41afc 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -54,16 +54,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 		return __generic_dma_ops(dev);
 }
 
-static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-				      struct iommu_ops *iommu, bool coherent)
-{
-	if (!acpi_disabled && !dev->archdata.dma_ops)
-		dev->archdata.dma_ops = dma_ops;
-
-	dev->archdata.dma_coherent = coherent;
-}
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
 
+#ifdef CONFIG_IOMMU_DMA
+void arch_teardown_dma_ops(struct device *dev);
+#define arch_teardown_dma_ops	arch_teardown_dma_ops
+#endif
+
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 8b9884c726ad..309704544d22 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -17,6 +17,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
+#include <linux/sizes.h>
 #include <asm/boot.h>
 #include <asm/page.h>
 
@@ -55,11 +56,7 @@ enum fixed_addresses {
 	 * Temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
 	 */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define NR_FIX_BTMAPS		4
-#else
-#define NR_FIX_BTMAPS		64
-#endif
+#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
 #define FIX_BTMAPS_SLOTS	7
 #define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 4c47cb2fbb52..e54415ec6935 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -17,6 +17,7 @@
 #define __ASM_HW_BREAKPOINT_H
 
 #include <asm/cputype.h>
+#include <asm/cpufeature.h>
 
 #ifdef __KERNEL__
 
@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+	return 1 +
+		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_BRPS_SHIFT);
 }
 
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+	return 1 +
+		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_WRPS_SHIFT);
 }
 
 #endif	/* __KERNEL__ */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 0ad735166d9f..400b80b49595 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -52,6 +52,14 @@
 extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
 #endif
 
+enum {
+	CAP_HWCAP = 1,
+#ifdef CONFIG_COMPAT
+	CAP_COMPAT_HWCAP,
+	CAP_COMPAT_HWCAP2,
+#endif
+};
+
 extern unsigned long elf_hwcap;
 #endif
 #endif
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index bbb251b14746..23eb450b820b 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,24 +1,10 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
-#include <linux/irqchip/arm-gic-acpi.h>
-
 #include <asm-generic/irq.h>
 
 struct pt_regs;
 
-extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
-static inline void acpi_irq_init(void)
-{
-	/*
-	 * Hardcode ACPI IRQ chip initialization to GICv2 for now.
-	 * Proper irqchip infrastructure will be implemented along with
-	 * incoming  GICv2m|GICv3|ITS bits.
-	 */
-	acpi_gic_init();
-}
-#define acpi_irq_init acpi_irq_init
-
 #endif
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
new file mode 100644
index 000000000000..2774fa384c47
--- /dev/null
+++ b/arch/arm64/include/asm/kasan.h
@@ -0,0 +1,38 @@
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_KASAN
+
+#include <linux/linkage.h>
+#include <asm/memory.h>
+
+/*
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
+ */
+#define KASAN_SHADOW_START      (VA_START)
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+
+/*
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ *     shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
+ * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
+ * should satisfy the following equation:
+ *      KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
+ */
+#define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644
index 000000000000..a459714ee29e
--- /dev/null
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -0,0 +1,83 @@
+/*
+ * Kernel page table mapping
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KERNEL_PGTABLE_H
+#define __ASM_KERNEL_PGTABLE_H
+
+
+/*
+ * The linear mapping and the start of memory are both 2M aligned (per
+ * the arm64 booting.txt requirements). Hence we can use section mapping
+ * with 4K (section size = 2M) but not with 16K (section size = 32M) or
+ * 64K (section size = 512M).
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#else
+#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#endif
+
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so pages required to map highest possible PA are reserved in all
+ * cases.
+ */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
+#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
+#else
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
+#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
+#endif
+
+#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* Initial memory map size */
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
+#define SWAPPER_BLOCK_SIZE	SECTION_SIZE
+#define SWAPPER_TABLE_SHIFT	PUD_SHIFT
+#else
+#define SWAPPER_BLOCK_SHIFT	PAGE_SHIFT
+#define SWAPPER_BLOCK_SIZE	PAGE_SIZE
+#define SWAPPER_TABLE_SHIFT	PMD_SHIFT
+#endif
+
+/* The size of the initial kernel direct mapping */
+#define SWAPPER_INIT_MAP_SIZE	(_AC(1, UL) << SWAPPER_TABLE_SHIFT)
+
+/*
+ * Initial memory map attributes.
+ */
+#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#if ARM64_SWAPPER_USES_SECTION_MAPS
+#define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#else
+#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#endif
+
+
+#endif	/* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 9694f2654593..5e6857b6bdc4 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -200,4 +200,20 @@
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
 
+#define kvm_arm_exception_type	\
+	{0, "IRQ" }, 		\
+	{1, "TRAP" }
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+	ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
+	ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
+	ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+	ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+	ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ed039688c221..a35ce7266aac 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -149,7 +149,10 @@ struct kvm_vcpu_arch {
 		u32	mdscr_el1;
 	} guest_debug_preserved;
 
-	/* Don't run the guest */
+	/* vcpu power-off state */
+	bool power_off;
+
+	/* Don't run the guest (internal implementation need) */
 	bool pause;
 
 	/* IO related fields */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 6b4c3ad75a2a..853953cd1f08 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -42,12 +42,14 @@
  * PAGE_OFFSET - the virtual address of the start of the kernel image (top
  *		 (VA_BITS - 1))
  * VA_BITS - the maximum number of bits for virtual addresses.
+ * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  * The module space lives between the addresses given by TASK_SIZE
  * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
+#define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
 #define MODULES_END		(PAGE_OFFSET)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
@@ -68,10 +70,6 @@
 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
-#if TASK_SIZE_64 > MODULES_VADDR
-#error Top of 64-bit user space clashes with start of module space
-#endif
-
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
@@ -94,6 +92,7 @@
 #define MT_DEVICE_GRE		2
 #define MT_NORMAL_NC		3
 #define MT_NORMAL		4
+#define MT_NORMAL_WT		5
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 030208767185..990124a67eeb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,15 +17,16 @@
 #define __ASM_MMU_H
 
 typedef struct {
-	unsigned int id;
-	raw_spinlock_t id_lock;
-	void *vdso;
+	atomic64_t	id;
+	void		*vdso;
 } mm_context_t;
 
-#define INIT_MM_CONTEXT(name) \
-	.context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
-
-#define ASID(mm)	((mm)->context.id & 0xffff)
+/*
+ * This macro is only used by the TLBI code, which cannot race with an
+ * ASID change and therefore doesn't need to reload the counter using
+ * atomic64_read.
+ */
+#define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8ec41e5f56f0..c0e87898ba96 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -28,13 +28,6 @@
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
-#define MAX_ASID_BITS	16
-
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
 {
@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void)
 		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
-static inline void __cpu_set_tcr_t0sz(u64 t0sz)
-{
-	unsigned long tcr;
-
-	if (__cpu_uses_extended_idmap())
-		asm volatile (
-		"	mrs	%0, tcr_el1	;"
-		"	bfi	%0, %1, %2, %3	;"
-		"	msr	tcr_el1, %0	;"
-		"	isb"
-		: "=&r" (tcr)
-		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
-}
-
-/*
- * Set TCR.T0SZ to the value appropriate for activating the identity map.
- */
-static inline void cpu_set_idmap_tcr_t0sz(void)
-{
-	__cpu_set_tcr_t0sz(idmap_t0sz);
-}
-
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
 static inline void cpu_set_default_tcr_t0sz(void)
 {
-	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
-}
-
-static inline void switch_new_context(struct mm_struct *mm)
-{
-	unsigned long flags;
-
-	__new_context(mm);
+	unsigned long tcr;
 
-	local_irq_save(flags);
-	cpu_switch_mm(mm->pgd, mm);
-	local_irq_restore(flags);
-}
+	if (!__cpu_uses_extended_idmap())
+		return;
 
-static inline void check_and_switch_context(struct mm_struct *mm,
-					    struct task_struct *tsk)
-{
-	/*
-	 * Required during context switch to avoid speculative page table
-	 * walking with the wrong TTBR.
-	 */
-	cpu_set_reserved_ttbr0();
-
-	if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
-		/*
-		 * The ASID is from the current generation, just switch to the
-		 * new pgd. This condition is only true for calls from
-		 * context_switch() and interrupts are already disabled.
-		 */
-		cpu_switch_mm(mm->pgd, mm);
-	else if (irqs_disabled())
-		/*
-		 * Defer the new ASID allocation until after the context
-		 * switch critical region since __new_context() cannot be
-		 * called with interrupts disabled.
-		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
-	else
-		/*
-		 * That is a direct call to switch_mm() or activate_mm() with
-		 * interrupts enabled and a new context.
-		 */
-		switch_new_context(mm);
+	asm volatile (
+	"	mrs	%0, tcr_el1	;"
+	"	bfi	%0, %1, %2, %3	;"
+	"	msr	tcr_el1, %0	;"
+	"	isb"
+	: "=&r" (tcr)
+	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
-#define init_new_context(tsk,mm)	(__init_new_context(tsk,mm),0)
+/*
+ * It would be nice to return ASIDs back to the allocator, but unfortunately
+ * that introduces a race with a generation rollover where we could erroneously
+ * free an ASID allocated in a future generation. We could workaround this by
+ * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
+ * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
+ * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
+ * take CPU migration into account.
+ */
 #define destroy_context(mm)		do { } while(0)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
-#define finish_arch_post_lock_switch \
-	finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
-{
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-		struct mm_struct *mm = current->mm;
-		unsigned long flags;
-
-		__new_context(mm);
-
-		local_irq_save(flags);
-		cpu_switch_mm(mm->pgd, mm);
-		local_irq_restore(flags);
-	}
-}
+#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	unsigned int cpu = smp_processor_id();
 
+	if (prev == next)
+		return;
+
 	/*
 	 * init_mm.pgd does not contain any user mappings and it is always
 	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		return;
 	}
 
-	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
-		check_and_switch_context(next, tsk);
+	check_and_switch_context(next, cpu);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7d9c7e4a424b..9b2f5a9d019d 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -20,31 +20,22 @@
 #define __ASM_PAGE_H
 
 /* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define PAGE_SHIFT		16
+#define CONT_SHIFT		5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define PAGE_SHIFT		14
+#define CONT_SHIFT		7
 #else
 #define PAGE_SHIFT		12
+#define CONT_SHIFT		4
 #endif
-#define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_SIZE		(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so 3 pages are reserved in all cases.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
-#else
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
-#endif
-
-#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
+#define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK		(~(CONT_SIZE-1))
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 76420568d66a..c15053902942 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -27,6 +27,7 @@
 #define check_pgt_cache()		do { } while (0)
 
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 24154b055835..d6739e836f7b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -16,13 +16,46 @@
 #ifndef __ASM_PGTABLE_HWDEF_H
 #define __ASM_PGTABLE_HWDEF_H
 
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ *
+ *  levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ *
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
+ *
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
+ *
+ *	((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ *
+ * which gets simplified as :
+ */
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+
+/*
+ * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 4. Hence, starting at at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ *  ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ *   (4 - n) * (PAGE_SHIFT - 3) + 3
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)	((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+
 #define PTRS_PER_PTE		(1 << (PAGE_SHIFT - 3))
 
 /*
  * PMD_SHIFT determines the size a level 2 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 2
-#define PMD_SHIFT		((PAGE_SHIFT - 3) * 2 + 3)
+#define PMD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
 #define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK		(~(PMD_SIZE-1))
 #define PTRS_PER_PMD		PTRS_PER_PTE
@@ -32,7 +65,7 @@
  * PUD_SHIFT determines the size a level 1 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 3
-#define PUD_SHIFT		((PAGE_SHIFT - 3) * 3 + 3)
+#define PUD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
 #define PUD_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK		(~(PUD_SIZE-1))
 #define PTRS_PER_PUD		PTRS_PER_PTE
@@ -42,7 +75,7 @@
  * PGDIR_SHIFT determines the size a top-level page table entry can map
  * (depending on the configuration, this level can be 0, 1 or 2).
  */
-#define PGDIR_SHIFT		((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
@@ -55,6 +88,13 @@
 #define SECTION_MASK		(~(SECTION_SIZE-1))
 
 /*
+ * Contiguous page definitions.
+ */
+#define CONT_PTES		(_AC(1, UL) << CONT_SHIFT)
+/* the the numerical offset of the PTE within a range of CONT_PTES */
+#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
+
+/*
  * Hardware page table definitions.
  *
  * Level 1 descriptor (PUD).
@@ -83,6 +123,7 @@
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT		(_AT(pmdval_t, 1) << 52)
 #define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_UXN		(_AT(pmdval_t, 1) << 54)
 
@@ -105,6 +146,7 @@
 #define PTE_AF			(_AT(pteval_t, 1) << 10)	/* Access Flag */
 #define PTE_NG			(_AT(pteval_t, 1) << 11)	/* nG */
 #define PTE_DBM			(_AT(pteval_t, 1) << 51)	/* Dirty Bit Management */
+#define PTE_CONT		(_AT(pteval_t, 1) << 52)	/* Contiguous range */
 #define PTE_PXN			(_AT(pteval_t, 1) << 53)	/* Privileged XN */
 #define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b0329be95cb1..f3acf421ded4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -41,7 +41,14 @@
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-#define VMALLOC_START		(UL(0xffffffffffffffff) << VA_BITS)
+
+#ifndef CONFIG_KASAN
+#define VMALLOC_START		(VA_START)
+#else
+#include <asm/kasan.h>
+#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)
+#endif
+
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
@@ -60,8 +67,10 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
 #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
 #define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
@@ -72,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
 #define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
@@ -79,7 +89,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
 #define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
 
-#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_COPY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
@@ -140,6 +150,7 @@ extern struct page *empty_zero_page;
 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
+#define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -202,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
 }
 
+static inline pte_t pte_mkcont(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mknoncont(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
@@ -496,7 +517,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
-			      PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
+			      PTE_PROT_NONE | PTE_VALID | PTE_WRITE;
 	/* preserve the hardware dirty information */
 	if (pte_hw_dirty(pte))
 		pte = pte_mkdirty(pte);
@@ -646,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 				    unsigned long addr, pte_t *ptep)
 {
 	/*
-	 * set_pte() does not have a DSB for user mappings, so make sure that
-	 * the page table write is visible.
+	 * We don't do anything here, so there's a very small chance of
+	 * us retaking a user fault which we just fixed up. The alternative
+	 * is doing a dsb(ishst), but that penalises the fastpath.
 	 */
-	dsb(ishst);
 }
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
+#define kc_vaddr_to_offset(v)	((v) & ~VA_START)
+#define kc_offset_to_vaddr(o)	((o) | VA_START)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
deleted file mode 100644
index b7710a59672c..000000000000
--- a/arch/arm64/include/asm/pmu.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Based on arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-	/*
-	 * The events that are active on the PMU for the given index.
-	 */
-	struct perf_event	**events;
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long           *used_mask;
-
-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t		pmu_lock;
-};
-
-struct arm_pmu {
-	struct pmu		pmu;
-	cpumask_t		active_irqs;
-	int			*irq_affinity;
-	const char		*name;
-	irqreturn_t		(*handle_irq)(int irq_num, void *dev);
-	void			(*enable)(struct hw_perf_event *evt, int idx);
-	void			(*disable)(struct hw_perf_event *evt, int idx);
-	int			(*get_event_idx)(struct pmu_hw_events *hw_events,
-						 struct hw_perf_event *hwc);
-	int			(*set_event_filter)(struct hw_perf_event *evt,
-						    struct perf_event_attr *attr);
-	u32			(*read_counter)(int idx);
-	void			(*write_counter)(int idx, u32 val);
-	void			(*start)(void);
-	void			(*stop)(void);
-	void			(*reset)(void *);
-	int			(*map_event)(struct perf_event *event);
-	int			num_events;
-	atomic_t		active_events;
-	struct mutex		reserve_mutex;
-	u64			max_period;
-	struct platform_device	*plat_device;
-	struct pmu_hw_events	*(*get_hw_events)(void);
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
-
-u64 armpmu_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx);
-
-int armpmu_event_set_period(struct perf_event *event,
-			    struct hw_perf_event *hwc,
-			    int idx);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-#endif /* __ASM_PMU_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 98f32355dc97..4acb7ca94fcd 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
 
 #endif
 
-void cpu_enable_pan(void);
+void cpu_enable_pan(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 536274ed292e..e9e5467e0bf4 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -83,14 +83,14 @@
 #define compat_sp	regs[13]
 #define compat_lr	regs[14]
 #define compat_sp_hyp	regs[15]
-#define compat_sp_irq	regs[16]
-#define compat_lr_irq	regs[17]
-#define compat_sp_svc	regs[18]
-#define compat_lr_svc	regs[19]
-#define compat_sp_abt	regs[20]
-#define compat_lr_abt	regs[21]
-#define compat_sp_und	regs[22]
-#define compat_lr_und	regs[23]
+#define compat_lr_irq	regs[16]
+#define compat_sp_irq	regs[17]
+#define compat_lr_svc	regs[18]
+#define compat_sp_svc	regs[19]
+#define compat_lr_abt	regs[20]
+#define compat_sp_abt	regs[21]
+#define compat_lr_und	regs[22]
+#define compat_sp_und	regs[23]
 #define compat_r8_fiq	regs[24]
 #define compat_r9_fiq	regs[25]
 #define compat_r10_fiq	regs[26]
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 64d2d4884a9d..2eb714c4639f 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *, int, __kernel_size_t);
+extern void *__memset(void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCMP
 extern int memcmp(const void *, const void *, size_t);
 
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
 #endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index a7f3d4b2514d..d48ab5b41f52 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,9 +22,6 @@
 
 #include <asm/opcodes.h>
 
-#define SCTLR_EL1_CP15BEN	(0x1 << 5)
-#define SCTLR_EL1_SED		(0x1 << 8)
-
 /*
  * ARMv8 ARM reserves the following encoding for system registers:
  * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -38,12 +35,162 @@
 #define sys_reg(op0, op1, crn, crm, op2) \
 	((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
 
-#define REG_PSTATE_PAN_IMM                     sys_reg(0, 0, 4, 0, 4)
-#define SCTLR_EL1_SPAN                         (1 << 23)
+#define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
+#define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
+#define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
+
+#define SYS_ID_PFR0_EL1			sys_reg(3, 0, 0, 1, 0)
+#define SYS_ID_PFR1_EL1			sys_reg(3, 0, 0, 1, 1)
+#define SYS_ID_DFR0_EL1			sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_MMFR0_EL1		sys_reg(3, 0, 0, 1, 4)
+#define SYS_ID_MMFR1_EL1		sys_reg(3, 0, 0, 1, 5)
+#define SYS_ID_MMFR2_EL1		sys_reg(3, 0, 0, 1, 6)
+#define SYS_ID_MMFR3_EL1		sys_reg(3, 0, 0, 1, 7)
+
+#define SYS_ID_ISAR0_EL1		sys_reg(3, 0, 0, 2, 0)
+#define SYS_ID_ISAR1_EL1		sys_reg(3, 0, 0, 2, 1)
+#define SYS_ID_ISAR2_EL1		sys_reg(3, 0, 0, 2, 2)
+#define SYS_ID_ISAR3_EL1		sys_reg(3, 0, 0, 2, 3)
+#define SYS_ID_ISAR4_EL1		sys_reg(3, 0, 0, 2, 4)
+#define SYS_ID_ISAR5_EL1		sys_reg(3, 0, 0, 2, 5)
+#define SYS_ID_MMFR4_EL1		sys_reg(3, 0, 0, 2, 6)
+
+#define SYS_MVFR0_EL1			sys_reg(3, 0, 0, 3, 0)
+#define SYS_MVFR1_EL1			sys_reg(3, 0, 0, 3, 1)
+#define SYS_MVFR2_EL1			sys_reg(3, 0, 0, 3, 2)
+
+#define SYS_ID_AA64PFR0_EL1		sys_reg(3, 0, 0, 4, 0)
+#define SYS_ID_AA64PFR1_EL1		sys_reg(3, 0, 0, 4, 1)
+
+#define SYS_ID_AA64DFR0_EL1		sys_reg(3, 0, 0, 5, 0)
+#define SYS_ID_AA64DFR1_EL1		sys_reg(3, 0, 0, 5, 1)
+
+#define SYS_ID_AA64ISAR0_EL1		sys_reg(3, 0, 0, 6, 0)
+#define SYS_ID_AA64ISAR1_EL1		sys_reg(3, 0, 0, 6, 1)
+
+#define SYS_ID_AA64MMFR0_EL1		sys_reg(3, 0, 0, 7, 0)
+#define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
+
+#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+#define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
+#define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
+
+#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
+/* SCTLR_EL1 */
+#define SCTLR_EL1_CP15BEN	(0x1 << 5)
+#define SCTLR_EL1_SED		(0x1 << 8)
+#define SCTLR_EL1_SPAN		(0x1 << 23)
+
+
+/* id_aa64isar0 */
+#define ID_AA64ISAR0_RDM_SHIFT		28
+#define ID_AA64ISAR0_ATOMICS_SHIFT	20
+#define ID_AA64ISAR0_CRC32_SHIFT	16
+#define ID_AA64ISAR0_SHA2_SHIFT		12
+#define ID_AA64ISAR0_SHA1_SHIFT		8
+#define ID_AA64ISAR0_AES_SHIFT		4
+
+/* id_aa64pfr0 */
+#define ID_AA64PFR0_GIC_SHIFT		24
+#define ID_AA64PFR0_ASIMD_SHIFT		20
+#define ID_AA64PFR0_FP_SHIFT		16
+#define ID_AA64PFR0_EL3_SHIFT		12
+#define ID_AA64PFR0_EL2_SHIFT		8
+#define ID_AA64PFR0_EL1_SHIFT		4
+#define ID_AA64PFR0_EL0_SHIFT		0
+
+#define ID_AA64PFR0_FP_NI		0xf
+#define ID_AA64PFR0_FP_SUPPORTED	0x0
+#define ID_AA64PFR0_ASIMD_NI		0xf
+#define ID_AA64PFR0_ASIMD_SUPPORTED	0x0
+#define ID_AA64PFR0_EL1_64BIT_ONLY	0x1
+#define ID_AA64PFR0_EL0_64BIT_ONLY	0x1
+
+/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_TGRAN4_SHIFT	28
+#define ID_AA64MMFR0_TGRAN64_SHIFT	24
+#define ID_AA64MMFR0_TGRAN16_SHIFT	20
+#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
+#define ID_AA64MMFR0_SNSMEM_SHIFT	12
+#define ID_AA64MMFR0_BIGENDEL_SHIFT	8
+#define ID_AA64MMFR0_ASID_SHIFT		4
+#define ID_AA64MMFR0_PARANGE_SHIFT	0
+
+#define ID_AA64MMFR0_TGRAN4_NI		0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN64_NI		0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN16_NI		0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+
+/* id_aa64mmfr1 */
+#define ID_AA64MMFR1_PAN_SHIFT		20
+#define ID_AA64MMFR1_LOR_SHIFT		16
+#define ID_AA64MMFR1_HPD_SHIFT		12
+#define ID_AA64MMFR1_VHE_SHIFT		8
+#define ID_AA64MMFR1_VMIDBITS_SHIFT	4
+#define ID_AA64MMFR1_HADBS_SHIFT	0
+
+/* id_aa64dfr0 */
+#define ID_AA64DFR0_CTX_CMPS_SHIFT	28
+#define ID_AA64DFR0_WRPS_SHIFT		20
+#define ID_AA64DFR0_BRPS_SHIFT		12
+#define ID_AA64DFR0_PMUVER_SHIFT	8
+#define ID_AA64DFR0_TRACEVER_SHIFT	4
+#define ID_AA64DFR0_DEBUGVER_SHIFT	0
+
+#define ID_ISAR5_RDM_SHIFT		24
+#define ID_ISAR5_CRC32_SHIFT		16
+#define ID_ISAR5_SHA2_SHIFT		12
+#define ID_ISAR5_SHA1_SHIFT		8
+#define ID_ISAR5_AES_SHIFT		4
+#define ID_ISAR5_SEVL_SHIFT		0
+
+#define MVFR0_FPROUND_SHIFT		28
+#define MVFR0_FPSHVEC_SHIFT		24
+#define MVFR0_FPSQRT_SHIFT		20
+#define MVFR0_FPDIVIDE_SHIFT		16
+#define MVFR0_FPTRAP_SHIFT		12
+#define MVFR0_FPDP_SHIFT		8
+#define MVFR0_FPSP_SHIFT		4
+#define MVFR0_SIMD_SHIFT		0
+
+#define MVFR1_SIMDFMAC_SHIFT		28
+#define MVFR1_FPHP_SHIFT		24
+#define MVFR1_SIMDHP_SHIFT		20
+#define MVFR1_SIMDSP_SHIFT		16
+#define MVFR1_SIMDINT_SHIFT		12
+#define MVFR1_SIMDLS_SHIFT		8
+#define MVFR1_FPDNAN_SHIFT		4
+#define MVFR1_FPFTZ_SHIFT		0
+
+
+#define ID_AA64MMFR0_TGRAN4_SHIFT	28
+#define ID_AA64MMFR0_TGRAN64_SHIFT	24
+#define ID_AA64MMFR0_TGRAN16_SHIFT	20
+
+#define ID_AA64MMFR0_TGRAN4_NI		0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN64_NI		0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN16_NI		0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN4_SUPPORTED
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN16_SUPPORTED
+#elif defined(CONFIG_ARM64_64K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN64_SUPPORTED
+#endif
+
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index dcd06d18a42a..90c7ff233735 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -23,8 +23,10 @@
 
 #include <linux/compiler.h>
 
-#ifndef CONFIG_ARM64_64K_PAGES
+#ifdef CONFIG_ARM64_4K_PAGES
 #define THREAD_SIZE_ORDER	2
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define THREAD_SIZE_ORDER	0
 #endif
 
 #define THREAD_SIZE		16384
@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	20
 #define TIF_SINGLESTEP		21
 #define TIF_32BIT		22	/* 32bit process */
-#define TIF_SWITCH_MM		23	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d6e6b6660380..ffdaea7954bb 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	if (tlb->fullmm) {
-		flush_tlb_mm(tlb->mm);
-	} else {
-		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
-		/*
-		 * The intermediate page table levels are already handled by
-		 * the __(pte|pmd|pud)_free_tlb() functions, so last level
-		 * TLBI is sufficient here.
-		 */
-		__flush_tlb_range(&vma, tlb->start, tlb->end, true);
-	}
+	struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+
+	/*
+	 * The ASID allocator will either invalidate the ASID or mark
+	 * it as used.
+	 */
+	if (tlb->fullmm)
+		return;
+
+	/*
+	 * The intermediate page table levels are already handled by
+	 * the __(pte|pmd|pud)_free_tlb() functions, so last level
+	 * TLBI is sufficient here.
+	 */
+	__flush_tlb_range(&vma, tlb->start, tlb->end, true);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 7bd2da021658..b460ae28e346 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -63,6 +63,14 @@
  *		only require the D-TLB to be invalidated.
  *		- kaddr - Kernel virtual memory address
  */
+static inline void local_flush_tlb_all(void)
+{
+	dsb(nshst);
+	asm("tlbi	vmalle1");
+	dsb(nsh);
+	isb();
+}
+
 static inline void flush_tlb_all(void)
 {
 	dsb(ishst);
@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned long asid = (unsigned long)ASID(mm) << 48;
+	unsigned long asid = ASID(mm) << 48;
 
 	dsb(ishst);
 	asm("tlbi	aside1is, %0" : : "r" (asid));
@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 |
-		((unsigned long)ASID(vma->vm_mm) << 48);
+	unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
 
 	dsb(ishst);
 	asm("tlbi	vale1is, %0" : : "r" (addr));
@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     bool last_level)
 {
-	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+	unsigned long asid = ASID(vma->vm_mm) << 48;
 	unsigned long addr;
 
 	if ((end - start) > MAX_TLB_RANGE) {
@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 static inline void __flush_tlb_pgtable(struct mm_struct *mm,
 				       unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
 
-	dsb(ishst);
 	asm("tlbi	vae1is, %0" : : "r" (addr));
 	dsb(ish);
 }
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3bc498c250dc..41e58fe3c041 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_cacheflush	(__ARM_NR_COMPAT_BASE+2)
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE+5)
 
-#define __NR_compat_syscalls		388
+#define __NR_compat_syscalls		390
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index cef934a90f17..5b925b761a2a 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -797,3 +797,12 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 387
 __SYSCALL(__NR_execveat, compat_sys_execveat)
+#define __NR_userfaultfd 388
+__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
+#define __NR_membarrier 389
+__SYSCALL(__NR_membarrier, sys_membarrier)
+
+/*
+ * Please add new compat syscalls above this comment and update
+ * __NR_compat_syscalls in asm/unistd.h.
+ */
diff --git a/arch/arm64/include/uapi/asm/signal.h b/arch/arm64/include/uapi/asm/signal.h
index 8d1e7236431b..991bf5db2ca1 100644
--- a/arch/arm64/include/uapi/asm/signal.h
+++ b/arch/arm64/include/uapi/asm/signal.h
@@ -19,6 +19,9 @@
 /* Required for AArch32 compatibility. */
 #define SA_RESTORER	0x04000000
 
+#define MINSIGSTKSZ 5120
+#define SIGSTKSZ    16384
+
 #include <asm-generic/signal.h>
 
 #endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc781be..474691f8b13a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -4,7 +4,6 @@
 
 CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
-CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
 CFLAGS_REMOVE_ftrace.o = -pg
@@ -20,6 +19,12 @@ arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o
 
+extra-$(CONFIG_EFI)			:= efi-entry.o
+
+OBJCOPYFLAGS := --prefix-symbols=__efistub_
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+	$(call if_changed,objcopy)
+
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
 					   ../../arm/kernel/opcodes.o
@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM)		+= sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
-arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
 head-y					:= head.o
-extra-y					:= $(head-y) vmlinux.lds
+extra-y					+= $(head-y) vmlinux.lds
 
 # vDSO - this must be built first to generate the symbol offsets
 $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 19de7537e7d3..d1ce8e2f98b9 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -29,6 +29,11 @@
 #include <asm/cpu_ops.h>
 #include <asm/smp_plat.h>
 
+#ifdef CONFIG_ACPI_APEI
+# include <linux/efi.h>
+# include <asm/pgtable.h>
+#endif
+
 int acpi_noirq = 1;		/* skip ACPI IRQ initialization */
 int acpi_disabled = 1;
 EXPORT_SYMBOL(acpi_disabled);
@@ -206,27 +211,26 @@ void __init acpi_boot_table_init(void)
 	}
 }
 
-void __init acpi_gic_init(void)
+#ifdef CONFIG_ACPI_APEI
+pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
 {
-	struct acpi_table_header *table;
-	acpi_status status;
-	acpi_size tbl_size;
-	int err;
-
-	if (acpi_disabled)
-		return;
-
-	status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size);
-	if (ACPI_FAILURE(status)) {
-		const char *msg = acpi_format_exception(status);
-
-		pr_err("Failed to get MADT table, %s\n", msg);
-		return;
-	}
+	/*
+	 * According to "Table 8 Map: EFI memory types to AArch64 memory
+	 * types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is
+	 * mapped to a corresponding MAIR attribute encoding.
+	 * The EFI memory attribute advises all possible capabilities
+	 * of a memory region. We use the most efficient capability.
+	 */
 
-	err = gic_v2_acpi_init(table);
-	if (err)
-		pr_err("Failed to initialize GIC IRQ controller");
+	u64 attr;
 
-	early_acpi_os_unmap_memory((char *)table, tbl_size);
+	attr = efi_mem_attributes(addr);
+	if (attr & EFI_MEMORY_WB)
+		return PAGE_KERNEL;
+	if (attr & EFI_MEMORY_WT)
+		return __pgprot(PROT_NORMAL_WT);
+	if (attr & EFI_MEMORY_WC)
+		return __pgprot(PROT_NORMAL_NC);
+	return __pgprot(PROT_DEVICE_nGnRnE);
 }
+#endif
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index a85843ddbde8..3b6d8cc9dfe0 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(memcmp);
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index bcee7abac68e..937f5e58a4d3 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -284,21 +284,23 @@ static void register_insn_emulation_sysctl(struct ctl_table *table)
 	__asm__ __volatile__(					\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
 		    CONFIG_ARM64_PAN)				\
-	"	mov		%w2, %w1\n"			\
-	"0:	ldxr"B"		%w1, [%3]\n"			\
-	"1:	stxr"B"		%w0, %w2, [%3]\n"		\
+	"0:	ldxr"B"		%w2, [%3]\n"			\
+	"1:	stxr"B"		%w0, %w1, [%3]\n"		\
 	"	cbz		%w0, 2f\n"			\
 	"	mov		%w0, %w4\n"			\
+	"	b		3f\n"				\
 	"2:\n"							\
+	"	mov		%w1, %w2\n"			\
+	"3:\n"							\
 	"	.pushsection	 .fixup,\"ax\"\n"		\
 	"	.align		2\n"				\
-	"3:	mov		%w0, %w5\n"			\
-	"	b		2b\n"				\
+	"4:	mov		%w0, %w5\n"			\
+	"	b		3b\n"				\
 	"	.popsection"					\
 	"	.pushsection	 __ex_table,\"a\"\n"		\
 	"	.align		3\n"				\
-	"	.quad		0b, 3b\n"			\
-	"	.quad		1b, 3b\n"			\
+	"	.quad		0b, 4b\n"			\
+	"	.quad		1b, 4b\n"			\
 	"	.popsection\n"					\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
 		CONFIG_ARM64_PAN)				\
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 8d89cf8dae55..25de8b244961 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -60,7 +60,7 @@ int main(void)
   DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
-  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
   BLANK();
   DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
   DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6ffd91438560..24926f2504f7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -23,6 +23,7 @@
 
 #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX	MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 
 #define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
 			MIDR_ARCHITECTURE_MASK)
@@ -82,11 +83,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
 	},
 #endif
+#ifdef CONFIG_CAVIUM_ERRATUM_23154
+	{
+	/* Cavium ThunderX, pass 1.x */
+		.desc = "Cavium erratum 23154",
+		.capability = ARM64_WORKAROUND_CAVIUM_23154,
+		MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
+	},
+#endif
 	{
 	}
 };
 
 void check_local_cpu_errata(void)
 {
-	check_cpu_capabilities(arm64_errata, "enabling workaround for");
+	update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3c9aed32f70b..52f0d7a5a1c2 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -16,12 +16,569 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define pr_fmt(fmt) "alternatives: " fmt
+#define pr_fmt(fmt) "CPU features: " fmt
 
+#include <linux/bsearch.h>
+#include <linux/sort.h>
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
+#include <asm/cpu_ops.h>
 #include <asm/processor.h>
+#include <asm/sysreg.h>
+
+unsigned long elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT	\
+				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+				 COMPAT_HWCAP_LPAE)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	{						\
+		.strict = STRICT,			\
+		.type = TYPE,				\
+		.shift = SHIFT,				\
+		.width = WIDTH,				\
+		.safe_val = SAFE_VAL,			\
+	}
+
+#define ARM64_FTR_END					\
+	{						\
+		.width = 0,				\
+	}
+
+static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* RAZ */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+	/* Linux doesn't care about the EL3 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+	/* Linux shouldn't care about secure memory */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+	/*
+	 * Differing PARange is fine as long as all peripherals and memory are mapped
+	 * within the minimum PARange of all CPUs
+	 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_ctr[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
+	/*
+	 * Linux can handle differing I-cache policies. Userspace JITs will
+	 * make use of *minLine
+	 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),	/* L1Ip */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),	/* InnerShr */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0),	/* TCM */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* ShareLvl */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),	/* OuterShr */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),	/* PMSA */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* VMSA */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_mvfr2[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_dczid[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1),		/* DZP */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* BS */
+	ARM64_FTR_END,
+};
+
+
+static struct arm64_ftr_bits ftr_id_isar5[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* RAZ */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_pfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* State1 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* State0 */
+	ARM64_FTR_END,
+};
+
+/*
+ * Common ftr bits for a 32bit register with all hidden, strict
+ * attributes, with 4bit feature fields and a default safe value of
+ * 0. Covers the following 32bit registers:
+ * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ */
+static struct arm64_ftr_bits ftr_generic_32bits[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic32[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_aa64raz[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+	ARM64_FTR_END,
+};
+
+#define ARM64_FTR_REG(id, table)		\
+	{					\
+		.sys_id = id,			\
+		.name = #id,			\
+		.ftr_bits = &((table)[0]),	\
+	}
+
+static struct arm64_ftr_reg arm64_ftr_regs[] = {
+
+	/* Op1 = 0, CRn = 0, CRm = 1 */
+	ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
+	ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
+	ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
+
+	/* Op1 = 0, CRn = 0, CRm = 2 */
+	ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
+	ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+
+	/* Op1 = 0, CRn = 0, CRm = 3 */
+	ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+
+	/* Op1 = 0, CRn = 0, CRm = 4 */
+	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+
+	/* Op1 = 0, CRn = 0, CRm = 5 */
+	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
+	ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+
+	/* Op1 = 0, CRn = 0, CRm = 6 */
+	ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
+	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+
+	/* Op1 = 0, CRn = 0, CRm = 7 */
+	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+	ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+
+	/* Op1 = 3, CRn = 0, CRm = 0 */
+	ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
+
+	/* Op1 = 3, CRn = 14, CRm = 0 */
+	ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+};
+
+static int search_cmp_ftr_reg(const void *id, const void *regp)
+{
+	return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+}
+
+/*
+ * get_arm64_ftr_reg - Lookup a feature register entry using its
+ * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id , we use binary search to find a matching
+ * entry.
+ *
+ * returns - Upon success,  matching ftr_reg entry for id.
+ *         - NULL on failure. It is upto the caller to decide
+ *	     the impact of a failure.
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+	return bsearch((const void *)(unsigned long)sys_id,
+			arm64_ftr_regs,
+			ARRAY_SIZE(arm64_ftr_regs),
+			sizeof(arm64_ftr_regs[0]),
+			search_cmp_ftr_reg);
+}
+
+static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+{
+	u64 mask = arm64_ftr_mask(ftrp);
+
+	reg &= ~mask;
+	reg |= (ftr_val << ftrp->shift) & mask;
+	return reg;
+}
+
+static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+{
+	s64 ret = 0;
+
+	switch (ftrp->type) {
+	case FTR_EXACT:
+		ret = ftrp->safe_val;
+		break;
+	case FTR_LOWER_SAFE:
+		ret = new < cur ? new : cur;
+		break;
+	case FTR_HIGHER_SAFE:
+		ret = new > cur ? new : cur;
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+static int __init sort_cmp_ftr_regs(const void *a, const void *b)
+{
+	return ((const struct arm64_ftr_reg *)a)->sys_id -
+		 ((const struct arm64_ftr_reg *)b)->sys_id;
+}
+
+static void __init swap_ftr_regs(void *a, void *b, int size)
+{
+	struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
+	*(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
+	*(struct arm64_ftr_reg *)b = tmp;
+}
+
+static void __init sort_ftr_regs(void)
+{
+	/* Keep the array sorted so that we can do the binary search */
+	sort(arm64_ftr_regs,
+		ARRAY_SIZE(arm64_ftr_regs),
+		sizeof(arm64_ftr_regs[0]),
+		sort_cmp_ftr_regs,
+		swap_ftr_regs);
+}
+
+/*
+ * Initialise the CPU feature register from Boot CPU values.
+ * Also initiliases the strict_mask for the register.
+ */
+static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+{
+	u64 val = 0;
+	u64 strict_mask = ~0x0ULL;
+	struct arm64_ftr_bits *ftrp;
+	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
+
+	BUG_ON(!reg);
+
+	for (ftrp  = reg->ftr_bits; ftrp->width; ftrp++) {
+		s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+		val = arm64_ftr_set_value(ftrp, val, ftr_new);
+		if (!ftrp->strict)
+			strict_mask &= ~arm64_ftr_mask(ftrp);
+	}
+	reg->sys_val = val;
+	reg->strict_mask = strict_mask;
+}
+
+void __init init_cpu_features(struct cpuinfo_arm64 *info)
+{
+	/* Before we start using the tables, make sure it is sorted */
+	sort_ftr_regs();
+
+	init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
+	init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
+	init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
+	init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+	init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+	init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+	init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+	init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+	init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+	init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+	init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+	init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+	init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+	init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+	init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+	init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+	init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+	init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+	init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+	init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
+static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
+{
+	struct arm64_ftr_bits *ftrp;
+
+	for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+		s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
+		s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+		if (ftr_cur == ftr_new)
+			continue;
+		/* Find a safe value */
+		ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur);
+		reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new);
+	}
+
+}
+
+static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
+{
+	struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+	BUG_ON(!regp);
+	update_cpu_ftr_reg(regp, val);
+	if ((boot & regp->strict_mask) == (val & regp->strict_mask))
+		return 0;
+	pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n",
+			regp->name, boot, cpu, val);
+	return 1;
+}
+
+/*
+ * Update system wide CPU feature registers with the values from a
+ * non-boot CPU. Also performs SANITY checks to make sure that there
+ * aren't any insane variations from that of the boot CPU.
+ */
+void update_cpu_features(int cpu,
+			 struct cpuinfo_arm64 *info,
+			 struct cpuinfo_arm64 *boot)
+{
+	int taint = 0;
+
+	/*
+	 * The kernel can handle differing I-cache policies, but otherwise
+	 * caches should look identical. Userspace JITs will make use of
+	 * *minLine.
+	 */
+	taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu,
+				      info->reg_ctr, boot->reg_ctr);
+
+	/*
+	 * Userspace may perform DC ZVA instructions. Mismatched block sizes
+	 * could result in too much or too little memory being zeroed if a
+	 * process is preempted and migrated between CPUs.
+	 */
+	taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
+				      info->reg_dczid, boot->reg_dczid);
+
+	/* If different, timekeeping will be broken (especially with KVM) */
+	taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
+				      info->reg_cntfrq, boot->reg_cntfrq);
+
+	/*
+	 * The kernel uses self-hosted debug features and expects CPUs to
+	 * support identical debug features. We presently need CTX_CMPs, WRPs,
+	 * and BRPs to be identical.
+	 * ID_AA64DFR1 is currently RES0.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu,
+				      info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu,
+				      info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1);
+	/*
+	 * Even in big.LITTLE, processors should be identical instruction-set
+	 * wise.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu,
+				      info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
+				      info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+
+	/*
+	 * Differing PARange support is fine as long as all peripherals and
+	 * memory are mapped within the minimum PARange of all CPUs.
+	 * Linux should not care about secure memory.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu,
+				      info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
+				      info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+
+	/*
+	 * EL3 is not our concern.
+	 * ID_AA64PFR1 is currently RES0.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
+				      info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
+				      info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+
+	/*
+	 * If we have AArch32, we care about 32-bit features for compat. These
+	 * registers should be RES0 otherwise.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+					info->reg_id_dfr0, boot->reg_id_dfr0);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+					info->reg_id_isar0, boot->reg_id_isar0);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+					info->reg_id_isar1, boot->reg_id_isar1);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+					info->reg_id_isar2, boot->reg_id_isar2);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+					info->reg_id_isar3, boot->reg_id_isar3);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+					info->reg_id_isar4, boot->reg_id_isar4);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+					info->reg_id_isar5, boot->reg_id_isar5);
+
+	/*
+	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+	 * ACTLR formats could differ across CPUs and therefore would have to
+	 * be trapped for virtualization anyway.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+					info->reg_id_mmfr0, boot->reg_id_mmfr0);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+					info->reg_id_mmfr1, boot->reg_id_mmfr1);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+					info->reg_id_mmfr2, boot->reg_id_mmfr2);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+					info->reg_id_mmfr3, boot->reg_id_mmfr3);
+	taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+					info->reg_id_pfr0, boot->reg_id_pfr0);
+	taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+					info->reg_id_pfr1, boot->reg_id_pfr1);
+	taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+					info->reg_mvfr0, boot->reg_mvfr0);
+	taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+					info->reg_mvfr1, boot->reg_mvfr1);
+	taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+					info->reg_mvfr2, boot->reg_mvfr2);
+
+	/*
+	 * Mismatched CPU features are a recipe for disaster. Don't even
+	 * pretend to support them.
+	 */
+	WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
+			"Unsupported CPU feature variation.\n");
+}
+
+u64 read_system_reg(u32 id)
+{
+	struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+	/* We shouldn't get a request for an unsupported register */
+	BUG_ON(!regp);
+	return regp->sys_val;
+}
+
+#include <linux/irqchip/arm-gic-v3.h>
 
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
@@ -31,34 +588,46 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 	return val >= entry->min_field_value;
 }
 
-#define __ID_FEAT_CHK(reg)						\
-static bool __maybe_unused						\
-has_##reg##_feature(const struct arm64_cpu_capabilities *entry)		\
-{									\
-	u64 val;							\
-									\
-	val = read_cpuid(reg##_el1);					\
-	return feature_matches(val, entry);				\
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
+{
+	u64 val;
+
+	val = read_system_reg(entry->sys_reg);
+	return feature_matches(val, entry);
 }
 
-__ID_FEAT_CHK(id_aa64pfr0);
-__ID_FEAT_CHK(id_aa64mmfr1);
-__ID_FEAT_CHK(id_aa64isar0);
+static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
+{
+	bool has_sre;
+
+	if (!has_cpuid_feature(entry))
+		return false;
+
+	has_sre = gic_enable_sre();
+	if (!has_sre)
+		pr_warn_once("%s present but disabled by higher exception level\n",
+			     entry->desc);
+
+	return has_sre;
+}
 
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
 		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
-		.matches = has_id_aa64pfr0_feature,
-		.field_pos = 24,
+		.matches = has_useable_gicv3_cpuif,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.field_pos = ID_AA64PFR0_GIC_SHIFT,
 		.min_field_value = 1,
 	},
 #ifdef CONFIG_ARM64_PAN
 	{
 		.desc = "Privileged Access Never",
 		.capability = ARM64_HAS_PAN,
-		.matches = has_id_aa64mmfr1_feature,
-		.field_pos = 20,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
 		.min_field_value = 1,
 		.enable = cpu_enable_pan,
 	},
@@ -67,15 +636,101 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "LSE atomic instructions",
 		.capability = ARM64_HAS_LSE_ATOMICS,
-		.matches = has_id_aa64isar0_feature,
-		.field_pos = 20,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR0_EL1,
+		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
 		.min_field_value = 2,
 	},
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
 	{},
 };
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+#define HWCAP_CAP(reg, field, min_value, type, cap)		\
+	{							\
+		.desc = #cap,					\
+		.matches = has_cpuid_feature,			\
+		.sys_reg = reg,					\
+		.field_pos = field,				\
+		.min_field_value = min_value,			\
+		.hwcap_type = type,				\
+		.hwcap = cap,					\
+	}
+
+static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
+#ifdef CONFIG_COMPAT
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+	{},
+};
+
+static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		elf_hwcap |= cap->hwcap;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		compat_elf_hwcap |= (u32)cap->hwcap;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		compat_elf_hwcap2 |= (u32)cap->hwcap;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	bool rc;
+
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		rc = (elf_hwcap & cap->hwcap) != 0;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		rc = false;
+	}
+
+	return rc;
+}
+
+static void setup_cpu_hwcaps(void)
+{
+	int i;
+	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
+
+	for (i = 0; hwcaps[i].desc; i++)
+		if (hwcaps[i].matches(&hwcaps[i]))
+			cap_set_hwcap(&hwcaps[i]);
+}
+
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info)
 {
 	int i;
@@ -88,15 +743,178 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			pr_info("%s %s\n", info, caps[i].desc);
 		cpus_set_cap(caps[i].capability);
 	}
+}
+
+/*
+ * Run through the enabled capabilities and enable() it on all active
+ * CPUs
+ */
+static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+	int i;
+
+	for (i = 0; caps[i].desc; i++)
+		if (caps[i].enable && cpus_have_cap(caps[i].capability))
+			on_each_cpu(caps[i].enable, NULL, true);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+	sys_caps_initialised = true;
+}
+
+/*
+ * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ */
+static u64 __raw_read_system_reg(u32 sys_id)
+{
+	switch (sys_id) {
+	case SYS_ID_PFR0_EL1:		return (u64)read_cpuid(ID_PFR0_EL1);
+	case SYS_ID_PFR1_EL1:		return (u64)read_cpuid(ID_PFR1_EL1);
+	case SYS_ID_DFR0_EL1:		return (u64)read_cpuid(ID_DFR0_EL1);
+	case SYS_ID_MMFR0_EL1:		return (u64)read_cpuid(ID_MMFR0_EL1);
+	case SYS_ID_MMFR1_EL1:		return (u64)read_cpuid(ID_MMFR1_EL1);
+	case SYS_ID_MMFR2_EL1:		return (u64)read_cpuid(ID_MMFR2_EL1);
+	case SYS_ID_MMFR3_EL1:		return (u64)read_cpuid(ID_MMFR3_EL1);
+	case SYS_ID_ISAR0_EL1:		return (u64)read_cpuid(ID_ISAR0_EL1);
+	case SYS_ID_ISAR1_EL1:		return (u64)read_cpuid(ID_ISAR1_EL1);
+	case SYS_ID_ISAR2_EL1:		return (u64)read_cpuid(ID_ISAR2_EL1);
+	case SYS_ID_ISAR3_EL1:		return (u64)read_cpuid(ID_ISAR3_EL1);
+	case SYS_ID_ISAR4_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
+	case SYS_ID_ISAR5_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
+	case SYS_MVFR0_EL1:		return (u64)read_cpuid(MVFR0_EL1);
+	case SYS_MVFR1_EL1:		return (u64)read_cpuid(MVFR1_EL1);
+	case SYS_MVFR2_EL1:		return (u64)read_cpuid(MVFR2_EL1);
+
+	case SYS_ID_AA64PFR0_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
+	case SYS_ID_AA64PFR1_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
+	case SYS_ID_AA64DFR0_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
+	case SYS_ID_AA64DFR1_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
+	case SYS_ID_AA64MMFR0_EL1:	return (u64)read_cpuid(ID_AA64MMFR0_EL1);
+	case SYS_ID_AA64MMFR1_EL1:	return (u64)read_cpuid(ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64ISAR0_EL1:	return (u64)read_cpuid(ID_AA64ISAR0_EL1);
+	case SYS_ID_AA64ISAR1_EL1:	return (u64)read_cpuid(ID_AA64ISAR1_EL1);
 
-	/* second pass allows enable() to consider interacting capabilities */
+	case SYS_CNTFRQ_EL0:		return (u64)read_cpuid(CNTFRQ_EL0);
+	case SYS_CTR_EL0:		return (u64)read_cpuid(CTR_EL0);
+	case SYS_DCZID_EL0:		return (u64)read_cpuid(DCZID_EL0);
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+/*
+ * Park the CPU which doesn't have the capability as advertised
+ * by the system.
+ */
+static void fail_incapable_cpu(char *cap_type,
+				 const struct arm64_cpu_capabilities *cap)
+{
+	int cpu = smp_processor_id();
+
+	pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
+	/* Mark this CPU absent */
+	set_cpu_present(cpu, 0);
+
+	/* Check if we can park ourselves */
+	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+		cpu_ops[cpu]->cpu_die(cpu);
+	asm(
+	"1:	wfe\n"
+	"	wfi\n"
+	"	b	1b");
+}
+
+/*
+ * Run through the enabled system capabilities and enable() it on this CPU.
+ * The capabilities were decided based on the available CPUs at the boot time.
+ * Any new CPU should match the system wide status of the capability. If the
+ * new CPU doesn't have a capability which the system now has enabled, we
+ * cannot do anything to fix it up and could cause unexpected failures. So
+ * we park the CPU.
+ */
+void verify_local_cpu_capabilities(void)
+{
+	int i;
+	const struct arm64_cpu_capabilities *caps;
+
+	/*
+	 * If we haven't computed the system capabilities, there is nothing
+	 * to verify.
+	 */
+	if (!sys_caps_initialised)
+		return;
+
+	caps = arm64_features;
 	for (i = 0; caps[i].desc; i++) {
-		if (cpus_have_cap(caps[i].capability) && caps[i].enable)
-			caps[i].enable();
+		if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
+			continue;
+		/*
+		 * If the new CPU misses an advertised feature, we cannot proceed
+		 * further, park the cpu.
+		 */
+		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+			fail_incapable_cpu("arm64_features", &caps[i]);
+		if (caps[i].enable)
+			caps[i].enable(NULL);
 	}
+
+	for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+		if (!cpus_have_hwcap(&caps[i]))
+			continue;
+		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+			fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+	}
+}
+
+#else	/* !CONFIG_HOTPLUG_CPU */
+
+static inline void set_sys_caps_initialised(void)
+{
+}
+
+#endif	/* CONFIG_HOTPLUG_CPU */
+
+static void setup_feature_capabilities(void)
+{
+	update_cpu_capabilities(arm64_features, "detected feature:");
+	enable_cpu_capabilities(arm64_features);
 }
 
-void check_local_cpu_features(void)
+void __init setup_cpu_features(void)
 {
-	check_cpu_capabilities(arm64_features, "detected feature:");
+	u32 cwg;
+	int cls;
+
+	/* Set the CPU feature capabilies */
+	setup_feature_capabilities();
+	setup_cpu_hwcaps();
+
+	/* Advertise that we have computed the system capabilities */
+	set_sys_caps_initialised();
+
+	/*
+	 * Check for sane CTR_EL0.CWG value.
+	 */
+	cwg = cache_type_cwg();
+	cls = cache_line_size();
+	if (!cwg)
+		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+			cls);
+	if (L1_CACHE_BYTES < cls)
+		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+			L1_CACHE_BYTES, cls);
 }
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a867e7fb..706679d0a0b4 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -24,8 +24,11 @@
 #include <linux/bug.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/personality.h>
 #include <linux/preempt.h>
 #include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
 #include <linux/smp.h>
 
 /*
@@ -35,7 +38,6 @@
  */
 DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 static struct cpuinfo_arm64 boot_cpu_data;
-static bool mixed_endian_el0 = true;
 
 static char *icache_policy_str[] = {
 	[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
@@ -46,157 +48,148 @@ static char *icache_policy_str[] = {
 
 unsigned long __icache_flags;
 
-static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
+static const char *const hwcap_str[] = {
+	"fp",
+	"asimd",
+	"evtstrm",
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	"atomics",
+	NULL
+};
+
+#ifdef CONFIG_COMPAT
+static const char *const compat_hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	"java",
+	"iwmmxt",
+	"crunch",
+	"thumbee",
+	"neon",
+	"vfpv3",
+	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
+	"vfpd32",
+	"lpae",
+	"evtstrm"
+};
+
+static const char *const compat_hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+#endif /* CONFIG_COMPAT */
+
+static int c_show(struct seq_file *m, void *v)
 {
-	unsigned int cpu = smp_processor_id();
-	u32 l1ip = CTR_L1IP(info->reg_ctr);
+	int i, j;
+
+	for_each_online_cpu(i) {
+		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+		u32 midr = cpuinfo->reg_midr;
 
-	if (l1ip != ICACHE_POLICY_PIPT) {
 		/*
-		 * VIPT caches are non-aliasing if the VA always equals the PA
-		 * in all bit positions that are covered by the index. This is
-		 * the case if the size of a way (# of sets * line size) does
-		 * not exceed PAGE_SIZE.
+		 * glibc reads /proc/cpuinfo to determine the number of
+		 * online processors, looking for lines beginning with
+		 * "processor".  Give glibc what it expects.
 		 */
-		u32 waysize = icache_get_numsets() * icache_get_linesize();
+		seq_printf(m, "processor\t: %d\n", i);
 
-		if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
-			set_bit(ICACHEF_ALIASING, &__icache_flags);
+		/*
+		 * Dump out the common processor features in a single line.
+		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+		 * rather than attempting to parse this, but there's a body of
+		 * software which does already (at least for 32-bit).
+		 */
+		seq_puts(m, "Features\t:");
+		if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+			for (j = 0; compat_hwcap_str[j]; j++)
+				if (compat_elf_hwcap & (1 << j))
+					seq_printf(m, " %s", compat_hwcap_str[j]);
+
+			for (j = 0; compat_hwcap2_str[j]; j++)
+				if (compat_elf_hwcap2 & (1 << j))
+					seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+		} else {
+			for (j = 0; hwcap_str[j]; j++)
+				if (elf_hwcap & (1 << j))
+					seq_printf(m, " %s", hwcap_str[j]);
+		}
+		seq_puts(m, "\n");
+
+		seq_printf(m, "CPU implementer\t: 0x%02x\n",
+			   MIDR_IMPLEMENTOR(midr));
+		seq_printf(m, "CPU architecture: 8\n");
+		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
 	}
-	if (l1ip == ICACHE_POLICY_AIVIVT)
-		set_bit(ICACHEF_AIVIVT, &__icache_flags);
 
-	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
-}
-
-bool cpu_supports_mixed_endian_el0(void)
-{
-	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
-}
-
-bool system_supports_mixed_endian_el0(void)
-{
-	return mixed_endian_el0;
+	return 0;
 }
 
-static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
+static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
+	return *pos < 1 ? (void *)1 : NULL;
 }
 
-static void update_cpu_features(struct cpuinfo_arm64 *info)
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	update_mixed_endian_el0_support(info);
+	++*pos;
+	return NULL;
 }
 
-static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
+static void c_stop(struct seq_file *m, void *v)
 {
-	if ((boot & mask) == (cur & mask))
-		return 0;
-
-	pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
-		name, (unsigned long)boot, cpu, (unsigned long)cur);
-
-	return 1;
 }
 
-#define CHECK_MASK(field, mask, boot, cur, cpu) \
-	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
-
-#define CHECK(field, boot, cur, cpu) \
-	CHECK_MASK(field, ~0ULL, boot, cur, cpu)
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
 
-/*
- * Verify that CPUs don't have unexpected differences that will cause problems.
- */
-static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
+static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 {
 	unsigned int cpu = smp_processor_id();
-	struct cpuinfo_arm64 *boot = &boot_cpu_data;
-	unsigned int diff = 0;
-
-	/*
-	 * The kernel can handle differing I-cache policies, but otherwise
-	 * caches should look identical. Userspace JITs will make use of
-	 * *minLine.
-	 */
-	diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
-
-	/*
-	 * Userspace may perform DC ZVA instructions. Mismatched block sizes
-	 * could result in too much or too little memory being zeroed if a
-	 * process is preempted and migrated between CPUs.
-	 */
-	diff |= CHECK(dczid, boot, cur, cpu);
-
-	/* If different, timekeeping will be broken (especially with KVM) */
-	diff |= CHECK(cntfrq, boot, cur, cpu);
-
-	/*
-	 * The kernel uses self-hosted debug features and expects CPUs to
-	 * support identical debug features. We presently need CTX_CMPs, WRPs,
-	 * and BRPs to be identical.
-	 * ID_AA64DFR1 is currently RES0.
-	 */
-	diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
-	diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
-
-	/*
-	 * Even in big.LITTLE, processors should be identical instruction-set
-	 * wise.
-	 */
-	diff |= CHECK(id_aa64isar0, boot, cur, cpu);
-	diff |= CHECK(id_aa64isar1, boot, cur, cpu);
-
-	/*
-	 * Differing PARange support is fine as long as all peripherals and
-	 * memory are mapped within the minimum PARange of all CPUs.
-	 * Linux should not care about secure memory.
-	 * ID_AA64MMFR1 is currently RES0.
-	 */
-	diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
-	diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
-
-	/*
-	 * EL3 is not our concern.
-	 * ID_AA64PFR1 is currently RES0.
-	 */
-	diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
-	diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
+	u32 l1ip = CTR_L1IP(info->reg_ctr);
 
-	/*
-	 * If we have AArch32, we care about 32-bit features for compat. These
-	 * registers should be RES0 otherwise.
-	 */
-	diff |= CHECK(id_dfr0, boot, cur, cpu);
-	diff |= CHECK(id_isar0, boot, cur, cpu);
-	diff |= CHECK(id_isar1, boot, cur, cpu);
-	diff |= CHECK(id_isar2, boot, cur, cpu);
-	diff |= CHECK(id_isar3, boot, cur, cpu);
-	diff |= CHECK(id_isar4, boot, cur, cpu);
-	diff |= CHECK(id_isar5, boot, cur, cpu);
-	/*
-	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
-	 * ACTLR formats could differ across CPUs and therefore would have to
-	 * be trapped for virtualization anyway.
-	 */
-	diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
-	diff |= CHECK(id_mmfr1, boot, cur, cpu);
-	diff |= CHECK(id_mmfr2, boot, cur, cpu);
-	diff |= CHECK(id_mmfr3, boot, cur, cpu);
-	diff |= CHECK(id_pfr0, boot, cur, cpu);
-	diff |= CHECK(id_pfr1, boot, cur, cpu);
+	if (l1ip != ICACHE_POLICY_PIPT) {
+		/*
+		 * VIPT caches are non-aliasing if the VA always equals the PA
+		 * in all bit positions that are covered by the index. This is
+		 * the case if the size of a way (# of sets * line size) does
+		 * not exceed PAGE_SIZE.
+		 */
+		u32 waysize = icache_get_numsets() * icache_get_linesize();
 
-	diff |= CHECK(mvfr0, boot, cur, cpu);
-	diff |= CHECK(mvfr1, boot, cur, cpu);
-	diff |= CHECK(mvfr2, boot, cur, cpu);
+		if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
+			set_bit(ICACHEF_ALIASING, &__icache_flags);
+	}
+	if (l1ip == ICACHE_POLICY_AIVIVT)
+		set_bit(ICACHEF_AIVIVT, &__icache_flags);
 
-	/*
-	 * Mismatched CPU features are a recipe for disaster. Don't even
-	 * pretend to support them.
-	 */
-	WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
-			"Unsupported CPU feature variation.\n");
+	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	cpuinfo_detect_icache_policy(info);
 
 	check_local_cpu_errata();
-	check_local_cpu_features();
-	update_cpu_features(info);
 }
 
 void cpuinfo_store_cpu(void)
 {
 	struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
 	__cpuinfo_store_cpu(info);
-	cpuinfo_sanity_check(info);
+	update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
 }
 
 void __init cpuinfo_store_boot_cpu(void)
@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void)
 	__cpuinfo_store_cpu(info);
 
 	boot_cpu_data = *info;
+	init_cpu_features(&boot_cpu_data);
 }
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index cebf78661a55..8aee3aeec3e6 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -26,14 +26,16 @@
 #include <linux/stat.h>
 #include <linux/uaccess.h>
 
-#include <asm/debug-monitors.h>
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {
-	return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
+	return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_DEBUGVER_SHIFT);
 }
 
 /*
@@ -58,7 +60,7 @@ static u32 mdscr_read(void)
  * Allow root to disable self-hosted debug from userspace.
  * This is useful if you want to connect an external JTAG debugger.
  */
-static u32 debug_enabled = 1;
+static bool debug_enabled = true;
 
 static int create_debug_debugfs_entry(void)
 {
@@ -69,7 +71,7 @@ fs_initcall(create_debug_debugfs_entry);
 
 static int __init early_debug_disable(char *buf)
 {
-	debug_enabled = 0;
+	debug_enabled = false;
 	return 0;
 }
 
@@ -201,7 +203,7 @@ void unregister_step_hook(struct step_hook *hook)
 }
 
 /*
- * Call registered single step handers
+ * Call registered single step handlers
  * There is no Syndrome info to check for determining the handler.
  * So we call all the registered handlers, until the right handler is
  * found which returns zero.
@@ -271,20 +273,21 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
  * Use reader/writer locks instead of plain spinlock.
  */
 static LIST_HEAD(break_hook);
-static DEFINE_RWLOCK(break_hook_lock);
+static DEFINE_SPINLOCK(break_hook_lock);
 
 void register_break_hook(struct break_hook *hook)
 {
-	write_lock(&break_hook_lock);
-	list_add(&hook->node, &break_hook);
-	write_unlock(&break_hook_lock);
+	spin_lock(&break_hook_lock);
+	list_add_rcu(&hook->node, &break_hook);
+	spin_unlock(&break_hook_lock);
 }
 
 void unregister_break_hook(struct break_hook *hook)
 {
-	write_lock(&break_hook_lock);
-	list_del(&hook->node);
-	write_unlock(&break_hook_lock);
+	spin_lock(&break_hook_lock);
+	list_del_rcu(&hook->node);
+	spin_unlock(&break_hook_lock);
+	synchronize_rcu();
 }
 
 static int call_break_hook(struct pt_regs *regs, unsigned int esr)
@@ -292,11 +295,11 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
 	struct break_hook *hook;
 	int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
 
-	read_lock(&break_hook_lock);
-	list_for_each_entry(hook, &break_hook, node)
+	rcu_read_lock();
+	list_for_each_entry_rcu(hook, &break_hook, node)
 		if ((esr & hook->esr_mask) == hook->esr_val)
 			fn = hook->fn;
-	read_unlock(&break_hook_lock);
+	rcu_read_unlock();
 
 	return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
 }
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 8ce9b0577442..a773db92908b 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -29,7 +29,7 @@
 	 * we want to be. The kernel image wants to be placed at TEXT_OFFSET
 	 * from start of RAM.
 	 */
-ENTRY(efi_stub_entry)
+ENTRY(entry)
 	/*
 	 * Create a stack frame to save FP/LR with extra space
 	 * for image_addr variable passed to efi_entry().
@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
 	 * entries for the VA range of the current image, so no maintenance is
 	 * necessary.
 	 */
-	adr	x0, efi_stub_entry
-	adr	x1, efi_stub_entry_end
+	adr	x0, entry
+	adr	x1, entry_end
 	sub	x1, x1, x0
 	bl	__flush_dcache_area
 
@@ -120,5 +120,5 @@ efi_load_fail:
 	ldp	x29, x30, [sp], #32
 	ret
 
-efi_stub_entry_end:
-ENDPROC(efi_stub_entry)
+entry_end:
+ENDPROC(entry)
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
deleted file mode 100644
index 816120ece6bc..000000000000
--- a/arch/arm64/kernel/efi-stub.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org>
- *
- * This file implements the EFI boot stub for the arm64 kernel.
- * Adapted from ARM version by Mark Salter <msalter@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#include <linux/efi.h>
-#include <asm/efi.h>
-#include <asm/sections.h>
-
-efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
-					unsigned long *image_addr,
-					unsigned long *image_size,
-					unsigned long *reserve_addr,
-					unsigned long *reserve_size,
-					unsigned long dram_base,
-					efi_loaded_image_t *image)
-{
-	efi_status_t status;
-	unsigned long kernel_size, kernel_memsize = 0;
-	unsigned long nr_pages;
-	void *old_image_addr = (void *)*image_addr;
-
-	/* Relocate the image, if required. */
-	kernel_size = _edata - _text;
-	if (*image_addr != (dram_base + TEXT_OFFSET)) {
-		kernel_memsize = kernel_size + (_end - _edata);
-
-		/*
-		 * First, try a straight allocation at the preferred offset.
-		 * This will work around the issue where, if dram_base == 0x0,
-		 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
-		 * address of the allocation to be mistaken for a FAIL return
-		 * value or a NULL pointer). It will also ensure that, on
-		 * platforms where the [dram_base, dram_base + TEXT_OFFSET)
-		 * interval is partially occupied by the firmware (like on APM
-		 * Mustang), we can still place the kernel at the address
-		 * 'dram_base + TEXT_OFFSET'.
-		 */
-		*image_addr = *reserve_addr = dram_base + TEXT_OFFSET;
-		nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
-			   EFI_PAGE_SIZE;
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA, nr_pages,
-					(efi_physical_addr_t *)reserve_addr);
-		if (status != EFI_SUCCESS) {
-			kernel_memsize += TEXT_OFFSET;
-			status = efi_low_alloc(sys_table_arg, kernel_memsize,
-					       SZ_2M, reserve_addr);
-
-			if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
-				return status;
-			}
-			*image_addr = *reserve_addr + TEXT_OFFSET;
-		}
-		memcpy((void *)*image_addr, old_image_addr, kernel_size);
-		*reserve_size = kernel_memsize;
-	}
-
-
-	return EFI_SUCCESS;
-}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index e8ca6eaedd02..de46b50f4cdf 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -48,18 +48,8 @@ static struct mm_struct efi_mm = {
 	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
-	INIT_MM_CONTEXT(efi_mm)
 };
 
-static int uefi_debug __initdata;
-static int __init uefi_debug_setup(char *str)
-{
-	uefi_debug = 1;
-
-	return 0;
-}
-early_param("uefi_debug", uefi_debug_setup);
-
 static int __init is_normal_ram(efi_memory_desc_t *md)
 {
 	if (md->attribute & EFI_MEMORY_WB)
@@ -171,14 +161,14 @@ static __init void reserve_regions(void)
 	efi_memory_desc_t *md;
 	u64 paddr, npages, size;
 
-	if (uefi_debug)
+	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
 
 	for_each_efi_memory_desc(&memmap, md) {
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
-		if (uefi_debug) {
+		if (efi_enabled(EFI_DBG)) {
 			char buf[64];
 
 			pr_info("  0x%012llx-0x%012llx %s",
@@ -194,11 +184,11 @@ static __init void reserve_regions(void)
 
 		if (is_reserve_region(md)) {
 			memblock_reserve(paddr, size);
-			if (uefi_debug)
+			if (efi_enabled(EFI_DBG))
 				pr_cont("*");
 		}
 
-		if (uefi_debug)
+		if (efi_enabled(EFI_DBG))
 			pr_cont("\n");
 	}
 
@@ -210,14 +200,14 @@ void __init efi_init(void)
 	struct efi_fdt_params params;
 
 	/* Grab UEFI information placed in FDT by stub */
-	if (!efi_get_fdt_params(&params, uefi_debug))
+	if (!efi_get_fdt_params(&params))
 		return;
 
 	efi_system_table = params.system_table;
 
 	memblock_reserve(params.mmap & PAGE_MASK,
 			 PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
-	memmap.phys_map = (void *)params.mmap;
+	memmap.phys_map = params.mmap;
 	memmap.map = early_memremap(params.mmap, params.mmap_size);
 	memmap.map_end = memmap.map + params.mmap_size;
 	memmap.desc_size = params.desc_size;
@@ -258,7 +248,8 @@ static bool __init efi_virtmap_init(void)
 		 */
 		if (!is_normal_ram(md))
 			prot = __pgprot(PROT_DEVICE_nGnRE);
-		else if (md->type == EFI_RUNTIME_SERVICES_CODE)
+		else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+			 !PAGE_ALIGNED(md->phys_addr))
 			prot = PAGE_KERNEL_EXEC;
 		else
 			prot = PAGE_KERNEL;
@@ -290,7 +281,7 @@ static int __init arm64_enable_runtime_services(void)
 	pr_info("Remapping and enabling EFI services.\n");
 
 	mapsize = memmap.map_end - memmap.map;
-	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map,
+	memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
 						   mapsize);
 	if (!memmap.map) {
 		pr_err("Failed to remap EFI memory map\n");
@@ -343,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm)
 	else
 		cpu_switch_mm(mm->pgd, mm);
 
-	flush_tlb_all();
+	local_flush_tlb_all();
 	if (icache_is_aivivt())
-		__flush_icache_all();
+		__local_flush_icache_all();
 }
 
 void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 08cafc518b9a..0f03a8fe2314 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -178,6 +178,24 @@ ENTRY(ftrace_stub)
 ENDPROC(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* save return value regs*/
+	.macro save_return_regs
+	sub sp, sp, #64
+	stp x0, x1, [sp]
+	stp x2, x3, [sp, #16]
+	stp x4, x5, [sp, #32]
+	stp x6, x7, [sp, #48]
+	.endm
+
+	/* restore return value regs*/
+	.macro restore_return_regs
+	ldp x0, x1, [sp]
+	ldp x2, x3, [sp, #16]
+	ldp x4, x5, [sp, #32]
+	ldp x6, x7, [sp, #48]
+	add sp, sp, #64
+	.endm
+
 /*
  * void ftrace_graph_caller(void)
  *
@@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller)
  * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
  */
 ENTRY(return_to_handler)
-	str	x0, [sp, #-16]!
+	save_return_regs
 	mov	x0, x29			//     parent's fp
 	bl	ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
 	mov	x30, x0			// restore the original return address
-	ldr	x0, [sp], #16
+	restore_return_regs
 	ret
 END(return_to_handler)
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4306c937b1ff..7ed3d75f6304 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -430,6 +430,8 @@ el0_sync_compat:
 	b.eq	el0_fpsimd_acc
 	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el0_sp_pc
 	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
 	b.eq	el0_undef
 	cmp	x24, #ESR_ELx_EC_CP15_32	// CP15 MRC/MCR trap
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c56956a16d3f..4c46c54a3ad7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
  */
 static int __init fpsimd_init(void)
 {
-	u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-
-	if (pfr & (0xf << 16)) {
+	if (elf_hwcap & HWCAP_FP) {
+		fpsimd_pm_init();
+		fpsimd_hotplug_init();
+	} else {
 		pr_notice("Floating-point is not implemented\n");
-		return 0;
 	}
-	elf_hwcap |= HWCAP_FP;
 
-	if (pfr & (0xf << 20))
+	if (!(elf_hwcap & HWCAP_ASIMD))
 		pr_notice("Advanced SIMD is not implemented\n");
-	else
-		elf_hwcap |= HWCAP_ASIMD;
-
-	fpsimd_pm_init();
-	fpsimd_hotplug_init();
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 90d09eddd5b2..23cfc08fc8ba 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,11 +29,13 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
-#include <asm/thread_info.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
 #include <asm/virt.h>
 
 #define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)
@@ -46,32 +48,10 @@
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-#ifdef CONFIG_ARM64_64K_PAGES
-#define BLOCK_SHIFT	PAGE_SHIFT
-#define BLOCK_SIZE	PAGE_SIZE
-#define TABLE_SHIFT	PMD_SHIFT
-#else
-#define BLOCK_SHIFT	SECTION_SHIFT
-#define BLOCK_SIZE	SECTION_SIZE
-#define TABLE_SHIFT	PUD_SHIFT
-#endif
-
 #define KERNEL_START	_text
 #define KERNEL_END	_end
 
 /*
- * Initial memory map attributes.
- */
-#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
-#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
-
-#ifdef CONFIG_ARM64_64K_PAGES
-#define MM_MMUFLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
-#else
-#define MM_MMUFLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
-#endif
-
-/*
  * Kernel startup entry point.
  * ---------------------------
  *
@@ -120,8 +100,8 @@ efi_head:
 #endif
 
 #ifdef CONFIG_EFI
-	.globl	stext_offset
-	.set	stext_offset, stext - efi_head
+	.globl	__efistub_stext_offset
+	.set	__efistub_stext_offset, stext - efi_head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -144,8 +124,8 @@ optional_header:
 	.long	_end - stext			// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
-	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint
-	.long	stext_offset			// BaseOfCode
+	.long	__efistub_entry - efi_head	// AddressOfEntryPoint
+	.long	__efistub_stext_offset		// BaseOfCode
 
 extra_header_fields:
 	.quad	0				// ImageBase
@@ -162,7 +142,7 @@ extra_header_fields:
 	.long	_end - efi_head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
-	.long	stext_offset			// SizeOfHeaders
+	.long	__efistub_stext_offset		// SizeOfHeaders
 	.long	0				// CheckSum
 	.short	0xa				// Subsystem (EFI application)
 	.short	0				// DllCharacteristics
@@ -207,9 +187,9 @@ section_table:
 	.byte	0
 	.byte	0        		// end of 0 padding of section name
 	.long	_end - stext		// VirtualSize
-	.long	stext_offset		// VirtualAddress
+	.long	__efistub_stext_offset	// VirtualAddress
 	.long	_edata - stext		// SizeOfRawData
-	.long	stext_offset		// PointerToRawData
+	.long	__efistub_stext_offset	// PointerToRawData
 
 	.long	0		// PointerToRelocations (0 for executables)
 	.long	0		// PointerToLineNumbers (0 for executables)
@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args)
  */
 	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
 	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
-	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS > 3
+	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+#endif
+#if SWAPPER_PGTABLE_LEVELS > 2
+	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
 #endif
 	.endm
 
@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args)
  * Corrupts:	phys, start, end, pstate
  */
 	.macro	create_block_map, tbl, flags, phys, start, end
-	lsr	\phys, \phys, #BLOCK_SHIFT
-	lsr	\start, \start, #BLOCK_SHIFT
+	lsr	\phys, \phys, #SWAPPER_BLOCK_SHIFT
+	lsr	\start, \start, #SWAPPER_BLOCK_SHIFT
 	and	\start, \start, #PTRS_PER_PTE - 1	// table index
-	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
-	lsr	\end, \end, #BLOCK_SHIFT
+	orr	\phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT	// table entry
+	lsr	\end, \end, #SWAPPER_BLOCK_SHIFT
 	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
 9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
 	add	\start, \start, #1			// next entry
-	add	\phys, \phys, #BLOCK_SIZE		// next block
+	add	\phys, \phys, #SWAPPER_BLOCK_SIZE		// next block
 	cmp	\start, \end
 	b.ls	9999b
 	.endm
@@ -350,7 +333,7 @@ __create_page_tables:
 	cmp	x0, x6
 	b.lo	1b
 
-	ldr	x7, =MM_MMUFLAGS
+	ldr	x7, =SWAPPER_MM_MMUFLAGS
 
 	/*
 	 * Create the identity mapping.
@@ -444,6 +427,9 @@ __mmap_switched:
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
 	mov	x29, #0
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
@@ -498,6 +484,8 @@ CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
 	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
 	msr_s	ICC_SRE_EL2, x0
 	isb					// Make sure SRE is now set
+	mrs_s	x0, ICC_SRE_EL2			// Read SRE back,
+	tbz	x0, #0, 3f			// and check that it sticks
 	msr_s	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
 
 3:
@@ -628,10 +616,17 @@ ENDPROC(__secondary_switched)
  *  x0  = SCTLR_EL1 value for turning on the MMU.
  *  x27 = *virtual* address to jump to upon completion
  *
- * other registers depend on the function called upon completion
+ * Other registers depend on the function called upon completion.
+ *
+ * Checks if the selected granule size is supported by the CPU.
+ * If it isn't, park the CPU
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
+	mrs	x1, ID_AA64MMFR0_EL1
+	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+	b.ne	__no_granule_support
 	ldr	x5, =vectors
 	msr	vbar_el1, x5
 	msr	ttbr0_el1, x25			// load TTBR0
@@ -649,3 +644,8 @@ __enable_mmu:
 	isb
 	br	x27
 ENDPROC(__enable_mmu)
+
+__no_granule_support:
+	wfe
+	b __no_granule_support
+ENDPROC(__no_granule_support)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index bba85c8f8037..b45c95d34b83 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
 #include <linux/ptrace.h>
 #include <linux/smp.h>
 
+#include <asm/compat.h>
 #include <asm/current.h>
 #include <asm/debug-monitors.h>
 #include <asm/hw_breakpoint.h>
@@ -163,6 +164,20 @@ enum hw_breakpoint_ops {
 	HW_BREAKPOINT_RESTORE
 };
 
+static int is_compat_bp(struct perf_event *bp)
+{
+	struct task_struct *tsk = bp->hw.target;
+
+	/*
+	 * tsk can be NULL for per-cpu (non-ptrace) breakpoints.
+	 * In this case, use the native interface, since we don't have
+	 * the notion of a "compat CPU" and could end up relying on
+	 * deprecated behaviour if we use unaligned watchpoints in
+	 * AArch64 state.
+	 */
+	return tsk && is_compat_thread(task_thread_info(tsk));
+}
+
 /**
  * hw_breakpoint_slot_setup - Find and setup a perf slot according to
  *			      operations
@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 	 * Watchpoints can be of length 1, 2, 4 or 8 bytes.
 	 */
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
-		if (is_compat_task()) {
+		if (is_compat_bp(bp)) {
 			if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
 			    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
 				return -EINVAL;
@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * AArch32 tasks expect some simple alignment fixups, so emulate
 	 * that here.
 	 */
-	if (is_compat_task()) {
+	if (is_compat_bp(bp)) {
 		if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
 			alignment_mask = 0x7;
 		else
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 8fae0756e175..bc2abb8b1599 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -47,7 +47,10 @@
 #define __HEAD_FLAG_BE	0
 #endif
 
-#define __HEAD_FLAGS	(__HEAD_FLAG_BE << 0)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAGS	((__HEAD_FLAG_BE << 0) |	\
+			 (__HEAD_FLAG_PAGE_SIZE << 1))
 
 /*
  * These will output as part of the Image header, which should be little-endian
@@ -59,4 +62,37 @@
 	_kernel_offset_le	= DATA_LE64(TEXT_OFFSET);	\
 	_kernel_flags_le	= DATA_LE64(__HEAD_FLAGS);
 
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp		= __pi_memcmp;
+__efistub_memchr		= __pi_memchr;
+__efistub_memcpy		= __pi_memcpy;
+__efistub_memmove		= __pi_memmove;
+__efistub_memset		= __pi_memset;
+__efistub_strlen		= __pi_strlen;
+__efistub_strcmp		= __pi_strcmp;
+__efistub_strncmp		= __pi_strncmp;
+__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+
+#ifdef CONFIG_KASAN
+__efistub___memcpy		= __pi_memcpy;
+__efistub___memmove		= __pi_memmove;
+__efistub___memset		= __pi_memset;
+#endif
+
+__efistub__text			= _text;
+__efistub__end			= _end;
+__efistub__edata		= _edata;
+
+#endif
+
 #endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index f341866aa810..c08b9ad6f429 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -85,7 +85,7 @@ bool aarch64_insn_is_branch_imm(u32 insn)
 		aarch64_insn_is_bcond(insn));
 }
 
-static DEFINE_SPINLOCK(patch_lock);
+static DEFINE_RAW_SPINLOCK(patch_lock);
 
 static void __kprobes *patch_map(void *addr, int fixmap)
 {
@@ -131,13 +131,13 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
 	unsigned long flags = 0;
 	int ret;
 
-	spin_lock_irqsave(&patch_lock, flags);
+	raw_spin_lock_irqsave(&patch_lock, flags);
 	waddr = patch_map(addr, FIX_TEXT_POKE0);
 
 	ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
 
 	patch_unmap(FIX_TEXT_POKE0);
-	spin_unlock_irqrestore(&patch_lock, flags);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 11dc3fd47853..9f17ec071ee0 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
-#include <linux/ratelimit.h>
 
 unsigned long irq_err_count;
 
@@ -54,64 +53,3 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
-
-#ifdef CONFIG_HOTPLUG_CPU
-static bool migrate_one_irq(struct irq_desc *desc)
-{
-	struct irq_data *d = irq_desc_get_irq_data(desc);
-	const struct cpumask *affinity = irq_data_get_affinity_mask(d);
-	struct irq_chip *c;
-	bool ret = false;
-
-	/*
-	 * If this is a per-CPU interrupt, or the affinity does not
-	 * include this CPU, then we have nothing to do.
-	 */
-	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
-		return false;
-
-	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-		affinity = cpu_online_mask;
-		ret = true;
-	}
-
-	c = irq_data_get_irq_chip(d);
-	if (!c->irq_set_affinity)
-		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
-	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
-		cpumask_copy(irq_data_get_affinity_mask(d), affinity);
-
-	return ret;
-}
-
-/*
- * The current CPU has been marked offline.  Migrate IRQs off this CPU.
- * If the affinity settings do not allow other CPUs, force them onto any
- * available CPU.
- *
- * Note: we must iterate over all IRQs, whether they have an attached
- * action structure or not, as we need to get chained interrupts too.
- */
-void migrate_irqs(void)
-{
-	unsigned int i;
-	struct irq_desc *desc;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	for_each_irq_desc(i, desc) {
-		bool affinity_broken;
-
-		raw_spin_lock(&desc->lock);
-		affinity_broken = migrate_one_irq(desc);
-		raw_spin_unlock(&desc->lock);
-
-		if (affinity_broken)
-			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
-					    i, smp_processor_id());
-	}
-
-	local_irq_restore(flags);
-}
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 876eb8df50bf..f4bc779e62e8 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/moduleloader.h>
@@ -34,9 +35,18 @@
 
 void *module_alloc(unsigned long size)
 {
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
-				    NUMA_NO_NODE, __builtin_return_address(0));
+	void *p;
+
+	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
+	if (p && (kasan_module_alloc(p, size) < 0)) {
+		vfree(p);
+		return NULL;
+	}
+
+	return p;
 }
 
 enum aarch64_reloc_op {
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f9a74d4fff3b..5b1897e8ca24 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -18,651 +18,12 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/perf_event.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
 
-#include <asm/cputype.h>
-#include <asm/irq.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/*
- * ARMv8 supports a maximum of 32 events.
- * The cycle counter is included in this total.
- */
-#define ARMPMU_MAX_HWEVENTS		32
-
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
-
-int
-armpmu_get_max_events(void)
-{
-	int max_events = 0;
-
-	if (cpu_pmu != NULL)
-		max_events = cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(armpmu_get_max_events);
-
-int perf_num_counters(void)
-{
-	return armpmu_get_max_events();
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-#define HW_OP_UNSUPPORTED		0xFFFF
-
-#define C(_x) \
-	PERF_COUNT_HW_CACHE_##_x
-
-#define CACHE_OP_UNSUPPORTED		0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED					\
-	[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
-[0 ... C(MAX) - 1] = {							\
-	[0 ... C(OP_MAX) - 1] = {					\
-		[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,	\
-	},								\
-}
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
-		       u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >>  0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >>  8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -EINVAL;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	return (int)(config & raw_event_mask);
-}
-
-static int map_cpu_event(struct perf_event *event,
-			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-			 const unsigned (*cache_map)
-					[PERF_COUNT_HW_CACHE_MAX]
-					[PERF_COUNT_HW_CACHE_OP_MAX]
-					[PERF_COUNT_HW_CACHE_RESULT_MAX],
-			 u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-
-	switch (event->attr.type) {
-	case PERF_TYPE_HARDWARE:
-		return armpmu_map_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return armpmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return armpmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-int
-armpmu_event_set_period(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	/*
-	 * Limit the maximum period to prevent the counter value
-	 * from overtaking the one we are about to program. In
-	 * effect we are reducing max_period to account for
-	 * interrupt latency (and we are being very conservative).
-	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
-
-	local64_set(&hwc->prev_count, (u64)-left);
-
-	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-u64
-armpmu_event_update(struct perf_event *event,
-		    struct hw_perf_event *hwc,
-		    int idx)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(idx);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			     new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	/* Don't read disabled counters! */
-	if (hwc->idx < 0)
-		return;
-
-	armpmu_event_update(event, hwc, hwc->idx);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in armpmu_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(hwc, hwc->idx);
-		barrier(); /* why? */
-		armpmu_event_update(event, hwc, hwc->idx);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
-
-static void
-armpmu_start(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/*
-	 * Set the period again. Some counters can't be stopped, so when we
-	 * were stopped we simply disabled the IRQ source and the counter
-	 * may have been left counting. If we don't do this step then we may
-	 * get an interrupt too soon or *way* too late if the overflow has
-	 * happened since disabling.
-	 */
-	armpmu_event_set_period(event, hwc, hwc->idx);
-	armpmu->enable(hwc, hwc->idx);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	WARN_ON(idx < 0);
-
-	armpmu_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-
-	perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, hwc);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	armpmu->disable(hwc, idx);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		armpmu_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-				struct perf_event *event)
-{
-	struct arm_pmu *armpmu;
-	struct hw_perf_event fake_event = event->hw;
-	struct pmu *leader_pmu = event->group_leader->pmu;
-
-	if (is_software_event(event))
-		return 1;
-
-	/*
-	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-	 * core perf code won't check that the pmu->ctx == leader->ctx
-	 * until after pmu->event_init(event).
-	 */
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	armpmu = to_arm_pmu(event->pmu);
-	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
-
-	/*
-	 * Initialise the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(fake_used_mask, 0, sizeof(fake_used_mask));
-	fake_pmu.used_mask = fake_used_mask;
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void
-armpmu_disable_percpu_irq(void *data)
-{
-	unsigned int irq = *(unsigned int *)data;
-	disable_percpu_irq(irq);
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	int irq;
-	unsigned int i, irqs;
-	struct platform_device *pmu_device = armpmu->plat_device;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (!irqs)
-		return;
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq <= 0)
-		return;
-
-	if (irq_is_percpu(irq)) {
-		on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &cpu_hw_events);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (armpmu->irq_affinity)
-				cpu = armpmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq > 0)
-				free_irq(irq, armpmu);
-		}
-	}
-}
-
-static void
-armpmu_enable_percpu_irq(void *data)
-{
-	unsigned int irq = *(unsigned int *)data;
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err, irq;
-	unsigned int i, irqs;
-	struct platform_device *pmu_device = armpmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (!irqs) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq <= 0) {
-		pr_err("failed to get valid irq for PMU device\n");
-		return -ENODEV;
-	}
-
-	if (irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, armpmu->handle_irq,
-				"arm-pmu", &cpu_hw_events);
-
-		if (err) {
-			pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
-					irq);
-			armpmu_release_hardware(armpmu);
-			return err;
-		}
-
-		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq <= 0)
-				continue;
-
-			if (armpmu->irq_affinity)
-				cpu = armpmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-						irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, armpmu->handle_irq,
-					IRQF_NOBALANCING | IRQF_NO_THREAD,
-					"arm-pmu", armpmu);
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-						irq);
-				armpmu_release_hardware(armpmu);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &armpmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events	 = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	return attr->exclude_idle || attr->exclude_user ||
-	       attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping, err;
-
-	mapping = armpmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx		= -1;
-	hwc->config_base	= 0;
-	hwc->config		= 0;
-	hwc->event_base		= 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!armpmu->set_event_filter ||
-	     armpmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_debug("ARM performance counters do not support mode exclusion\n");
-		return -EPERM;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base	    |= (unsigned long)mapping;
-
-	if (!hwc->sample_period) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
-		hwc->last_period    = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	err = 0;
-	if (event->group_leader != event) {
-		err = validate_group(event);
-		if (err)
-			return -EINVAL;
-	}
-
-	return err;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;
-
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
 
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-	if (enabled)
-		armpmu->start();
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	armpmu->stop();
-}
-
-static void __init armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-	};
-}
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
-{
-	armpmu_init(armpmu);
-	return perf_pmu_register(&armpmu->pmu, name, type);
-}
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
@@ -708,6 +69,21 @@ enum armv8_pmuv3_perf_types {
 	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D,
 };
 
+/* ARMv8 Cortex-A53 specific event types. */
+enum armv8_a53_pmu_perf_types {
+	ARMV8_A53_PERFCTR_PREFETCH_LINEFILL			= 0xC2,
+};
+
+/* ARMv8 Cortex-A57 specific event types. */
+enum armv8_a57_perf_types {
+	ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD			= 0x40,
+	ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST			= 0x41,
+	ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD			= 0x42,
+	ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST			= 0x43,
+	ARMV8_A57_PERFCTR_DTLB_REFILL_LD			= 0x4c,
+	ARMV8_A57_PERFCTR_DTLB_REFILL_ST			= 0x4d,
+};
+
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
@@ -718,6 +94,28 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+/* ARM Cortex-A53 HW events mapping. */
+static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
+static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 						[PERF_COUNT_HW_CACHE_OP_MAX]
 						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -734,12 +132,60 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+
 /*
  * Perf Events' indices
  */
 #define	ARMV8_IDX_CYCLE_COUNTER	0
 #define	ARMV8_IDX_COUNTER0	1
-#define	ARMV8_IDX_COUNTER_LAST	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+#define	ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
+	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
 #define	ARMV8_MAX_COUNTERS	32
 #define	ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1)
@@ -805,49 +251,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
 	return pmovsr & ARMV8_OVERFLOWED_MASK;
 }
 
-static inline int armv8pmu_counter_valid(int idx)
+static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
 {
-	return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
+	return idx >= ARMV8_IDX_CYCLE_COUNTER &&
+		idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
 }
 
 static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 {
-	int ret = 0;
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), idx);
-	} else {
-		counter = ARMV8_IDX_TO_COUNTER(idx);
-		ret = pmnc & BIT(counter);
-	}
-
-	return ret;
+	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
 static inline int armv8pmu_select_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u selecting wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmselr_el0, %0" :: "r" (counter));
 	isb();
 
 	return idx;
 }
 
-static inline u32 armv8pmu_read_counter(int idx)
+static inline u32 armv8pmu_read_counter(struct perf_event *event)
 {
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
 	u32 value = 0;
 
-	if (!armv8pmu_counter_valid(idx))
+	if (!armv8pmu_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -858,9 +289,13 @@ static inline u32 armv8pmu_read_counter(int idx)
 	return value;
 }
 
-static inline void armv8pmu_write_counter(int idx, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
 {
-	if (!armv8pmu_counter_valid(idx))
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!armv8pmu_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -879,65 +314,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
 
 static inline int armv8pmu_enable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_disable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_enable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_disable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
 	isb();
 	/* Clear the overflow flag in case an interrupt is pending. */
 	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
 	isb();
+
 	return idx;
 }
 
@@ -955,10 +359,13 @@ static inline u32 armv8pmu_getreset_flags(void)
 	return value;
 }
 
-static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
@@ -989,10 +396,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_disable_event(struct perf_event *event)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
 
 	/*
 	 * Disable counter and interrupt
@@ -1016,7 +426,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 {
 	u32 pmovsr;
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -1036,7 +447,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 	 */
 	regs = get_irq_regs();
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -1053,13 +463,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	/*
@@ -1074,10 +484,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void armv8pmu_start(void)
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
@@ -1085,10 +495,10 @@ static void armv8pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_stop(void)
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
@@ -1097,10 +507,12 @@ static void armv8pmu_stop(void)
 }
 
 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
-				  struct hw_perf_event *event)
+				  struct perf_event *event)
 {
 	int idx;
-	unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
 	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
@@ -1151,11 +563,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 
 static void armv8pmu_reset(void *info)
 {
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
 	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
-		armv8pmu_disable_event(NULL, idx);
+	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv8pmu_disable_counter(idx);
+		armv8pmu_disable_intens(idx);
+	}
 
 	/* Initialize & Reset PMNC: C and P bits. */
 	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
@@ -1166,169 +581,104 @@ static void armv8pmu_reset(void *info)
 
 static int armv8_pmuv3_map_event(struct perf_event *event)
 {
-	return map_cpu_event(event, &armv8_pmuv3_perf_map,
+	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
 				&armv8_pmuv3_perf_cache_map,
 				ARMV8_EVTYPE_EVENT);
 }
 
-static struct arm_pmu armv8pmu = {
-	.handle_irq		= armv8pmu_handle_irq,
-	.enable			= armv8pmu_enable_event,
-	.disable		= armv8pmu_disable_event,
-	.read_counter		= armv8pmu_read_counter,
-	.write_counter		= armv8pmu_write_counter,
-	.get_event_idx		= armv8pmu_get_event_idx,
-	.start			= armv8pmu_start,
-	.stop			= armv8pmu_stop,
-	.reset			= armv8pmu_reset,
-	.max_period		= (1LLU << 32) - 1,
-};
+static int armv8_a53_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv8_a53_perf_map,
+				&armv8_a53_perf_cache_map,
+				ARMV8_EVTYPE_EVENT);
+}
 
-static u32 __init armv8pmu_read_num_pmnc_events(void)
+static int armv8_a57_map_event(struct perf_event *event)
 {
-	u32 nb_cnt;
+	return armpmu_map_event(event, &armv8_a57_perf_map,
+				&armv8_a57_perf_cache_map,
+				ARMV8_EVTYPE_EVENT);
+}
+
+static void armv8pmu_read_num_pmnc_events(void *info)
+{
+	int *nb_cnt = info;
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+	*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
 
-	/* Add the CPU cycles counter and return */
-	return nb_cnt + 1;
+	/* Add the CPU cycles counter */
+	*nb_cnt += 1;
 }
 
-static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
+static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
 {
-	armv8pmu.name			= "arm/armv8-pmuv3";
-	armv8pmu.map_event		= armv8_pmuv3_map_event;
-	armv8pmu.num_events		= armv8pmu_read_num_pmnc_events();
-	armv8pmu.set_event_filter	= armv8pmu_set_event_filter;
-	return &armv8pmu;
+	return smp_call_function_any(&arm_pmu->supported_cpus,
+				    armv8pmu_read_num_pmnc_events,
+				    &arm_pmu->num_events, 1);
 }
 
-/*
- * Ensure the PMU has sane values out of reset.
- * This requires SMP to be available, so exists as a separate initcall.
- */
-static int __init
-cpu_pmu_reset(void)
+static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	if (cpu_pmu && cpu_pmu->reset)
-		return on_each_cpu(cpu_pmu->reset, NULL, 1);
-	return 0;
+	cpu_pmu->handle_irq		= armv8pmu_handle_irq,
+	cpu_pmu->enable			= armv8pmu_enable_event,
+	cpu_pmu->disable		= armv8pmu_disable_event,
+	cpu_pmu->read_counter		= armv8pmu_read_counter,
+	cpu_pmu->write_counter		= armv8pmu_write_counter,
+	cpu_pmu->get_event_idx		= armv8pmu_get_event_idx,
+	cpu_pmu->start			= armv8pmu_start,
+	cpu_pmu->stop			= armv8pmu_stop,
+	cpu_pmu->reset			= armv8pmu_reset,
+	cpu_pmu->max_period		= (1LLU << 32) - 1,
+	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 }
-arch_initcall(cpu_pmu_reset);
-
-/*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id armpmu_of_device_ids[] = {
-	{.compatible = "arm,armv8-pmuv3"},
-	{},
-};
 
-static int armpmu_device_probe(struct platform_device *pdev)
+static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 {
-	int i, irq, *irqs;
-
-	if (!cpu_pmu)
-		return -ENODEV;
-
-	/* Don't bother with PPIs; they're already affine */
-	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0 && irq_is_percpu(irq))
-		goto out;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	for (i = 0; i < pdev->num_resources; ++i) {
-		struct device_node *dn;
-		int cpu;
-
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
-				      i);
-		if (!dn) {
-			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(pdev->dev.of_node), i);
-			break;
-		}
-
-		for_each_possible_cpu(cpu)
-			if (dn == of_cpu_device_node_get(cpu))
-				break;
-
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			break;
-		}
-		of_node_put(dn);
-
-		irqs[i] = cpu;
-	}
-
-	if (i == pdev->num_resources)
-		cpu_pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
-
-out:
-	cpu_pmu->plat_device = pdev;
-	return 0;
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_pmuv3";
+	cpu_pmu->map_event		= armv8_pmuv3_map_event;
+	return armv8pmu_probe_num_events(cpu_pmu);
 }
 
-static struct platform_driver armpmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
-	},
-	.probe		= armpmu_device_probe,
-};
-
-static int __init register_pmu_driver(void)
+static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return platform_driver_register(&armpmu_driver);
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_cortex_a53";
+	cpu_pmu->map_event		= armv8_a53_map_event;
+	return armv8pmu_probe_num_events(cpu_pmu);
 }
-device_initcall(register_pmu_driver);
 
-static struct pmu_hw_events *armpmu_get_cpu_events(void)
+static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return this_cpu_ptr(&cpu_hw_events);
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_cortex_a57";
+	cpu_pmu->map_event		= armv8_a57_map_event;
+	return armv8pmu_probe_num_events(cpu_pmu);
 }
 
-static void __init cpu_pmu_init(struct arm_pmu *armpmu)
-{
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-		events->events = per_cpu(hw_events, cpu);
-		events->used_mask = per_cpu(used_mask, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-	}
-	armpmu->get_hw_events = armpmu_get_cpu_events;
-}
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
+	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
+	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
+	{},
+};
 
-static int __init init_hw_perf_events(void)
+static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-	u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-
-	switch ((dfr >> 8) & 0xf) {
-	case 0x1:	/* PMUv3 */
-		cpu_pmu = armv8_pmuv3_pmu_init();
-		break;
-	}
+	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+}
 
-	if (cpu_pmu) {
-		pr_info("enabled with %s PMU driver, %d counters available\n",
-			cpu_pmu->name, cpu_pmu->num_events);
-		cpu_pmu_init(cpu_pmu);
-		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
-	} else {
-		pr_info("no hardware support available\n");
-	}
+static struct platform_driver armv8_pmu_driver = {
+	.driver		= {
+		.name	= "armv8-pmu",
+		.of_match_table = armv8_pmu_of_device_ids,
+	},
+	.probe		= armv8_pmu_device_probe,
+};
 
-	return 0;
+static int __init register_armv8_pmu_driver(void)
+{
+	return platform_driver_register(&armv8_pmu_driver);
 }
-early_initcall(init_hw_perf_events);
-
+device_initcall(register_armv8_pmu_driver);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 223b093c9440..f75b540bc3b4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -44,6 +44,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/personality.h>
 #include <linux/notifier.h>
+#include <trace/events/power.h>
 
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
 	 * This should do all the clock switching and wait for interrupt
 	 * tricks
 	 */
+	trace_cpu_idle_rcuidle(1, smp_processor_id());
 	cpu_do_idle();
 	local_irq_enable();
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 6bab21f84a9f..8119479147db 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -28,7 +28,6 @@
 #include <linux/console.h>
 #include <linux/cache.h>
 #include <linux/bootmem.h>
-#include <linux/seq_file.h>
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
@@ -44,7 +43,6 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/efi.h>
-#include <linux/personality.h>
 #include <linux/psci.h>
 
 #include <asm/acpi.h>
@@ -54,6 +52,7 @@
 #include <asm/elf.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
+#include <asm/kasan.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -64,23 +63,6 @@
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
 
-unsigned long elf_hwcap __read_mostly;
-EXPORT_SYMBOL_GPL(elf_hwcap);
-
-#ifdef CONFIG_COMPAT
-#define COMPAT_ELF_HWCAP_DEFAULT	\
-				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
-				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
-				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
-				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
-				 COMPAT_HWCAP_LPAE)
-unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
-unsigned int compat_elf_hwcap2 __read_mostly;
-#endif
-
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void)
 	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
-static void __init setup_processor(void)
-{
-	u64 features;
-	s64 block;
-	u32 cwg;
-	int cls;
-
-	printk("CPU: AArch64 Processor [%08x] revision %d\n",
-	       read_cpuid_id(), read_cpuid_id() & 15);
-
-	sprintf(init_utsname()->machine, ELF_PLATFORM);
-	elf_hwcap = 0;
-
-	cpuinfo_store_boot_cpu();
-
-	/*
-	 * Check for sane CTR_EL0.CWG value.
-	 */
-	cwg = cache_type_cwg();
-	cls = cache_line_size();
-	if (!cwg)
-		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
-			cls);
-	if (L1_CACHE_BYTES < cls)
-		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
-			L1_CACHE_BYTES, cls);
-
-	/*
-	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
-	 * The blocks we test below represent incremental functionality
-	 * for non-negative values. Negative values are reserved.
-	 */
-	features = read_cpuid(ID_AA64ISAR0_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_PMULL;
-		case 1:
-			elf_hwcap |= HWCAP_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		elf_hwcap |= HWCAP_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		elf_hwcap |= HWCAP_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		elf_hwcap |= HWCAP_CRC32;
-
-	block = cpuid_feature_extract_field(features, 20);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_ATOMICS;
-		case 1:
-			/* RESERVED */
-		case 0:
-			break;
-		}
-	}
-
-#ifdef CONFIG_COMPAT
-	/*
-	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
-	 * the AArch32 32-bit execution state.
-	 */
-	features = read_cpuid(ID_ISAR5_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
-		case 1:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
-#endif
-}
-
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
 	void *dt_virt = fixmap_remap_fdt(dt_phys);
@@ -364,6 +248,8 @@ static void __init relocate_initrd(void)
 		to_free = ram_end - orig_start;
 
 	size = orig_end - orig_start;
+	if (!size)
+		return;
 
 	/* initrd needs to be relocated completely inside linear mapping */
 	new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn),
@@ -404,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)
 {
-	setup_processor();
+	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
 
+	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
@@ -434,6 +321,9 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 	relocate_initrd();
+
+	kasan_init();
+
 	request_standard_resources();
 
 	early_ioremap_reset();
@@ -491,124 +381,3 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
-
-static const char *hwcap_str[] = {
-	"fp",
-	"asimd",
-	"evtstrm",
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	"atomics",
-	NULL
-};
-
-#ifdef CONFIG_COMPAT
-static const char *compat_hwcap_str[] = {
-	"swp",
-	"half",
-	"thumb",
-	"26bit",
-	"fastmult",
-	"fpa",
-	"vfp",
-	"edsp",
-	"java",
-	"iwmmxt",
-	"crunch",
-	"thumbee",
-	"neon",
-	"vfpv3",
-	"vfpv3d16",
-	"tls",
-	"vfpv4",
-	"idiva",
-	"idivt",
-	"vfpd32",
-	"lpae",
-	"evtstrm"
-};
-
-static const char *compat_hwcap2_str[] = {
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	NULL
-};
-#endif /* CONFIG_COMPAT */
-
-static int c_show(struct seq_file *m, void *v)
-{
-	int i, j;
-
-	for_each_online_cpu(i) {
-		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
-		u32 midr = cpuinfo->reg_midr;
-
-		/*
-		 * glibc reads /proc/cpuinfo to determine the number of
-		 * online processors, looking for lines beginning with
-		 * "processor".  Give glibc what it expects.
-		 */
-		seq_printf(m, "processor\t: %d\n", i);
-
-		/*
-		 * Dump out the common processor features in a single line.
-		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
-		 * rather than attempting to parse this, but there's a body of
-		 * software which does already (at least for 32-bit).
-		 */
-		seq_puts(m, "Features\t:");
-		if (personality(current->personality) == PER_LINUX32) {
-#ifdef CONFIG_COMPAT
-			for (j = 0; compat_hwcap_str[j]; j++)
-				if (compat_elf_hwcap & (1 << j))
-					seq_printf(m, " %s", compat_hwcap_str[j]);
-
-			for (j = 0; compat_hwcap2_str[j]; j++)
-				if (compat_elf_hwcap2 & (1 << j))
-					seq_printf(m, " %s", compat_hwcap2_str[j]);
-#endif /* CONFIG_COMPAT */
-		} else {
-			for (j = 0; hwcap_str[j]; j++)
-				if (elf_hwcap & (1 << j))
-					seq_printf(m, " %s", hwcap_str[j]);
-		}
-		seq_puts(m, "\n");
-
-		seq_printf(m, "CPU implementer\t: 0x%02x\n",
-			   MIDR_IMPLEMENTOR(midr));
-		seq_printf(m, "CPU architecture: 8\n");
-		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
-		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
-		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
-	}
-
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start	= c_start,
-	.next	= c_next,
-	.stop	= c_stop,
-	.show	= c_show
-};
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dbdaacddd9a5..2bbdc0e4fd14 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
-	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-	printk("CPU%u: Booted secondary processor\n", cpu);
 
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
+	local_flush_tlb_all();
 	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
 	trace_hardirqs_off();
 
+	/*
+	 * If the system has established the capabilities, make sure
+	 * this CPU ticks all of those. If it doesn't, the CPU will
+	 * fail to come online.
+	 */
+	verify_local_cpu_capabilities();
+
 	if (cpu_ops[cpu]->cpu_postboot)
 		cpu_ops[cpu]->cpu_postboot();
 
@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void)
 	 * the CPU migration code to notice that the CPU is online
 	 * before we continue.
 	 */
+	pr_info("CPU%u: Booted secondary processor [%08x]\n",
+					 cpu, read_cpuid_id());
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
@@ -232,12 +239,7 @@ int __cpu_disable(void)
 	/*
 	 * OK - migrate IRQs away from this CPU
 	 */
-	migrate_irqs();
-
-	/*
-	 * Remove this CPU from the vm mask set of all processes.
-	 */
-	clear_tasks_mm_cpumask(cpu);
+	irq_migrate_all_off_this_cpu();
 
 	return 0;
 }
@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+	setup_cpu_features();
 	hyp_mode_check();
 	apply_alternatives_all();
 }
 
 void __init smp_prepare_boot_cpu(void)
 {
+	cpuinfo_store_boot_cpu();
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 407991bf79f5..ccb6078ed9f2 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -48,11 +48,7 @@ int notrace unwind_frame(struct stackframe *frame)
 
 	frame->sp = fp + 0x10;
 	frame->fp = *(unsigned long *)(fp);
-	/*
-	 * -4 here because we care about the PC at time of bl,
-	 * not where the return will go.
-	 */
-	frame->pc = *(unsigned long *)(fp + 8) - 4;
+	frame->pc = *(unsigned long *)(fp + 8);
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 8297d502217e..40f7b33a22da 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -80,17 +80,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	if (ret == 0) {
 		/*
 		 * We are resuming from reset with TTBR0_EL1 set to the
-		 * idmap to enable the MMU; restore the active_mm mappings in
-		 * TTBR0_EL1 unless the active_mm == &init_mm, in which case
-		 * the thread entered cpu_suspend with TTBR0_EL1 set to
-		 * reserved TTBR0 page tables and should be restored as such.
+		 * idmap to enable the MMU; set the TTBR0 to the reserved
+		 * page tables to prevent speculative TLB allocations, flush
+		 * the local tlb and set the default tcr_el1.t0sz so that
+		 * the TTBR0 address space set-up is properly restored.
+		 * If the current active_mm != &init_mm we entered cpu_suspend
+		 * with mappings in TTBR0 that must be restored, so we switch
+		 * them back to complete the address space configuration
+		 * restoration before returning.
 		 */
-		if (mm == &init_mm)
-			cpu_set_reserved_ttbr0();
-		else
-			cpu_switch_mm(mm->pgd, mm);
+		cpu_set_reserved_ttbr0();
+		local_flush_tlb_all();
+		cpu_set_default_tcr_t0sz();
 
-		flush_tlb_all();
+		if (mm != &init_mm)
+			cpu_switch_mm(mm->pgd, mm);
 
 		/*
 		 * Restore per-cpu offset before any kernel
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 149151fb42bb..13339b6ffc1a 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -67,16 +67,10 @@ void __init time_init(void)
 	u32 arch_timer_rate;
 
 	of_clk_init(NULL);
-	clocksource_of_init();
+	clocksource_probe();
 
 	tick_setup_hrtimer_broadcast();
 
-	/*
-	 * Since ACPI or FDT will only one be available in the system,
-	 * we can use acpi_generic_timer_init() here safely
-	 */
-	acpi_generic_timer_init();
-
 	arch_timer_rate = arch_timer_get_rate();
 	if (!arch_timer_rate)
 		panic("Unable to initialise architected timer.\n");
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f93aae5e4307..e9b9b5364393 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 	set_fs(fs);
 }
 
-static void dump_backtrace_entry(unsigned long where, unsigned long stack)
+static void dump_backtrace_entry(unsigned long where)
 {
+	/*
+	 * Note that 'where' can have a physical address, but it's not handled.
+	 */
 	print_ip_sym(where);
-	if (in_exception_text(where))
-		dump_mem("", "Exception stack", stack,
-			 stack + sizeof(struct pt_regs), false);
 }
 
 static void dump_instr(const char *lvl, struct pt_regs *regs)
@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 	pr_emerg("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
+		unsigned long stack;
 		int ret;
 
+		dump_backtrace_entry(where);
 		ret = unwind_frame(&frame);
 		if (ret < 0)
 			break;
-		dump_backtrace_entry(where, frame.sp);
+		stack = frame.sp;
+		if (in_exception_text(where))
+			dump_mem("", "Exception stack", stack,
+				 stack + sizeof(struct pt_regs), false);
 	}
 }
 
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d0..1ee2c3937d4e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
 #define PECOFF_EDATA_PADDING
 #endif
 
-#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
 #define ALIGN_DEBUG_RO			. = ALIGN(1<<SECTION_SHIFT);
 #define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
+#elif defined(CONFIG_DEBUG_RODATA)
+#define ALIGN_DEBUG_RO			. = ALIGN(1<<PAGE_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
 #else
 #define ALIGN_DEBUG_RO
 #define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 5c7e920e4861..a5272c07d1cb 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -16,9 +16,13 @@ menuconfig VIRTUALIZATION
 
 if VIRTUALIZATION
 
+config KVM_ARM_VGIC_V3
+	bool
+
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on OF
+	depends on !ARM64_16K_PAGES
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
@@ -31,8 +35,11 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_IRQFD
+	select KVM_ARM_VGIC_V3
 	---help---
 	  Support hosting virtualized guest machines.
+	  We don't support KVM with 16K page tables yet, due to the multiple
+	  levels of fake page tables.
 
 	  If unsure, say N.
 
@@ -41,4 +48,6 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+source drivers/vhost/Kconfig
+
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index e5836138ec42..1599701ef044 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -880,6 +880,14 @@ __kvm_hyp_panic:
 
 	bl __restore_sysregs
 
+	/*
+	 * Make sure we have a valid host stack, and don't leave junk in the
+	 * frame pointer that will give us a misleading host stack unwinding.
+	 */
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	msr	sp_el1, x22
+	mov	x29, xzr
+
 1:	adr	x0, __hyp_panic_str
 	adr	x1, 2f
 	ldp	x2, x3, [x1]
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 91cf5350b328..f34745cb3d23 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
 {
 	u64 pfr0;
 
-	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
 	return !!(pfr0 & 0x20);
 }
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d03d3af17e7e..87a64e8db04c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 	if (p->is_write) {
 		return ignore_write(vcpu, p);
 	} else {
-		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-		u32 el3 = !!((pfr >> 12) & 0xf);
+		u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
+		u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
+		u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
 
-		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
-					  (((dfr >> 12) & 0xf) << 24) |
-					  (((dfr >> 28) & 0xf) << 20) |
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+					  (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+					  (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
 					  (6 << 16) | (el3 << 14) | (el3 << 12));
 		return true;
 	}
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 1be9ef27be97..4699cd74f87e 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -31,49 +32,58 @@
  * Returns:
  *	x0 - bytes not copied
  */
+
+	.macro ldrb1 ptr, regB, val
+	USER(9998f, ldrb  \ptr, [\regB], \val)
+	.endm
+
+	.macro strb1 ptr, regB, val
+	strb \ptr, [\regB], \val
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	USER(9998f, ldrh  \ptr, [\regB], \val)
+	.endm
+
+	.macro strh1 ptr, regB, val
+	strh \ptr, [\regB], \val
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	USER(9998f, ldr \ptr, [\regB], \val)
+	.endm
+
+	.macro str1 ptr, regB, val
+	str \ptr, [\regB], \val
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	stp \ptr, \regB, [\regC], \val
+	.endm
+
+end	.req	x5
 ENTRY(__copy_from_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x1, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-USER(9f, ldp	x3, x4, [x1], #16)
-	subs	x2, x2, #16
-	stp	x3, x4, [x0], #16
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-USER(9f, ldr	x3, [x1], #8	)
-	sub	x2, x2, #8
-	str	x3, [x0], #8
-2:	adds	x2, x2, #4
-	b.mi	3f
-USER(9f, ldr	w3, [x1], #4	)
-	sub	x2, x2, #4
-	str	w3, [x0], #4
-3:	adds	x2, x2, #2
-	b.mi	4f
-USER(9f, ldrh	w3, [x1], #2	)
-	sub	x2, x2, #2
-	strh	w3, [x0], #2
-4:	adds	x2, x2, #1
-	b.mi	5f
-USER(9f, ldrb	w3, [x1]	)
-	strb	w3, [x0]
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0				// Nothing to copy
 	ret
 ENDPROC(__copy_from_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x2, x5, x1
-	mov	x3, x2
-10:	strb	wzr, [x0], #1			// zero remaining buffer space
-	subs	x3, x3, #1
-	b.ne	10b
-	mov	x0, x2				// bytes not copied
+9998:
+	sub	x0, end, dst
+9999:
+	strb	wzr, [dst], #1			// zero remaining buffer space
+	cmp	dst, end
+	b.lo	9999b
 	ret
 	.previous
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 1b94661e22b3..81c8fc93c100 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -20,6 +20,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -33,44 +34,52 @@
  * Returns:
  *	x0 - bytes not copied
  */
+	.macro ldrb1 ptr, regB, val
+	USER(9998f, ldrb  \ptr, [\regB], \val)
+	.endm
+
+	.macro strb1 ptr, regB, val
+	USER(9998f, strb \ptr, [\regB], \val)
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	USER(9998f, ldrh  \ptr, [\regB], \val)
+	.endm
+
+	.macro strh1 ptr, regB, val
+	USER(9998f, strh \ptr, [\regB], \val)
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	USER(9998f, ldr \ptr, [\regB], \val)
+	.endm
+
+	.macro str1 ptr, regB, val
+	USER(9998f, str \ptr, [\regB], \val)
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	.endm
+
+end	.req	x5
 ENTRY(__copy_in_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x0, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-USER(9f, ldp	x3, x4, [x1], #16)
-	subs	x2, x2, #16
-USER(9f, stp	x3, x4, [x0], #16)
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-USER(9f, ldr	x3, [x1], #8	)
-	sub	x2, x2, #8
-USER(9f, str	x3, [x0], #8	)
-2:	adds	x2, x2, #4
-	b.mi	3f
-USER(9f, ldr	w3, [x1], #4	)
-	sub	x2, x2, #4
-USER(9f, str	w3, [x0], #4	)
-3:	adds	x2, x2, #2
-	b.mi	4f
-USER(9f, ldrh	w3, [x1], #2	)
-	sub	x2, x2, #2
-USER(9f, strh	w3, [x0], #2	)
-4:	adds	x2, x2, #1
-	b.mi	5f
-USER(9f, ldrb	w3, [x1]	)
-USER(9f, strb	w3, [x0]	)
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0
 	ret
 ENDPROC(__copy_in_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x0, x5, x0			// bytes not copied
+9998:	sub	x0, end, dst			// bytes not copied
 	ret
 	.previous
diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
new file mode 100644
index 000000000000..410fbdb8163f
--- /dev/null
+++ b/arch/arm64/lib/copy_template.S
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/*
+ * Copy a buffer from src to dest (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ *	x2 - n
+ * Returns:
+ *	x0 - dest
+ */
+dstin	.req	x0
+src	.req	x1
+count	.req	x2
+tmp1	.req	x3
+tmp1w	.req	w3
+tmp2	.req	x4
+tmp2w	.req	w4
+dst	.req	x6
+
+A_l	.req	x7
+A_h	.req	x8
+B_l	.req	x9
+B_h	.req	x10
+C_l	.req	x11
+C_h	.req	x12
+D_l	.req	x13
+D_h	.req	x14
+
+	mov	dst, dstin
+	cmp	count, #16
+	/*When memory length is less than 16, the accessed are not aligned.*/
+	b.lo	.Ltiny15
+
+	neg	tmp2, src
+	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
+	b.eq	.LSrcAligned
+	sub	count, count, tmp2
+	/*
+	* Copy the leading memory data from src to dst in an increasing
+	* address order.By this way,the risk of overwritting the source
+	* memory data is eliminated when the distance between src and
+	* dst is less than 16. The memory accesses here are alignment.
+	*/
+	tbz	tmp2, #0, 1f
+	ldrb1	tmp1w, src, #1
+	strb1	tmp1w, dst, #1
+1:
+	tbz	tmp2, #1, 2f
+	ldrh1	tmp1w, src, #2
+	strh1	tmp1w, dst, #2
+2:
+	tbz	tmp2, #2, 3f
+	ldr1	tmp1w, src, #4
+	str1	tmp1w, dst, #4
+3:
+	tbz	tmp2, #3, .LSrcAligned
+	ldr1	tmp1, src, #8
+	str1	tmp1, dst, #8
+
+.LSrcAligned:
+	cmp	count, #64
+	b.ge	.Lcpy_over64
+	/*
+	* Deal with small copies quickly by dropping straight into the
+	* exit block.
+	*/
+.Ltail63:
+	/*
+	* Copy up to 48 bytes of data. At this point we only need the
+	* bottom 6 bits of count to be accurate.
+	*/
+	ands	tmp1, count, #0x30
+	b.eq	.Ltiny15
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+1:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+2:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+.Ltiny15:
+	/*
+	* Prefer to break one ldp/stp into several load/store to access
+	* memory in an increasing address order,rather than to load/store 16
+	* bytes from (src-16) to (dst-16) and to backward the src to aligned
+	* address,which way is used in original cortex memcpy. If keeping
+	* the original memcpy process here, memmove need to satisfy the
+	* precondition that src address is at least 16 bytes bigger than dst
+	* address,otherwise some source data will be overwritten when memove
+	* call memcpy directly. To make memmove simpler and decouple the
+	* memcpy's dependency on memmove, withdrew the original process.
+	*/
+	tbz	count, #3, 1f
+	ldr1	tmp1, src, #8
+	str1	tmp1, dst, #8
+1:
+	tbz	count, #2, 2f
+	ldr1	tmp1w, src, #4
+	str1	tmp1w, dst, #4
+2:
+	tbz	count, #1, 3f
+	ldrh1	tmp1w, src, #2
+	strh1	tmp1w, dst, #2
+3:
+	tbz	count, #0, .Lexitfunc
+	ldrb1	tmp1w, src, #1
+	strb1	tmp1w, dst, #1
+
+	b	.Lexitfunc
+
+.Lcpy_over64:
+	subs	count, count, #128
+	b.ge	.Lcpy_body_large
+	/*
+	* Less than 128 bytes to copy, so handle 64 here and then jump
+	* to the tail.
+	*/
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	b	.Lexitfunc
+
+	/*
+	* Critical loop.  Start at a new cache line boundary.  Assuming
+	* 64 bytes per line this ensures the entire loop is in one line.
+	*/
+	.p2align	L1_CACHE_SHIFT
+.Lcpy_body_large:
+	/* pre-get 64 bytes data. */
+	ldp1	A_l, A_h, src, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	ldp1	D_l, D_h, src, #16
+1:
+	/*
+	* interlace the load of next 64 bytes data block with store of the last
+	* loaded 64 bytes data.
+	*/
+	stp1	A_l, A_h, dst, #16
+	ldp1	A_l, A_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	D_l, D_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	subs	count, count, #64
+	b.ge	1b
+	stp1	A_l, A_h, dst, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+.Lexitfunc:
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index a257b47e2dc4..7512bbbc07ac 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -31,44 +32,52 @@
  * Returns:
  *	x0 - bytes not copied
  */
+	.macro ldrb1 ptr, regB, val
+	ldrb  \ptr, [\regB], \val
+	.endm
+
+	.macro strb1 ptr, regB, val
+	USER(9998f, strb \ptr, [\regB], \val)
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	ldrh  \ptr, [\regB], \val
+	.endm
+
+	.macro strh1 ptr, regB, val
+	USER(9998f, strh \ptr, [\regB], \val)
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	ldr \ptr, [\regB], \val
+	.endm
+
+	.macro str1 ptr, regB, val
+	USER(9998f, str \ptr, [\regB], \val)
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	ldp \ptr, \regB, [\regC], \val
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	.endm
+
+end	.req	x5
 ENTRY(__copy_to_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x0, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-	ldp	x3, x4, [x1], #16
-	subs	x2, x2, #16
-USER(9f, stp	x3, x4, [x0], #16)
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-	ldr	x3, [x1], #8
-	sub	x2, x2, #8
-USER(9f, str	x3, [x0], #8	)
-2:	adds	x2, x2, #4
-	b.mi	3f
-	ldr	w3, [x1], #4
-	sub	x2, x2, #4
-USER(9f, str	w3, [x0], #4	)
-3:	adds	x2, x2, #2
-	b.mi	4f
-	ldrh	w3, [x1], #2
-	sub	x2, x2, #2
-USER(9f, strh	w3, [x0], #2	)
-4:	adds	x2, x2, #1
-	b.mi	5f
-	ldrb	w3, [x1]
-USER(9f, strb	w3, [x0]	)
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0
 	ret
 ENDPROC(__copy_to_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x0, x5, x0			// bytes not copied
+9998:	sub	x0, end, dst			// bytes not copied
 	ret
 	.previous
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 8636b7549163..4444c1d25f4b 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -41,4 +41,4 @@ ENTRY(memchr)
 	ret
 2:	mov	x0, #0
 	ret
-ENDPROC(memchr)
+ENDPIPROC(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 6ea0776ba6de..ffbdec00327d 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -255,4 +255,4 @@ CPU_LE( rev	data2, data2 )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPROC(memcmp)
+ENDPIPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 8a9a96d3ddae..67613937711f 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -36,166 +36,42 @@
  * Returns:
  *	x0 - dest
  */
-dstin	.req	x0
-src	.req	x1
-count	.req	x2
-tmp1	.req	x3
-tmp1w	.req	w3
-tmp2	.req	x4
-tmp2w	.req	w4
-tmp3	.req	x5
-tmp3w	.req	w5
-dst	.req	x6
+	.macro ldrb1 ptr, regB, val
+	ldrb  \ptr, [\regB], \val
+	.endm
 
-A_l	.req	x7
-A_h	.req	x8
-B_l	.req	x9
-B_h	.req	x10
-C_l	.req	x11
-C_h	.req	x12
-D_l	.req	x13
-D_h	.req	x14
+	.macro strb1 ptr, regB, val
+	strb \ptr, [\regB], \val
+	.endm
 
-ENTRY(memcpy)
-	mov	dst, dstin
-	cmp	count, #16
-	/*When memory length is less than 16, the accessed are not aligned.*/
-	b.lo	.Ltiny15
+	.macro ldrh1 ptr, regB, val
+	ldrh  \ptr, [\regB], \val
+	.endm
 
-	neg	tmp2, src
-	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
-	b.eq	.LSrcAligned
-	sub	count, count, tmp2
-	/*
-	* Copy the leading memory data from src to dst in an increasing
-	* address order.By this way,the risk of overwritting the source
-	* memory data is eliminated when the distance between src and
-	* dst is less than 16. The memory accesses here are alignment.
-	*/
-	tbz	tmp2, #0, 1f
-	ldrb	tmp1w, [src], #1
-	strb	tmp1w, [dst], #1
-1:
-	tbz	tmp2, #1, 2f
-	ldrh	tmp1w, [src], #2
-	strh	tmp1w, [dst], #2
-2:
-	tbz	tmp2, #2, 3f
-	ldr	tmp1w, [src], #4
-	str	tmp1w, [dst], #4
-3:
-	tbz	tmp2, #3, .LSrcAligned
-	ldr	tmp1, [src],#8
-	str	tmp1, [dst],#8
+	.macro strh1 ptr, regB, val
+	strh \ptr, [\regB], \val
+	.endm
 
-.LSrcAligned:
-	cmp	count, #64
-	b.ge	.Lcpy_over64
-	/*
-	* Deal with small copies quickly by dropping straight into the
-	* exit block.
-	*/
-.Ltail63:
-	/*
-	* Copy up to 48 bytes of data. At this point we only need the
-	* bottom 6 bits of count to be accurate.
-	*/
-	ands	tmp1, count, #0x30
-	b.eq	.Ltiny15
-	cmp	tmp1w, #0x20
-	b.eq	1f
-	b.lt	2f
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-1:
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-2:
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-.Ltiny15:
-	/*
-	* Prefer to break one ldp/stp into several load/store to access
-	* memory in an increasing address order,rather than to load/store 16
-	* bytes from (src-16) to (dst-16) and to backward the src to aligned
-	* address,which way is used in original cortex memcpy. If keeping
-	* the original memcpy process here, memmove need to satisfy the
-	* precondition that src address is at least 16 bytes bigger than dst
-	* address,otherwise some source data will be overwritten when memove
-	* call memcpy directly. To make memmove simpler and decouple the
-	* memcpy's dependency on memmove, withdrew the original process.
-	*/
-	tbz	count, #3, 1f
-	ldr	tmp1, [src], #8
-	str	tmp1, [dst], #8
-1:
-	tbz	count, #2, 2f
-	ldr	tmp1w, [src], #4
-	str	tmp1w, [dst], #4
-2:
-	tbz	count, #1, 3f
-	ldrh	tmp1w, [src], #2
-	strh	tmp1w, [dst], #2
-3:
-	tbz	count, #0, .Lexitfunc
-	ldrb	tmp1w, [src]
-	strb	tmp1w, [dst]
+	.macro ldr1 ptr, regB, val
+	ldr \ptr, [\regB], \val
+	.endm
 
-.Lexitfunc:
-	ret
+	.macro str1 ptr, regB, val
+	str \ptr, [\regB], \val
+	.endm
 
-.Lcpy_over64:
-	subs	count, count, #128
-	b.ge	.Lcpy_body_large
-	/*
-	* Less than 128 bytes to copy, so handle 64 here and then jump
-	* to the tail.
-	*/
-	ldp	A_l, A_h, [src],#16
-	stp	A_l, A_h, [dst],#16
-	ldp	B_l, B_h, [src],#16
-	ldp	C_l, C_h, [src],#16
-	stp	B_l, B_h, [dst],#16
-	stp	C_l, C_h, [dst],#16
-	ldp	D_l, D_h, [src],#16
-	stp	D_l, D_h, [dst],#16
+	.macro ldp1 ptr, regB, regC, val
+	ldp \ptr, \regB, [\regC], \val
+	.endm
 
-	tst	count, #0x3f
-	b.ne	.Ltail63
-	ret
+	.macro stp1 ptr, regB, regC, val
+	stp \ptr, \regB, [\regC], \val
+	.endm
 
-	/*
-	* Critical loop.  Start at a new cache line boundary.  Assuming
-	* 64 bytes per line this ensures the entire loop is in one line.
-	*/
-	.p2align	L1_CACHE_SHIFT
-.Lcpy_body_large:
-	/* pre-get 64 bytes data. */
-	ldp	A_l, A_h, [src],#16
-	ldp	B_l, B_h, [src],#16
-	ldp	C_l, C_h, [src],#16
-	ldp	D_l, D_h, [src],#16
-1:
-	/*
-	* interlace the load of next 64 bytes data block with store of the last
-	* loaded 64 bytes data.
-	*/
-	stp	A_l, A_h, [dst],#16
-	ldp	A_l, A_h, [src],#16
-	stp	B_l, B_h, [dst],#16
-	ldp	B_l, B_h, [src],#16
-	stp	C_l, C_h, [dst],#16
-	ldp	C_l, C_h, [src],#16
-	stp	D_l, D_h, [dst],#16
-	ldp	D_l, D_h, [src],#16
-	subs	count, count, #64
-	b.ge	1b
-	stp	A_l, A_h, [dst],#16
-	stp	B_l, B_h, [dst],#16
-	stp	C_l, C_h, [dst],#16
-	stp	D_l, D_h, [dst],#16
-
-	tst	count, #0x3f
-	b.ne	.Ltail63
+	.weak memcpy
+ENTRY(__memcpy)
+ENTRY(memcpy)
+#include "copy_template.S"
 	ret
-ENDPROC(memcpy)
+ENDPIPROC(memcpy)
+ENDPROC(__memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index 57b19ea2dad4..a5a4459013b1 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -57,12 +57,14 @@ C_h	.req	x12
 D_l	.req	x13
 D_h	.req	x14
 
+	.weak memmove
+ENTRY(__memmove)
 ENTRY(memmove)
 	cmp	dstin, src
-	b.lo	memcpy
+	b.lo	__memcpy
 	add	tmp1, src, count
 	cmp	dstin, tmp1
-	b.hs	memcpy		/* No overlap.  */
+	b.hs	__memcpy		/* No overlap.  */
 
 	add	dst, dstin, count
 	add	src, src, count
@@ -194,4 +196,5 @@ ENTRY(memmove)
 	tst	count, #0x3f
 	b.ne	.Ltail63
 	ret
-ENDPROC(memmove)
+ENDPIPROC(memmove)
+ENDPROC(__memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 7c72dfd36b63..f2670a9f218c 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -54,6 +54,8 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
+	.weak memset
+ENTRY(__memset)
 ENTRY(memset)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
@@ -213,4 +215,5 @@ ENTRY(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-ENDPROC(memset)
+ENDPIPROC(memset)
+ENDPROC(__memset)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 42f828b06c59..471fe61760ef 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -231,4 +231,4 @@ CPU_BE(	orr	syndrome, diff, has_nul )
 	lsr	data1, data1, #56
 	sub	result, data1, data2, lsr #56
 	ret
-ENDPROC(strcmp)
+ENDPIPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 987b68b9ce44..55ccc8e24c08 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -123,4 +123,4 @@ CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
 	csinv	data1, data1, xzr, le
 	csel	data2, data2, data2a, le
 	b	.Lrealigned
-ENDPROC(strlen)
+ENDPIPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 0224cf5a5533..e267044761c6 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -307,4 +307,4 @@ CPU_BE( orr	syndrome, diff, has_nul )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPROC(strncmp)
+ENDPIPROC(strncmp)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 773d37a14039..57f57fde5722 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,3 +4,6 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
+
+obj-$(CONFIG_KASAN)		+= kasan_init.o
+KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5df4a0f..cfa44a6adc0a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
 	b.lo	1b
 	dsb	sy
 	ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
 
 /*
  *	__inval_cache_range(start, end)
@@ -131,7 +131,7 @@ __dma_inv_range:
 	b.lo	2b
 	dsb	sy
 	ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
 ENDPROC(__dma_inv_range)
 
 /*
@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
 	b.lo	1b
 	dsb	sy
 	ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__dma_inv_range
 	b	__dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
 
 /*
  *	__dma_unmap_area(start, size, dir)
@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__dma_inv_range
 	ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d70ff14dbdbd..f636a2639f03 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,135 +17,185 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/init.h>
+#include <linux/bitops.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
 
+#include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
-#include <asm/cachetype.h>
 
-#define asid_bits(reg) \
-	(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+static u32 asid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-#define ASID_FIRST_VERSION	(1 << MAX_ASID_BITS)
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
 
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
-	mm->context.id = 0;
-	raw_spin_lock_init(&mm->context.id_lock);
-}
+#define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION	(1UL << asid_bits)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
 
-static void flush_context(void)
+static void flush_context(unsigned int cpu)
 {
-	/* set the reserved TTBR0 before flushing the TLB */
-	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
-	if (icache_is_aivivt())
-		__flush_icache_all();
-}
+	int i;
+	u64 asid;
 
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
-{
-	unsigned long flags;
+	/* Update the list of reserved ASIDs and the ASID bitmap. */
+	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 
 	/*
-	 * Locking needed for multi-threaded applications where the same
-	 * mm->context.id could be set from different CPUs during the
-	 * broadcast. This function is also called via IPI so the
-	 * mm->context.id_lock has to be IRQ-safe.
+	 * Ensure the generation bump is observed before we xchg the
+	 * active_asids.
 	 */
-	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
-	if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+	smp_wmb();
+
+	for_each_possible_cpu(i) {
+		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
-		 * Old version of ASID found. Set the new one and reset
-		 * mm_cpumask(mm).
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
 		 */
-		mm->context.id = asid;
-		cpumask_clear(mm_cpumask(mm));
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
+		per_cpu(reserved_asids, i) = asid;
 	}
-	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
 
-	/*
-	 * Set the mm_cpumask(mm) bit for the current CPU.
-	 */
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	cpumask_setall(&tlb_flush_pending);
+
+	if (icache_is_aivivt())
+		__flush_icache_all();
 }
 
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+static int is_reserved_asid(u64 asid)
+{
+	int cpu;
+	for_each_possible_cpu(cpu)
+		if (per_cpu(reserved_asids, cpu) == asid)
+			return 1;
+	return 0;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 {
-	unsigned int asid;
-	unsigned int cpu = smp_processor_id();
-	struct mm_struct *mm = current->active_mm;
+	static u32 cur_idx = 1;
+	u64 asid = atomic64_read(&mm->context.id);
+	u64 generation = atomic64_read(&asid_generation);
+
+	if (asid != 0) {
+		/*
+		 * If our current ASID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
+		 */
+		if (is_reserved_asid(asid))
+			return generation | (asid & ~ASID_MASK);
+
+		/*
+		 * We had a valid ASID in a previous life, so try to re-use
+		 * it if possible.
+		 */
+		asid &= ~ASID_MASK;
+		if (!__test_and_set_bit(asid, asid_map))
+			goto bump_gen;
+	}
 
 	/*
-	 * current->active_mm could be init_mm for the idle thread immediately
-	 * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
-	 * the reserved value, so no need to reset any context.
+	 * Allocate a free ASID. If we can't find one, take a note of the
+	 * currently active ASIDs and mark the TLBs as requiring flushes.
+	 * We always count from ASID #1, as we use ASID #0 when setting a
+	 * reserved TTBR0 for the init_mm.
 	 */
-	if (mm == &init_mm)
-		return;
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+	if (asid != NUM_USER_ASIDS)
+		goto set_asid;
 
-	smp_rmb();
-	asid = cpu_last_asid + cpu;
+	/* We're out of ASIDs, so increment the global generation count */
+	generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+						 &asid_generation);
+	flush_context(cpu);
 
-	flush_context();
-	set_mm_context(mm, asid);
+	/* We have at least 1 ASID per CPU, so this will always succeed */
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
-	/* set the new ASID */
-	cpu_switch_mm(mm->pgd, mm);
+set_asid:
+	__set_bit(asid, asid_map);
+	cur_idx = asid;
+
+bump_gen:
+	asid |= generation;
+	return asid;
 }
 
-void __new_context(struct mm_struct *mm)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 {
-	unsigned int asid;
-	unsigned int bits = asid_bits();
+	unsigned long flags;
+	u64 asid;
+
+	asid = atomic64_read(&mm->context.id);
 
-	raw_spin_lock(&cpu_asid_lock);
 	/*
-	 * Check the ASID again, in case the change was broadcast from another
-	 * CPU before we acquired the lock.
+	 * The memory ordering here is subtle. We rely on the control
+	 * dependency between the generation read and the update of
+	 * active_asids to ensure that we are synchronised with a
+	 * parallel rollover (i.e. this pairs with the smp_wmb() in
+	 * flush_context).
 	 */
-	if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-		raw_spin_unlock(&cpu_asid_lock);
-		return;
+	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+		goto switch_mm_fastpath;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	/* Check that our ASID belongs to the current generation. */
+	asid = atomic64_read(&mm->context.id);
+	if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+		asid = new_context(mm, cpu);
+		atomic64_set(&mm->context.id, asid);
 	}
-	/*
-	 * At this point, it is guaranteed that the current mm (with an old
-	 * ASID) isn't active on any other CPU since the ASIDs are changed
-	 * simultaneously via IPI.
-	 */
-	asid = ++cpu_last_asid;
 
-	/*
-	 * If we've used up all our ASIDs, we need to start a new version and
-	 * flush the TLB.
-	 */
-	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
-		/* increment the ASID version */
-		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
-		if (cpu_last_asid == 0)
-			cpu_last_asid = ASID_FIRST_VERSION;
-		asid = cpu_last_asid + smp_processor_id();
-		flush_context();
-		smp_wmb();
-		smp_call_function(reset_context, NULL, 1);
-		cpu_last_asid += NR_CPUS - 1;
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+		local_flush_tlb_all();
+
+	atomic64_set(&per_cpu(active_asids, cpu), asid);
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+	cpu_switch_mm(mm->pgd, mm);
+}
+
+static int asids_init(void)
+{
+	int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+	switch (fld) {
+	default:
+		pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+		/* Fallthrough */
+	case 0:
+		asid_bits = 8;
+		break;
+	case 2:
+		asid_bits = 16;
 	}
 
-	set_mm_context(mm, asid);
-	raw_spin_unlock(&cpu_asid_lock);
+	/* If we end up with more CPUs than ASIDs, expect things to crash */
+	WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+	asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+			   GFP_KERNEL);
+	if (!asid_map)
+		panic("Failed to allocate bitmap for %lu ASIDs\n",
+		      NUM_USER_ASIDS);
+
+	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+	return 0;
 }
+early_initcall(asids_init);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 99224dcebdc5..6320361d8d4c 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -533,3 +533,460 @@ static int __init dma_debug_do_init(void)
 	return 0;
 }
 fs_initcall(dma_debug_do_init);
+
+
+#ifdef CONFIG_IOMMU_DMA
+#include <linux/dma-iommu.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+
+/* Thankfully, all cache ops are by VA so we can ignore phys here */
+static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
+{
+	__dma_flush_range(virt, virt + PAGE_SIZE);
+}
+
+static void *__iommu_alloc_attrs(struct device *dev, size_t size,
+				 dma_addr_t *handle, gfp_t gfp,
+				 struct dma_attrs *attrs)
+{
+	bool coherent = is_device_dma_coherent(dev);
+	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+	void *addr;
+
+	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
+		return NULL;
+	/*
+	 * Some drivers rely on this, and we probably don't want the
+	 * possibility of stale kernel data being read by devices anyway.
+	 */
+	gfp |= __GFP_ZERO;
+
+	if (gfp & __GFP_WAIT) {
+		struct page **pages;
+		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+
+		pages = iommu_dma_alloc(dev, size, gfp, ioprot,	handle,
+					flush_page);
+		if (!pages)
+			return NULL;
+
+		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+					      __builtin_return_address(0));
+		if (!addr)
+			iommu_dma_free(dev, pages, size, handle);
+	} else {
+		struct page *page;
+		/*
+		 * In atomic context we can't remap anything, so we'll only
+		 * get the virtually contiguous buffer we need by way of a
+		 * physically contiguous allocation.
+		 */
+		if (coherent) {
+			page = alloc_pages(gfp, get_order(size));
+			addr = page ? page_address(page) : NULL;
+		} else {
+			addr = __alloc_from_pool(size, &page, gfp);
+		}
+		if (!addr)
+			return NULL;
+
+		*handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
+		if (iommu_dma_mapping_error(dev, *handle)) {
+			if (coherent)
+				__free_pages(page, get_order(size));
+			else
+				__free_from_pool(addr, size);
+			addr = NULL;
+		}
+	}
+	return addr;
+}
+
+static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+			       dma_addr_t handle, struct dma_attrs *attrs)
+{
+	/*
+	 * @cpu_addr will be one of 3 things depending on how it was allocated:
+	 * - A remapped array of pages from iommu_dma_alloc(), for all
+	 *   non-atomic allocations.
+	 * - A non-cacheable alias from the atomic pool, for atomic
+	 *   allocations by non-coherent devices.
+	 * - A normal lowmem address, for atomic allocations by
+	 *   coherent devices.
+	 * Hence how dodgy the below logic looks...
+	 */
+	if (__in_atomic_pool(cpu_addr, size)) {
+		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		__free_from_pool(cpu_addr, size);
+	} else if (is_vmalloc_addr(cpu_addr)){
+		struct vm_struct *area = find_vm_area(cpu_addr);
+
+		if (WARN_ON(!area || !area->pages))
+			return;
+		iommu_dma_free(dev, area->pages, size, &handle);
+		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+	} else {
+		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		__free_pages(virt_to_page(cpu_addr), get_order(size));
+	}
+}
+
+static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			      struct dma_attrs *attrs)
+{
+	struct vm_struct *area;
+	int ret;
+
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+					     is_device_dma_coherent(dev));
+
+	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	area = find_vm_area(cpu_addr);
+	if (WARN_ON(!area || !area->pages))
+		return -ENXIO;
+
+	return iommu_dma_mmap(area->pages, size, vma);
+}
+
+static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+			       void *cpu_addr, dma_addr_t dma_addr,
+			       size_t size, struct dma_attrs *attrs)
+{
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct vm_struct *area = find_vm_area(cpu_addr);
+
+	if (WARN_ON(!area || !area->pages))
+		return -ENXIO;
+
+	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
+					 GFP_KERNEL);
+}
+
+static void __iommu_sync_single_for_cpu(struct device *dev,
+					dma_addr_t dev_addr, size_t size,
+					enum dma_data_direction dir)
+{
+	phys_addr_t phys;
+
+	if (is_device_dma_coherent(dev))
+		return;
+
+	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+	__dma_unmap_area(phys_to_virt(phys), size, dir);
+}
+
+static void __iommu_sync_single_for_device(struct device *dev,
+					   dma_addr_t dev_addr, size_t size,
+					   enum dma_data_direction dir)
+{
+	phys_addr_t phys;
+
+	if (is_device_dma_coherent(dev))
+		return;
+
+	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+	__dma_map_area(phys_to_virt(phys), size, dir);
+}
+
+static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction dir,
+				   struct dma_attrs *attrs)
+{
+	bool coherent = is_device_dma_coherent(dev);
+	int prot = dma_direction_to_prot(dir, coherent);
+	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
+
+	if (!iommu_dma_mapping_error(dev, dev_addr) &&
+	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		__iommu_sync_single_for_device(dev, dev_addr, size, dir);
+
+	return dev_addr;
+}
+
+static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
+			       size_t size, enum dma_data_direction dir,
+			       struct dma_attrs *attrs)
+{
+	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
+
+	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static void __iommu_sync_sg_for_cpu(struct device *dev,
+				    struct scatterlist *sgl, int nelems,
+				    enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (is_device_dma_coherent(dev))
+		return;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(sg_virt(sg), sg->length, dir);
+}
+
+static void __iommu_sync_sg_for_device(struct device *dev,
+				       struct scatterlist *sgl, int nelems,
+				       enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (is_device_dma_coherent(dev))
+		return;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_map_area(sg_virt(sg), sg->length, dir);
+}
+
+static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+				int nelems, enum dma_data_direction dir,
+				struct dma_attrs *attrs)
+{
+	bool coherent = is_device_dma_coherent(dev);
+
+	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
+
+	return iommu_dma_map_sg(dev, sgl, nelems,
+			dma_direction_to_prot(dir, coherent));
+}
+
+static void __iommu_unmap_sg_attrs(struct device *dev,
+				   struct scatterlist *sgl, int nelems,
+				   enum dma_data_direction dir,
+				   struct dma_attrs *attrs)
+{
+	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
+
+	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
+}
+
+static struct dma_map_ops iommu_dma_ops = {
+	.alloc = __iommu_alloc_attrs,
+	.free = __iommu_free_attrs,
+	.mmap = __iommu_mmap_attrs,
+	.get_sgtable = __iommu_get_sgtable,
+	.map_page = __iommu_map_page,
+	.unmap_page = __iommu_unmap_page,
+	.map_sg = __iommu_map_sg_attrs,
+	.unmap_sg = __iommu_unmap_sg_attrs,
+	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
+	.sync_single_for_device = __iommu_sync_single_for_device,
+	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
+	.sync_sg_for_device = __iommu_sync_sg_for_device,
+	.dma_supported = iommu_dma_supported,
+	.mapping_error = iommu_dma_mapping_error,
+};
+
+/*
+ * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
+ * everything it needs to - the device is only partially created and the
+ * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
+ * need this delayed attachment dance. Once IOMMU probe ordering is sorted
+ * to move the arch_setup_dma_ops() call later, all the notifier bits below
+ * become unnecessary, and will go away.
+ */
+struct iommu_dma_notifier_data {
+	struct list_head list;
+	struct device *dev;
+	const struct iommu_ops *ops;
+	u64 dma_base;
+	u64 size;
+};
+static LIST_HEAD(iommu_dma_masters);
+static DEFINE_MUTEX(iommu_dma_notifier_lock);
+
+/*
+ * Temporarily "borrow" a domain feature flag to to tell if we had to resort
+ * to creating our own domain here, in case we need to clean it up again.
+ */
+#define __IOMMU_DOMAIN_FAKE_DEFAULT		(1U << 31)
+
+static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+			   u64 dma_base, u64 size)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+	/*
+	 * Best case: The device is either part of a group which was
+	 * already attached to a domain in a previous call, or it's
+	 * been put in a default DMA domain by the IOMMU core.
+	 */
+	if (!domain) {
+		/*
+		 * Urgh. The IOMMU core isn't going to do default domains
+		 * for non-PCI devices anyway, until it has some means of
+		 * abstracting the entirely implementation-specific
+		 * sideband data/SoC topology/unicorn dust that may or
+		 * may not differentiate upstream masters.
+		 * So until then, HORRIBLE HACKS!
+		 */
+		domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
+		if (!domain)
+			goto out_no_domain;
+
+		domain->ops = ops;
+		domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;
+
+		if (iommu_attach_device(domain, dev))
+			goto out_put_domain;
+	}
+
+	if (iommu_dma_init_domain(domain, dma_base, size))
+		goto out_detach;
+
+	dev->archdata.dma_ops = &iommu_dma_ops;
+	return true;
+
+out_detach:
+	iommu_detach_device(domain, dev);
+out_put_domain:
+	if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+		iommu_domain_free(domain);
+out_no_domain:
+	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+		dev_name(dev));
+	return false;
+}
+
+static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+			      u64 dma_base, u64 size)
+{
+	struct iommu_dma_notifier_data *iommudata;
+
+	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
+	if (!iommudata)
+		return;
+
+	iommudata->dev = dev;
+	iommudata->ops = ops;
+	iommudata->dma_base = dma_base;
+	iommudata->size = size;
+
+	mutex_lock(&iommu_dma_notifier_lock);
+	list_add(&iommudata->list, &iommu_dma_masters);
+	mutex_unlock(&iommu_dma_notifier_lock);
+}
+
+static int __iommu_attach_notifier(struct notifier_block *nb,
+				   unsigned long action, void *data)
+{
+	struct iommu_dma_notifier_data *master, *tmp;
+
+	if (action != BUS_NOTIFY_ADD_DEVICE)
+		return 0;
+
+	mutex_lock(&iommu_dma_notifier_lock);
+	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
+		if (do_iommu_attach(master->dev, master->ops,
+				master->dma_base, master->size)) {
+			list_del(&master->list);
+			kfree(master);
+		}
+	}
+	mutex_unlock(&iommu_dma_notifier_lock);
+	return 0;
+}
+
+static int register_iommu_dma_ops_notifier(struct bus_type *bus)
+{
+	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
+	int ret;
+
+	if (!nb)
+		return -ENOMEM;
+	/*
+	 * The device must be attached to a domain before the driver probe
+	 * routine gets a chance to start allocating DMA buffers. However,
+	 * the IOMMU driver also needs a chance to configure the iommu_group
+	 * via its add_device callback first, so we need to make the attach
+	 * happen between those two points. Since the IOMMU core uses a bus
+	 * notifier with default priority for add_device, do the same but
+	 * with a lower priority to ensure the appropriate ordering.
+	 */
+	nb->notifier_call = __iommu_attach_notifier;
+	nb->priority = -100;
+
+	ret = bus_register_notifier(bus, nb);
+	if (ret) {
+		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
+			bus->name);
+		kfree(nb);
+	}
+	return ret;
+}
+
+static int __init __iommu_dma_init(void)
+{
+	int ret;
+
+	ret = iommu_dma_init();
+	if (!ret)
+		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
+	if (!ret)
+		ret = register_iommu_dma_ops_notifier(&amba_bustype);
+	return ret;
+}
+arch_initcall(__iommu_dma_init);
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				  const struct iommu_ops *ops)
+{
+	struct iommu_group *group;
+
+	if (!ops)
+		return;
+	/*
+	 * TODO: As a concession to the future, we're ready to handle being
+	 * called both early and late (i.e. after bus_add_device). Once all
+	 * the platform bus code is reworked to call us late and the notifier
+	 * junk above goes away, move the body of do_iommu_attach here.
+	 */
+	group = iommu_group_get(dev);
+	if (group) {
+		do_iommu_attach(dev, ops, dma_base, size);
+		iommu_group_put(group);
+	} else {
+		queue_iommu_attach(dev, ops, dma_base, size);
+	}
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+	if (domain) {
+		iommu_detach_device(domain, dev);
+		if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+			iommu_domain_free(domain);
+	}
+
+	dev->archdata.dma_ops = NULL;
+}
+
+#else
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				  struct iommu_ops *iommu)
+{ }
+
+#endif  /* CONFIG_IOMMU_DMA */
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			struct iommu_ops *iommu, bool coherent)
+{
+	if (!acpi_disabled && !dev->archdata.dma_ops)
+		dev->archdata.dma_ops = dma_ops;
+
+	dev->archdata.dma_coherent = coherent;
+	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
+}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f3d6221cd5bd..5a22a119a74c 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
 	{ -1,			NULL },
 };
 
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
 struct pg_state {
 	struct seq_file *seq;
 	const struct addr_marker *marker;
@@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = {
 		.set	= "NG",
 		.clear	= "  ",
 	}, {
+		.mask	= PTE_CONT,
+		.val	= PTE_CONT,
+		.set	= "CON",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_TABLE_BIT,
+		.val	= PTE_TABLE_BIT,
+		.set	= "   ",
+		.clear	= "BLK",
+	}, {
 		.mask	= PTE_UXN,
 		.val	= PTE_UXN,
 		.set	= "UXN",
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 		unsigned long delta;
 
 		if (st->current_prot) {
-			seq_printf(st->seq, "0x%16lx-0x%16lx   ",
+			seq_printf(st->seq, "0x%016lx-0x%016lx   ",
 				   st->start_address, addr);
 
 			delta = (addr - st->start_address) >> 10;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index aba9ead1384c..19211c4a8911 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -287,6 +287,7 @@ retry:
 			 * starvation.
 			 */
 			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			mm_flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
@@ -555,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 }
 
 #ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void)
+void cpu_enable_pan(void *__unused)
 {
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f5c0680d17d9..17bf39ac83ba 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	memset(zone_size, 0, sizeof(zone_size));
 
 	/* 4GB maximum for 32-bit only capable devices */
-	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-		max_dma = PFN_DOWN(arm64_dma_phys_limit);
-		zone_size[ZONE_DMA] = max_dma - min;
-	}
+#ifdef CONFIG_ZONE_DMA
+	max_dma = PFN_DOWN(arm64_dma_phys_limit);
+	zone_size[ZONE_DMA] = max_dma - min;
+#endif
 	zone_size[ZONE_NORMAL] = max - max_dma;
 
 	memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 		if (start >= max)
 			continue;
 
-		if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+		if (start < max_dma) {
 			unsigned long dma_end = min(end, max_dma);
 			zhole_size[ZONE_DMA] -= dma_end - start;
 		}
-
+#endif
 		if (end > max_dma) {
 			unsigned long normal_end = min(end, max);
 			unsigned long normal_start = max(start, max_dma);
@@ -298,6 +299,9 @@ void __init mem_init(void)
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
 	pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+#endif
 		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
@@ -310,6 +314,9 @@ void __init mem_init(void)
 		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
 		  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG((unsigned long)vmemmap,
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
new file mode 100644
index 000000000000..cf038c7d9fa9
--- /dev/null
+++ b/arch/arm64/mm/kasan_init.c
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+					unsigned long end)
+{
+	pte_t *pte;
+	unsigned long next;
+
+	if (pmd_none(*pmd))
+		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		next = addr + PAGE_SIZE;
+		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+					PAGE_KERNEL));
+	} while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+					unsigned long addr,
+					unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	if (pud_none(*pud))
+		pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		kasan_early_pte_populate(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+					unsigned long addr,
+					unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	if (pgd_none(*pgd))
+		pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		kasan_early_pmd_populate(pud, addr, next);
+	} while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+	unsigned long addr = KASAN_SHADOW_START;
+	unsigned long end = KASAN_SHADOW_END;
+	unsigned long next;
+	pgd_t *pgd;
+
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_early_pud_populate(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+			unsigned long end)
+{
+	/*
+	 * Remove references to kasan page tables from
+	 * swapper_pg_dir. pgd_clear() can't be used
+	 * here because it's nop on 2,3-level pagetable setups
+	 */
+	for (; start < end; start += PGDIR_SIZE)
+		set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+	asm(
+	"	msr	ttbr1_el1, %0\n"
+	"	isb"
+	:
+	: "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+	struct memblock_region *reg;
+
+	/*
+	 * We are going to perform proper setup of shadow memory.
+	 * At first we should unmap early shadow (clear_pgds() call bellow).
+	 * However, instrumented code couldn't execute without shadow memory.
+	 * tmp_pg_dir used to keep early shadow mapped until full shadow
+	 * setup will be finished.
+	 */
+	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+	cpu_set_ttbr1(__pa(tmp_pg_dir));
+	flush_tlb_all();
+
+	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+			kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+	for_each_memblock(memory, reg) {
+		void *start = (void *)__phys_to_virt(reg->base);
+		void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+		if (start >= end)
+			break;
+
+		/*
+		 * end + 1 here is intentional. We check several shadow bytes in
+		 * advance to slightly speed up fastpath. In some rare cases
+		 * we could cross boundary of mapped shadow, so we just map
+		 * some more here.
+		 */
+		vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+				(unsigned long)kasan_mem_to_shadow(end) + 1,
+				pfn_to_nid(virt_to_pfn(start)));
+	}
+
+	memset(kasan_zero_page, 0, PAGE_SIZE);
+	cpu_set_ttbr1(__pa(swapper_pg_dir));
+	flush_tlb_all();
+
+	/* At this point kasan is fully initialized. Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KernelAddressSanitizer initialized\n");
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9211b8527f25..c2fa6b56613c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,6 +32,7 @@
 
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 	do {
 		/*
 		 * Need to have the least restrictive permissions available
-		 * permissions will be fixed up later
+		 * permissions will be fixed up later. Default the new page
+		 * range as contiguous ptes.
 		 */
-		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
 		pfn++;
 	} while (pte++, i++, i < PTRS_PER_PTE);
 }
 
+/*
+ * Given a PTE with the CONT bit set, determine where the CONT range
+ * starts, and clear the entire range of PTE CONT bits.
+ */
+static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
+{
+	int i;
+
+	pte -= CONT_RANGE_OFFSET(addr);
+	for (i = 0; i < CONT_PTES; i++) {
+		set_pte(pte, pte_mknoncont(*pte));
+		pte++;
+	}
+	flush_tlb_all();
+}
+
+/*
+ * Given a range of PTEs set the pfn and provided page protection flags
+ */
+static void __populate_init_pte(pte_t *pte, unsigned long addr,
+				unsigned long end, phys_addr_t phys,
+				pgprot_t prot)
+{
+	unsigned long pfn = __phys_to_pfn(phys);
+
+	do {
+		/* clear all the bits except the pfn, then apply the prot */
+		set_pte(pte, pfn_pte(pfn, prot));
+		pte++;
+		pfn++;
+		addr += PAGE_SIZE;
+	} while (addr != end);
+}
+
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
-				  unsigned long end, unsigned long pfn,
+				  unsigned long end, phys_addr_t phys,
 				  pgprot_t prot,
 				  void *(*alloc)(unsigned long size))
 {
 	pte_t *pte;
+	unsigned long next;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		set_pte(pte, pfn_pte(pfn, prot));
-		pfn++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+		next = min(end, (addr + CONT_SIZE) & CONT_MASK);
+		if (((addr | next | phys) & ~CONT_MASK) == 0) {
+			/* a block of CONT_PTES  */
+			__populate_init_pte(pte, addr, next, phys,
+					    prot | __pgprot(PTE_CONT));
+		} else {
+			/*
+			 * If the range being split is already inside of a
+			 * contiguous range but this PTE isn't going to be
+			 * contiguous, then we want to unmark the adjacent
+			 * ranges, then update the portion of the range we
+			 * are interrested in.
+			 */
+			 clear_cont_pte_range(pte, addr);
+			 __populate_init_pte(pte, addr, next, phys, prot);
+		}
+
+		pte += (next - addr) >> PAGE_SHIFT;
+		phys += next - addr;
+		addr = next;
+	} while (addr != end);
 }
 
 void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				}
 			}
 		} else {
-			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, alloc);
+			alloc_init_pte(pmd, addr, next, phys, prot, alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
@@ -353,14 +407,11 @@ static void __init map_mem(void)
 	 * memory addressable from the initial direct kernel mapping.
 	 *
 	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
-	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
-	 * PHYS_OFFSET (which must be aligned to 2MB as per
-	 * Documentation/arm64/booting.txt).
+	 * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+	 * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+	 * per Documentation/arm64/booting.txt).
 	 */
-	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
-		limit = PHYS_OFFSET + PMD_SIZE;
-	else
-		limit = PHYS_OFFSET + PUD_SIZE;
+	limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
 	memblock_set_current_limit(limit);
 
 	/* map all the memory banks */
@@ -371,21 +422,24 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-#ifndef CONFIG_ARM64_64K_PAGES
-		/*
-		 * For the first memory bank align the start address and
-		 * current memblock limit to prevent create_mapping() from
-		 * allocating pte page tables from unmapped memory.
-		 * When 64K pages are enabled, the pte page table for the
-		 * first PGDIR_SIZE is already present in swapper_pg_dir.
-		 */
-		if (start < limit)
-			start = ALIGN(start, PMD_SIZE);
-		if (end < limit) {
-			limit = end & PMD_MASK;
-			memblock_set_current_limit(limit);
+		if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+			/*
+			 * For the first memory bank align the start address and
+			 * current memblock limit to prevent create_mapping() from
+			 * allocating pte page tables from unmapped memory. With
+			 * the section maps, if the first block doesn't end on section
+			 * size boundary, create_mapping() will try to allocate a pte
+			 * page, which may be returned from an unmapped area.
+			 * When section maps are not used, the pte page table for the
+			 * current limit is already present in swapper_pg_dir.
+			 */
+			if (start < limit)
+				start = ALIGN(start, SECTION_SIZE);
+			if (end < limit) {
+				limit = end & SECTION_MASK;
+				memblock_set_current_limit(limit);
+			}
 		}
-#endif
 		__map_memblock(start, end);
 	}
 
@@ -456,7 +510,7 @@ void __init paging_init(void)
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
+	local_flush_tlb_all();
 	cpu_set_default_tcr_t0sz();
 }
 
@@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(*pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	return vmemmap_populate_basepages(start, end, node);
 }
-#else	/* !CONFIG_ARM64_64K_PAGES */
+#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	unsigned long addr = start;
@@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 {
 	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
 	pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
-	int granularity, size, offset;
+	int size, offset;
 	void *dt_virt;
 
 	/*
@@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	 */
 	BUILD_BUG_ON(dt_virt_base % SZ_2M);
 
-	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
-		BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
-			     __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
-
-		granularity = PAGE_SIZE;
-	} else {
-		BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
-			     __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
-
-		granularity = PMD_SIZE;
-	}
+	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
 
-	offset = dt_phys % granularity;
+	offset = dt_phys % SWAPPER_BLOCK_SIZE;
 	dt_virt = (void *)dt_virt_base + offset;
 
 	/* map the first chunk so we can read the size from the header */
-	create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-		       granularity, prot);
+	create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+		       SWAPPER_BLOCK_SIZE, prot);
 
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
@@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	if (size > MAX_FDT_SIZE)
 		return NULL;
 
-	if (offset + size > granularity)
-		create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-			       round_up(offset + size, granularity), prot);
+	if (offset + size > SWAPPER_BLOCK_SIZE)
+		create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
 
 	memblock_reserve(dt_phys, size);
 
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index e47ed1c5dce1..3571c7309c5e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	int ret;
 	struct page_change_data data;
 
-	if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
 		end = start + size;
 		WARN_ON_ONCE(1);
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 71ca104f97bd..cb3ba1b812e7 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -28,8 +28,6 @@
 
 #include "mm.h"
 
-#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
-
 static struct kmem_cache *pgd_cache;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e4ee7bd8830a..cacecc4ad3e5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -30,7 +30,9 @@
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS	TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
 #define TCR_TG_FLAGS	TCR_TG0_4K | TCR_TG1_4K
 #endif
 
@@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume)
  *	- pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
-	mmid	w1, x1				// get mm->context.id
+	mmid	x1, x1				// get mm->context.id
 	bfi	x0, x1, #48, #16		// set the ASID
 	msr	ttbr0_el1, x0			// set TTBR0
 	isb
@@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm)
  *	value of the SCTLR_EL1 register.
  */
 ENTRY(__cpu_setup)
-	tlbi	vmalle1is			// invalidate I + D TLBs
-	dsb	ish
+	tlbi	vmalle1				// Invalidate local TLB
+	dsb	nsh
 
 	mov	x0, #3 << 20
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
@@ -163,12 +165,14 @@ ENTRY(__cpu_setup)
 	 *   DEVICE_GRE		010	00001100
 	 *   NORMAL_NC		011	01000100
 	 *   NORMAL		100	11111111
+	 *   NORMAL_WT		101	10111011
 	 */
 	ldr	x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
 		     MAIR(0x04, MT_DEVICE_nGnRE) | \
 		     MAIR(0x0c, MT_DEVICE_GRE) | \
 		     MAIR(0x44, MT_NORMAL_NC) | \
-		     MAIR(0xff, MT_NORMAL)
+		     MAIR(0xff, MT_NORMAL) | \
+		     MAIR(0xbb, MT_NORMAL_WT)
 	msr	mair_el1, x5
 	/*
 	 * Prepare SCTLR
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c047598b09e0..a44e5293c6f5 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -744,7 +744,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	set_memory_ro((unsigned long)header, header->pages);
 	prog->bpf_func = (void *)ctx.image;
-	prog->jited = true;
+	prog->jited = 1;
 out:
 	kfree(ctx.offset);
 }
diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c
index 91146b416cdb..99b0a7984950 100644
--- a/arch/avr32/boards/atngw100/mrmt.c
+++ b/arch/avr32/boards/atngw100/mrmt.c
@@ -21,7 +21,6 @@
 #include <linux/leds_pwm.h>
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
-#include <linux/atmel_serial.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index f61f2dd67464..241b9b9729d8 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -20,4 +20,5 @@ generic-y += sections.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 97c9bdf83409..d74fd8ce980a 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -19,8 +19,8 @@
 
 #define ATOMIC_INIT(i)  { (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP_RETURN(op, asm_op, asm_con)				\
 static inline int __atomic_##op##_return(int i, atomic_t *v)		\
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 61cd1e786a14..91d49c0a3118 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -46,4 +46,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index f17c4dc6050c..945544ec603e 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -59,4 +59,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c
index ddcb45d7dfa7..43afc03e4125 100644
--- a/arch/c6x/platforms/megamod-pic.c
+++ b/arch/c6x/platforms/megamod-pic.c
@@ -178,7 +178,7 @@ static void __init set_megamod_mux(struct megamod_pic *pic, int src, int output)
 static void __init parse_priority_map(struct megamod_pic *pic,
 				      int *mapping, int size)
 {
-	struct device_node *np = pic->irqhost->of_node;
+	struct device_node *np = irq_domain_get_of_node(pic->irqhost);
 	const __be32 *map;
 	int i, maplen;
 	u32 val;
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 8da5653bd895..e086f9e93728 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -57,7 +57,6 @@ config CRIS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
-	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
 	select OLD_SIGSUSPEND
diff --git a/arch/cris/arch-v10/kernel/head.S b/arch/cris/arch-v10/kernel/head.S
index 4a146e1749c9..a4877a421756 100644
--- a/arch/cris/arch-v10/kernel/head.S
+++ b/arch/cris/arch-v10/kernel/head.S
@@ -354,63 +354,6 @@ no_command_line:
 	blo	1b
 	nop
 
-#ifdef CONFIG_BLK_DEV_ETRAXIDE
-	;; disable ATA before enabling it in genconfig below
-	moveq	0,$r0
-	move.d	$r0,[R_ATA_CTRL_DATA]
-	move.d	$r0,[R_ATA_TRANSFER_CNT]
-	move.d	$r0,[R_ATA_CONFIG]
-#if 0
-	move.d	R_PORT_G_DATA, $r1
-	move.d	$r0, [$r1]; assert ATA bus-reset
-	nop
-	nop
-	nop
-	nop
-	nop
-	nop
-	move.d	0x08000000,$r0
-	move.d	$r0,[$r1]
-#endif
-#endif
-
-#ifdef CONFIG_JULIETTE
-	;; configure external DMA channel 0 before enabling it in genconfig
-
-	moveq	0,$r0
-	move.d	$r0,[R_EXT_DMA_0_ADDR]
-	; cnt enable, word size, output, stop, size 0
-	move.d	  IO_STATE (R_EXT_DMA_0_CMD, cnt, enable)	\
-		| IO_STATE (R_EXT_DMA_0_CMD, rqpol, ahigh)	\
-		| IO_STATE (R_EXT_DMA_0_CMD, apol, ahigh)	\
-		| IO_STATE (R_EXT_DMA_0_CMD, rq_ack, burst)	\
-		| IO_STATE (R_EXT_DMA_0_CMD, wid, word)		\
-		| IO_STATE (R_EXT_DMA_0_CMD, dir, output)	\
-		| IO_STATE (R_EXT_DMA_0_CMD, run, stop)		\
-		| IO_FIELD (R_EXT_DMA_0_CMD, trf_count, 0),$r0
-	move.d	$r0,[R_EXT_DMA_0_CMD]
-
-	;; reset dma4 and wait for completion
-
-	moveq	IO_STATE (R_DMA_CH4_CMD, cmd, reset),$r0
-	move.b	$r0,[R_DMA_CH4_CMD]
-1:	move.b	[R_DMA_CH4_CMD],$r0
-	and.b	IO_MASK (R_DMA_CH4_CMD, cmd),$r0
-	cmp.b	IO_STATE (R_DMA_CH4_CMD, cmd, reset),$r0
-	beq	1b
-	nop
-
-	;; reset dma5 and wait for completion
-
-	moveq	IO_STATE (R_DMA_CH5_CMD, cmd, reset),$r0
-	move.b	$r0,[R_DMA_CH5_CMD]
-1:	move.b	[R_DMA_CH5_CMD],$r0
-	and.b	IO_MASK (R_DMA_CH5_CMD, cmd),$r0
-	cmp.b	IO_STATE (R_DMA_CH5_CMD, cmd, reset),$r0
-	beq	1b
-	nop
-#endif
-
 	;; Etrax product HW genconfig setup
 
 	moveq	0,$r0
@@ -447,21 +390,6 @@ no_command_line:
 		| IO_STATE (R_GEN_CONFIG, dma9, usb),$r0
 
 
-#if defined(CONFIG_ETRAX_DEF_R_PORT_G0_DIR_OUT)
-        or.d      IO_STATE (R_GEN_CONFIG, g0dir, out),$r0
-#endif
-
-#if defined(CONFIG_ETRAX_DEF_R_PORT_G8_15_DIR_OUT)
-        or.d      IO_STATE (R_GEN_CONFIG, g8_15dir, out),$r0
-#endif
-#if defined(CONFIG_ETRAX_DEF_R_PORT_G16_23_DIR_OUT)
-       or.d      IO_STATE (R_GEN_CONFIG, g16_23dir, out),$r0
-#endif
-
-#if defined(CONFIG_ETRAX_DEF_R_PORT_G24_DIR_OUT)
-       or.d      IO_STATE (R_GEN_CONFIG, g24dir, out),$r0
-#endif
-
 	move.d	$r0,[genconfig_shadow] ; init a shadow register of R_GEN_CONFIG
 
 	move.d	$r0,[R_GEN_CONFIG]
@@ -500,19 +428,9 @@ no_command_line:
 	;; including their shadow registers
 
 	move.b	CONFIG_ETRAX_DEF_R_PORT_PA_DIR,$r0
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PA7)
-	or.b	IO_STATE (R_PORT_PA_DIR, dir7, output),$r0
-#endif
 	move.b	$r0,[port_pa_dir_shadow]
 	move.b	$r0,[R_PORT_PA_DIR]
 	move.b	CONFIG_ETRAX_DEF_R_PORT_PA_DATA,$r0
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PA7)
-#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH)
-	and.b	~(1 << 7),$r0
-#else
-	or.b	(1 << 7),$r0
-#endif
-#endif
 	move.b	$r0,[port_pa_data_shadow]
 	move.b	$r0,[R_PORT_PA_DATA]
 
@@ -520,19 +438,9 @@ no_command_line:
 	move.b	$r0,[port_pb_config_shadow]
 	move.b	$r0,[R_PORT_PB_CONFIG]
 	move.b	CONFIG_ETRAX_DEF_R_PORT_PB_DIR,$r0
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PB5)
-	or.b	IO_STATE (R_PORT_PB_DIR, dir5, output),$r0
-#endif
 	move.b	$r0,[port_pb_dir_shadow]
 	move.b	$r0,[R_PORT_PB_DIR]
 	move.b	CONFIG_ETRAX_DEF_R_PORT_PB_DATA,$r0
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PB5)
-#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH)
-	and.b	~(1 << 5),$r0
-#else
-	or.b	(1 << 5),$r0
-#endif
-#endif
 	move.b	$r0,[port_pb_data_shadow]
 	move.b	$r0,[R_PORT_PB_DATA]
 
@@ -541,20 +449,6 @@ no_command_line:
 	move.d  $r0, [R_PORT_PB_I2C]
 
 	moveq	0,$r0
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_G10)
-#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH)
-	and.d	~(1 << 10),$r0
-#else
-	or.d	(1 << 10),$r0
-#endif
-#endif
-#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_G11)
-#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH)
-	and.d	~(1 << 11),$r0
-#else
-	or.d	(1 << 11),$r0
-#endif
-#endif
 	move.d	$r0,[port_g_data_shadow]
 	move.d	$r0,[R_PORT_G_DATA]
 
diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c
index 22d846bfc570..ed71ade93a73 100644
--- a/arch/cris/arch-v10/kernel/kgdb.c
+++ b/arch/cris/arch-v10/kernel/kgdb.c
@@ -275,7 +275,7 @@ static char remcomOutBuffer[BUFMAX];
 /* Error and warning messages. */
 enum error_type
 {
-	SUCCESS, E01, E02, E03, E04, E05, E06, E07
+	SUCCESS, E01, E02, E03, E04, E05, E06, E07, E08
 };
 static char *error_message[] =
 {
@@ -286,7 +286,8 @@ static char *error_message[] =
 	"E04 The command is not supported - [s,C,S,!,R,d,r] - internal error.",
 	"E05 Change register content - P - the register is not implemented..",
 	"E06 Change memory content - M - internal error.",
-	"E07 Change register content - P - the register is not stored on the stack"
+	"E07 Change register content - P - the register is not stored on the stack",
+	"E08 Invalid parameter"
 };
 /********************************* Register image ****************************/
 /* Use the order of registers as defined in "AXIS ETRAX CRIS Programmer's
@@ -351,7 +352,7 @@ char internal_stack[INTERNAL_STACK_SIZE];
    breakpoint to be handled. A static breakpoint uses the content of register
    BRP as it is whereas a dynamic breakpoint requires subtraction with 2
    in order to execute the instruction. The first breakpoint is static. */
-static unsigned char is_dyn_brkp = 0;
+static unsigned char __used is_dyn_brkp;
 
 /********************************* String library ****************************/
 /* Single-step over library functions creates trap loops. */
@@ -413,18 +414,6 @@ gdb_cris_strtol (const char *s, char **endptr, int base)
 }
 
 /********************************** Packet I/O ******************************/
-/* Returns the integer equivalent of a hexadecimal character. */
-static int
-hex (char ch)
-{
-	if ((ch >= 'a') && (ch <= 'f'))
-		return (ch - 'a' + 10);
-	if ((ch >= '0') && (ch <= '9'))
-		return (ch - '0');
-	if ((ch >= 'A') && (ch <= 'F'))
-		return (ch - 'A' + 10);
-	return (-1);
-}
 
 /* Convert the memory, pointed to by mem into hexadecimal representation.
    Put the result in buf, and return a pointer to the last character
@@ -455,22 +444,6 @@ mem2hex(char *buf, unsigned char *mem, int count)
 	return (buf);
 }
 
-/* Convert the array, in hexadecimal representation, pointed to by buf into
-   binary representation. Put the result in mem, and return a pointer to
-   the character after the last byte written. */
-static unsigned char*
-hex2mem (unsigned char *mem, char *buf, int count)
-{
-	int i;
-	unsigned char ch;
-	for (i = 0; i < count; i++) {
-		ch = hex (*buf++) << 4;
-		ch = ch + hex (*buf++);
-		*mem++ = ch;
-	}
-	return (mem);
-}
-
 /* Put the content of the array, in binary representation, pointed to by buf
    into memory pointed to by mem, and return a pointer to the character after
    the last byte written.
@@ -524,8 +497,8 @@ getpacket (char *buffer)
 		buffer[count] = '\0';
 		
 		if (ch == '#') {
-			xmitcsum = hex (getDebugChar ()) << 4;
-			xmitcsum += hex (getDebugChar ());
+			xmitcsum = hex_to_bin(getDebugChar()) << 4;
+			xmitcsum += hex_to_bin(getDebugChar());
 			if (checksum != xmitcsum) {
 				/* Wrong checksum */
 				putDebugChar ('-');
@@ -599,7 +572,7 @@ putDebugString (const unsigned char *str, int length)
 
 /********************************* Register image ****************************/
 /* Write a value to a specified register in the register image of the current
-   thread. Returns status code SUCCESS, E02 or E05. */
+   thread. Returns status code SUCCESS, E02, E05 or E08. */
 static int
 write_register (int regno, char *val)
 {
@@ -608,8 +581,9 @@ write_register (int regno, char *val)
 
         if (regno >= R0 && regno <= PC) {
 		/* 32-bit register with simple offset. */
-		hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int),
-			 val, sizeof(unsigned int));
+		if (hex2bin((unsigned char *)current_reg + regno * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
 	}
         else if (regno == P0 || regno == VR || regno == P4 || regno == P8) {
 		/* Do not support read-only registers. */
@@ -618,13 +592,15 @@ write_register (int regno, char *val)
         else if (regno == CCR) {
 		/* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, 
                    and P7 (MOF) is 32 bits in ETRAX 100LX. */
-		hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short),
-			 val, sizeof(unsigned short));
+		if (hex2bin((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short),
+			    val, sizeof(unsigned short)))
+			status = E08;
 	}
 	else if (regno >= MOF && regno <= USP) {
 		/* 32 bit register with complex offset.  (P8 has been taken care of.) */
-		hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int),
-			 val, sizeof(unsigned int));
+		if (hex2bin((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
 	} 
         else {
 		/* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */
@@ -759,9 +735,11 @@ handle_exception (int sigval)
 				/* Write registers. GXX..XX
 				   Each byte of register data  is described by two hex digits.
 				   Success: OK
-				   Failure: void. */
-				hex2mem((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers));
-				gdb_cris_strcpy (remcomOutBuffer, "OK");
+				   Failure: E08. */
+				if (hex2bin((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers)))
+					gdb_cris_strcpy (remcomOutBuffer, error_message[E08]);
+				else
+					gdb_cris_strcpy (remcomOutBuffer, "OK");
 				break;
 				
 			case 'P':
@@ -771,7 +749,7 @@ handle_exception (int sigval)
 				   for each byte in the register (target byte order). P1f=11223344 means
 				   set register 31 to 44332211.
 				   Success: OK
-				   Failure: E02, E05 */
+				   Failure: E02, E05, E08 */
 				{
 					char *suffix;
 					int regno = gdb_cris_strtol (&remcomInBuffer[1], &suffix, 16);
@@ -791,6 +769,10 @@ handle_exception (int sigval)
 							/* Do not support non-existing registers on the stack. */
 							gdb_cris_strcpy (remcomOutBuffer, error_message[E07]);
 							break;
+						case E08:
+							/* Invalid parameter. */
+							gdb_cris_strcpy (remcomOutBuffer, error_message[E08]);
+							break;
 						default:
 							/* Valid register number. */
 							gdb_cris_strcpy (remcomOutBuffer, "OK");
@@ -826,7 +808,7 @@ handle_exception (int sigval)
 				   AA..AA is the start address,  LLLL is the number of bytes, and
 				   XX..XX is the hexadecimal data.
 				   Success: OK
-				   Failure: void. */
+				   Failure: E08. */
 				{
 					char *lenptr;
 					char *dataptr;
@@ -835,14 +817,15 @@ handle_exception (int sigval)
 					int length = gdb_cris_strtol(lenptr+1, &dataptr, 16);
 					if (*lenptr == ',' && *dataptr == ':') {
 						if (remcomInBuffer[0] == 'M') {
-							hex2mem(addr, dataptr + 1, length);
-						}
-						else /* X */ {
+							if (hex2bin(addr, dataptr + 1, length))
+								gdb_cris_strcpy (remcomOutBuffer, error_message[E08]);
+							else
+								gdb_cris_strcpy (remcomOutBuffer, "OK");
+						} else /* X */ {
 							bin2mem(addr, dataptr + 1, length);
+							gdb_cris_strcpy (remcomOutBuffer, "OK");
 						}
-						gdb_cris_strcpy (remcomOutBuffer, "OK");
-					}
-					else {
+					} else {
 						gdb_cris_strcpy (remcomOutBuffer, error_message[E06]);
 					}
 				}
@@ -970,7 +953,7 @@ asm ("\n"
 "  move     $ibr,[cris_reg+0x4E]  ; P9,\n"
 "  move     $irp,[cris_reg+0x52]  ; P10,\n"
 "  move     $srp,[cris_reg+0x56]  ; P11,\n"
-"  move     $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n"
+"  move     $bar,[cris_reg+0x5A]  ; P12,\n"
 "                            ; P13, register DCCR already saved\n"
 ";; Due to the old assembler-versions BRP might not be recognized\n"
 "  .word 0xE670              ; move brp,r0\n"
@@ -1063,7 +1046,7 @@ asm ("\n"
 "  move     $ibr,[cris_reg+0x4E]  ; P9,\n"
 "  move     $irp,[cris_reg+0x52]  ; P10,\n"
 "  move     $srp,[cris_reg+0x56]  ; P11,\n"
-"  move     $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n"
+"  move     $bar,[cris_reg+0x5A]  ; P12,\n"
 "                            ; P13, register DCCR already saved\n"
 ";; Due to the old assembler-versions BRP might not be recognized\n"
 "  .word 0xE670              ; move brp,r0\n"
diff --git a/arch/cris/arch-v10/mm/init.c b/arch/cris/arch-v10/mm/init.c
index e7f8066105aa..85e3f1b1f3ac 100644
--- a/arch/cris/arch-v10/mm/init.c
+++ b/arch/cris/arch-v10/mm/init.c
@@ -68,14 +68,10 @@ paging_init(void)
 
 	*R_MMU_KSEG = ( IO_STATE(R_MMU_KSEG, seg_f, seg  ) |  /* bootrom */
 			IO_STATE(R_MMU_KSEG, seg_e, page ) |
-			IO_STATE(R_MMU_KSEG, seg_d, page ) | 
-			IO_STATE(R_MMU_KSEG, seg_c, page ) |   
+			IO_STATE(R_MMU_KSEG, seg_d, page ) |
+			IO_STATE(R_MMU_KSEG, seg_c, page ) |
 			IO_STATE(R_MMU_KSEG, seg_b, seg  ) |  /* kernel reg area */
-#ifdef CONFIG_JULIETTE
-			IO_STATE(R_MMU_KSEG, seg_a, seg  ) |  /* ARTPEC etc. */
-#else
 			IO_STATE(R_MMU_KSEG, seg_a, page ) |
-#endif
 			IO_STATE(R_MMU_KSEG, seg_9, seg  ) |  /* LED's on some boards */
 			IO_STATE(R_MMU_KSEG, seg_8, seg  ) |  /* CSE0/1, flash and I/O */
 			IO_STATE(R_MMU_KSEG, seg_7, page ) |  /* kernel vmalloc area */
@@ -92,14 +88,10 @@ paging_init(void)
 			    IO_FIELD(R_MMU_KBASE_HI, base_d, 0x0 ) |
 			    IO_FIELD(R_MMU_KBASE_HI, base_c, 0x0 ) |
 			    IO_FIELD(R_MMU_KBASE_HI, base_b, 0xb ) |
-#ifdef CONFIG_JULIETTE
-			    IO_FIELD(R_MMU_KBASE_HI, base_a, 0xa ) |
-#else
 			    IO_FIELD(R_MMU_KBASE_HI, base_a, 0x0 ) |
-#endif
 			    IO_FIELD(R_MMU_KBASE_HI, base_9, 0x9 ) |
 			    IO_FIELD(R_MMU_KBASE_HI, base_8, 0x8 ) );
-	
+
 	*R_MMU_KBASE_LO = ( IO_FIELD(R_MMU_KBASE_LO, base_7, 0x0 ) |
 			    IO_FIELD(R_MMU_KBASE_LO, base_6, 0x4 ) |
 			    IO_FIELD(R_MMU_KBASE_LO, base_5, 0x0 ) |
diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig
index 21bbd93be34f..17dbe03af5f4 100644
--- a/arch/cris/arch-v32/Kconfig
+++ b/arch/cris/arch-v32/Kconfig
@@ -11,95 +11,6 @@ config ETRAX_DRAM_VIRTUAL_BASE
 	default "c0000000"
 
 choice
-	prompt "Nbr of Ethernet LED groups"
-	depends on ETRAX_ARCH_V32
-	default ETRAX_NBR_LED_GRP_ONE
-	help
-	  Select how many Ethernet LED groups that can be used. Usually one per Ethernet
-	  interface is a good choice.
-
-config	ETRAX_NBR_LED_GRP_ZERO
-	bool "Use zero LED groups"
-	help
-	  Select this if you do not want any Ethernet LEDs.
-
-config	ETRAX_NBR_LED_GRP_ONE
-	bool "Use one LED group"
-	help
-	  Select this if you want one Ethernet LED group. This LED group
-	  can be used for one or more Ethernet interfaces. However, it is
-	  recommended that each Ethernet interface use a dedicated LED group.
-
-config	ETRAX_NBR_LED_GRP_TWO
-	bool "Use two LED groups"
-	help
-	  Select this if you want two Ethernet LED groups. This is the
-	  best choice if you have more than one Ethernet interface and
-	  would like to have separate LEDs for the interfaces.
-
-endchoice
-
-config ETRAX_LED_G_NET0
-	string "Ethernet LED group 0 green LED bit"
-	depends on ETRAX_ARCH_V32 && (ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO)
-	default "PA3"
-	help
-	  Bit to use for the green LED in Ethernet LED group 0.
-
-config ETRAX_LED_R_NET0
-	string "Ethernet LED group 0 red LED bit"
-	depends on ETRAX_ARCH_V32 && (ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO)
-	default "PA4"
-	help
-	  Bit to use for the red LED in Ethernet LED group 0.
-
-config ETRAX_LED_G_NET1
-	string "Ethernet group 1 green LED bit"
-	depends on ETRAX_ARCH_V32 && ETRAX_NBR_LED_GRP_TWO
-	default ""
-	help
-	  Bit to use for the green LED in Ethernet LED group 1.
-
-config ETRAX_LED_R_NET1
-	string "Ethernet group 1 red LED bit"
-	depends on ETRAX_ARCH_V32 && ETRAX_NBR_LED_GRP_TWO
-	default ""
-	help
-	  Bit to use for the red LED in Ethernet LED group 1.
-
-config ETRAX_V32_LED2G
-	string "Second green LED bit"
-	depends on ETRAX_ARCH_V32
-	default "PA5"
-	help
-	  Bit to use for the first green LED (status LED).
-	  Most Axis products use bit A5 here.
-
-config ETRAX_V32_LED2R
-	string "Second red LED bit"
-	depends on ETRAX_ARCH_V32
-	default "PA6"
-	help
-	  Bit to use for the first red LED (network LED).
-	  Most Axis products use bit A6 here.
-
-config ETRAX_V32_LED3G
-	string "Third green LED bit"
-	depends on ETRAX_ARCH_V32
-	default "PA7"
-	help
-	  Bit to use for the first green LED (drive/power LED).
-	  Most Axis products use bit A7 here.
-
-config ETRAX_V32_LED3R
-	string "Third red LED bit"
-	depends on ETRAX_ARCH_V32
-	default "PA7"
-	help
-	  Bit to use for the first red LED (drive/power LED).
-	  Most Axis products use bit A7 here.
-
-choice
 	prompt "Kernel GDB port"
 	depends on ETRAX_KGDB
 	default ETRAX_KGDB_PORT0
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index e6c523cc40bc..2735eb7671a5 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -149,173 +149,6 @@ config ETRAX_NANDBOOT
 	  Say Y if your boot code, kernel and root file system is in
 	  NAND flash. Say N if they are in NOR flash.
 
-config ETRAX_I2C
-	bool "I2C driver"
-	depends on ETRAX_ARCH_V32
-	help
-	  This option enables the I2C driver used by e.g. the RTC driver.
-
-config ETRAX_V32_I2C_DATA_PORT
-	string "I2C data pin"
-	depends on ETRAX_I2C
-	help
-	  The pin to use for I2C data.
-
-config ETRAX_V32_I2C_CLK_PORT
-	string "I2C clock pin"
-	depends on ETRAX_I2C
-	help
-	  The pin to use for I2C clock.
-
-config ETRAX_GPIO
-	bool "GPIO support"
-	depends on ETRAX_ARCH_V32
-	---help---
-	  Enables the ETRAX general port device (major 120, minors 0-4).
-	  You can use this driver to access the general port bits. It supports
-	  these ioctl's:
-	  #include <linux/etraxgpio.h>
-	  fd = open("/dev/gpioa", O_RDWR); // or /dev/gpiob
-	  ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_SETBITS), bits_to_set);
-	  ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_CLRBITS), bits_to_clear);
-	  err = ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_READ_INBITS), &val);
-	  Remember that you need to setup the port directions appropriately in
-	  the General configuration.
-
-config ETRAX_VIRTUAL_GPIO
-	bool "Virtual GPIO support"
-	depends on ETRAX_GPIO
-	help
-	  Enables the virtual Etrax general port device (major 120, minor 6).
-	  It uses an I/O expander for the I2C-bus.
-
-config ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN
-	int "Virtual GPIO interrupt pin on PA pin"
-	range 0 7
-	depends on ETRAX_VIRTUAL_GPIO
-	help
-	  The pin to use on PA for virtual gpio interrupt.
-
-config ETRAX_PA_CHANGEABLE_DIR
-	hex "PA user changeable dir mask"
-	depends on ETRAX_GPIO
-	default "0x00" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PA that a
-	  user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0 here, but it depends on your hardware.
-
-config ETRAX_PA_CHANGEABLE_BITS
-	hex "PA user changeable bits mask"
-	depends on ETRAX_GPIO
-	default "0x00" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PA
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
-config ETRAX_PB_CHANGEABLE_DIR
-	hex "PB user changeable dir mask"
-	depends on ETRAX_GPIO
-	default "0x00000" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PB
-	  that a user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0 here, but it depends on your hardware.
-
-config ETRAX_PB_CHANGEABLE_BITS
-	hex "PB user changeable bits mask"
-	depends on ETRAX_GPIO
-	default "0x00000" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PB
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
-config ETRAX_PC_CHANGEABLE_DIR
-	hex "PC user changeable dir mask"
-	depends on ETRAX_GPIO
-	default "0x00000" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PC
-	  that a user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0 here, but it depends on your hardware.
-
-config ETRAX_PC_CHANGEABLE_BITS
-	hex "PC user changeable bits mask"
-	depends on ETRAX_GPIO
-	default "0x00000" if ETRAXFS
-	default "0x00000000" if !ETRAXFS
-	help
-	  This is a bitmask with information of what bits in PC
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
-config ETRAX_PD_CHANGEABLE_DIR
-	hex "PD user changeable dir mask"
-	depends on ETRAX_GPIO && ETRAXFS
-	default "0x00000"
-	help
-	  This is a bitmask with information of what bits in PD
-	  that a user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0x00000 here, but it depends on your hardware.
-
-config ETRAX_PD_CHANGEABLE_BITS
-	hex "PD user changeable bits mask"
-	depends on ETRAX_GPIO && ETRAXFS
-	default "0x00000"
-	help
-	  This is a bitmask (18 bits) with information of what bits in PD
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
-config ETRAX_PE_CHANGEABLE_DIR
-	hex "PE user changeable dir mask"
-	depends on ETRAX_GPIO && ETRAXFS
-	default "0x00000"
-	help
-	  This is a bitmask (18 bits) with information of what bits in PE
-	  that a user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0x00000 here, but it depends on your hardware.
-
-config ETRAX_PE_CHANGEABLE_BITS
-	hex "PE user changeable bits mask"
- 	depends on ETRAX_GPIO && ETRAXFS
-	default "0x00000"
-	help
-	  This is a bitmask (18 bits) with information of what bits in PE
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
-config ETRAX_PV_CHANGEABLE_DIR
-	hex "PV user changeable dir mask"
-	depends on ETRAX_VIRTUAL_GPIO
-	default "0x0000"
-	help
-	  This is a bitmask (16 bits) with information of what bits in PV
-	  that a user can change direction on using ioctl's.
-	  Bit set = changeable.
-	  You probably want 0x0000 here, but it depends on your hardware.
-
-config ETRAX_PV_CHANGEABLE_BITS
-	hex "PV user changeable bits mask"
-	depends on ETRAX_VIRTUAL_GPIO
-	default "0x0000"
-	help
-	  This is a bitmask (16 bits) with information of what bits in PV
-	  that a user can change the value on using ioctl's.
-	  Bit set = changeable.
-
 config ETRAX_CARDBUS
         bool "Cardbus support"
         depends on ETRAX_ARCH_V32
diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
index 15fbfefced2c..b5a75fdce77b 100644
--- a/arch/cris/arch-v32/drivers/Makefile
+++ b/arch/cris/arch-v32/drivers/Makefile
@@ -7,6 +7,5 @@ obj-$(CONFIG_ETRAX_AXISFLASHMAP)        += axisflashmap.o
 obj-$(CONFIG_ETRAXFS)                   += mach-fs/
 obj-$(CONFIG_CRIS_MACH_ARTPEC3)         += mach-a3/
 obj-$(CONFIG_ETRAX_IOP_FW_LOAD)         += iop_fw_load.o
-obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 obj-$(CONFIG_PCI)			+= pci/
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 5387424683cc..c6309a182f46 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -361,7 +361,7 @@ static int __init init_axis_flash(void)
 
 #if 0 /* Dump flash memory so we can see what is going on */
 	if (main_mtd) {
-		int sectoraddr, i;
+		int sectoraddr;
 		for (sectoraddr = 0; sectoraddr < 2*65536+4096;
 				sectoraddr += PAGESIZE) {
 			main_mtd->read(main_mtd, sectoraddr, PAGESIZE, &len,
@@ -369,21 +369,7 @@ static int __init init_axis_flash(void)
 			printk(KERN_INFO
 			       "Sector at %d (length %d):\n",
 			       sectoraddr, len);
-			for (i = 0; i < PAGESIZE; i += 16) {
-				printk(KERN_INFO
-				       "%02x %02x %02x %02x "
-				       "%02x %02x %02x %02x "
-				       "%02x %02x %02x %02x "
-				       "%02x %02x %02x %02x\n",
-				       page[i] & 255, page[i+1] & 255,
-				       page[i+2] & 255, page[i+3] & 255,
-				       page[i+4] & 255, page[i+5] & 255,
-				       page[i+6] & 255, page[i+7] & 255,
-				       page[i+8] & 255, page[i+9] & 255,
-				       page[i+10] & 255, page[i+11] & 255,
-				       page[i+12] & 255, page[i+13] & 255,
-				       page[i+14] & 255, page[i+15] & 255);
-			}
+			print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, page, PAGESIZE, false);
 		}
 	}
 #endif
@@ -417,25 +403,11 @@ static int __init init_axis_flash(void)
 
 #if 0 /* Dump partition table so we can see what is going on */
 		printk(KERN_INFO
-		       "axisflashmap: flash read %d bytes at 0x%08x, data: "
-		       "%02x %02x %02x %02x %02x %02x %02x %02x\n",
-		       len, CONFIG_ETRAX_PTABLE_SECTOR,
-		       page[0] & 255, page[1] & 255,
-		       page[2] & 255, page[3] & 255,
-		       page[4] & 255, page[5] & 255,
-		       page[6] & 255, page[7] & 255);
+		       "axisflashmap: flash read %d bytes at 0x%08x, data: %8ph\n",
+		       len, CONFIG_ETRAX_PTABLE_SECTOR, page);
 		printk(KERN_INFO
-		       "axisflashmap: partition table offset %d, data: "
-		       "%02x %02x %02x %02x %02x %02x %02x %02x\n",
-		       PARTITION_TABLE_OFFSET,
-		       page[PARTITION_TABLE_OFFSET+0] & 255,
-		       page[PARTITION_TABLE_OFFSET+1] & 255,
-		       page[PARTITION_TABLE_OFFSET+2] & 255,
-		       page[PARTITION_TABLE_OFFSET+3] & 255,
-		       page[PARTITION_TABLE_OFFSET+4] & 255,
-		       page[PARTITION_TABLE_OFFSET+5] & 255,
-		       page[PARTITION_TABLE_OFFSET+6] & 255,
-		       page[PARTITION_TABLE_OFFSET+7] & 255);
+		       "axisflashmap: partition table offset %d, data: %8ph\n",
+		       PARTITION_TABLE_OFFSET, page + PARTITION_TABLE_OFFSET);
 #endif
 	}
 
diff --git a/arch/cris/arch-v32/drivers/i2c.c b/arch/cris/arch-v32/drivers/i2c.c
deleted file mode 100644
index 3b2c82ce8147..000000000000
--- a/arch/cris/arch-v32/drivers/i2c.c
+++ /dev/null
@@ -1,751 +0,0 @@
-/*!***************************************************************************
-*!
-*! FILE NAME  : i2c.c
-*!
-*! DESCRIPTION: implements an interface for IIC/I2C, both directly from other
-*!              kernel modules (i2c_writereg/readreg) and from userspace using
-*!              ioctl()'s
-*!
-*! Nov 30 1998  Torbjorn Eliasson  Initial version.
-*!              Bjorn Wesen        Elinux kernel version.
-*! Jan 14 2000  Johan Adolfsson    Fixed PB shadow register stuff -
-*!                                 don't use PB_I2C if DS1302 uses same bits,
-*!                                 use PB.
-*| June 23 2003 Pieter Grimmerink  Added 'i2c_sendnack'. i2c_readreg now
-*|                                 generates nack on last received byte,
-*|                                 instead of ack.
-*|                                 i2c_getack changed data level while clock
-*|                                 was high, causing DS75 to see  a stop condition
-*!
-*! ---------------------------------------------------------------------------
-*!
-*! (C) Copyright 1999-2007 Axis Communications AB, LUND, SWEDEN
-*!
-*!***************************************************************************/
-
-/****************** INCLUDE FILES SECTION ***********************************/
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/mutex.h>
-
-#include <asm/etraxi2c.h>
-
-#include <asm/io.h>
-#include <asm/delay.h>
-
-#include "i2c.h"
-
-/****************** I2C DEFINITION SECTION *************************/
-
-#define D(x)
-
-#define I2C_MAJOR 123  /* LOCAL/EXPERIMENTAL */
-static DEFINE_MUTEX(i2c_mutex);
-static const char i2c_name[] = "i2c";
-
-#define CLOCK_LOW_TIME            8
-#define CLOCK_HIGH_TIME           8
-#define START_CONDITION_HOLD_TIME 8
-#define STOP_CONDITION_HOLD_TIME  8
-#define ENABLE_OUTPUT 0x01
-#define ENABLE_INPUT 0x00
-#define I2C_CLOCK_HIGH 1
-#define I2C_CLOCK_LOW 0
-#define I2C_DATA_HIGH 1
-#define I2C_DATA_LOW 0
-
-#define i2c_enable()
-#define i2c_disable()
-
-/* enable or disable output-enable, to select output or input on the i2c bus */
-
-#define i2c_dir_out() crisv32_io_set_dir(&cris_i2c_data, crisv32_io_dir_out)
-#define i2c_dir_in() crisv32_io_set_dir(&cris_i2c_data, crisv32_io_dir_in)
-
-/* control the i2c clock and data signals */
-
-#define i2c_clk(x) crisv32_io_set(&cris_i2c_clk, x)
-#define i2c_data(x) crisv32_io_set(&cris_i2c_data, x)
-
-/* read a bit from the i2c interface */
-
-#define i2c_getbit() crisv32_io_rd(&cris_i2c_data)
-
-#define i2c_delay(usecs) udelay(usecs)
-
-static DEFINE_SPINLOCK(i2c_lock); /* Protect directions etc */
-
-/****************** VARIABLE SECTION ************************************/
-
-static struct crisv32_iopin cris_i2c_clk;
-static struct crisv32_iopin cris_i2c_data;
-
-/****************** FUNCTION DEFINITION SECTION *************************/
-
-
-/* generate i2c start condition */
-
-void
-i2c_start(void)
-{
-	/*
-	 * SCL=1 SDA=1
-	 */
-	i2c_dir_out();
-	i2c_delay(CLOCK_HIGH_TIME/6);
-	i2c_data(I2C_DATA_HIGH);
-	i2c_clk(I2C_CLOCK_HIGH);
-	i2c_delay(CLOCK_HIGH_TIME);
-	/*
-	 * SCL=1 SDA=0
-	 */
-	i2c_data(I2C_DATA_LOW);
-	i2c_delay(START_CONDITION_HOLD_TIME);
-	/*
-	 * SCL=0 SDA=0
-	 */
-	i2c_clk(I2C_CLOCK_LOW);
-	i2c_delay(CLOCK_LOW_TIME);
-}
-
-/* generate i2c stop condition */
-
-void
-i2c_stop(void)
-{
-	i2c_dir_out();
-
-	/*
-	 * SCL=0 SDA=0
-	 */
-	i2c_clk(I2C_CLOCK_LOW);
-	i2c_data(I2C_DATA_LOW);
-	i2c_delay(CLOCK_LOW_TIME*2);
-	/*
-	 * SCL=1 SDA=0
-	 */
-	i2c_clk(I2C_CLOCK_HIGH);
-	i2c_delay(CLOCK_HIGH_TIME*2);
-	/*
-	 * SCL=1 SDA=1
-	 */
-	i2c_data(I2C_DATA_HIGH);
-	i2c_delay(STOP_CONDITION_HOLD_TIME);
-
-	i2c_dir_in();
-}
-
-/* write a byte to the i2c interface */
-
-void
-i2c_outbyte(unsigned char x)
-{
-	int i;
-
-	i2c_dir_out();
-
-	for (i = 0; i < 8; i++) {
-		if (x & 0x80) {
-			i2c_data(I2C_DATA_HIGH);
-		} else {
-			i2c_data(I2C_DATA_LOW);
-		}
-
-		i2c_delay(CLOCK_LOW_TIME/2);
-		i2c_clk(I2C_CLOCK_HIGH);
-		i2c_delay(CLOCK_HIGH_TIME);
-		i2c_clk(I2C_CLOCK_LOW);
-		i2c_delay(CLOCK_LOW_TIME/2);
-		x <<= 1;
-	}
-	i2c_data(I2C_DATA_LOW);
-	i2c_delay(CLOCK_LOW_TIME/2);
-
-	/*
-	 * enable input
-	 */
-	i2c_dir_in();
-}
-
-/* read a byte from the i2c interface */
-
-unsigned char
-i2c_inbyte(void)
-{
-	unsigned char aBitByte = 0;
-	int i;
-
-	/* Switch off I2C to get bit */
-	i2c_disable();
-	i2c_dir_in();
-	i2c_delay(CLOCK_HIGH_TIME/2);
-
-	/* Get bit */
-	aBitByte |= i2c_getbit();
-
-	/* Enable I2C */
-	i2c_enable();
-	i2c_delay(CLOCK_LOW_TIME/2);
-
-	for (i = 1; i < 8; i++) {
-		aBitByte <<= 1;
-		/* Clock pulse */
-		i2c_clk(I2C_CLOCK_HIGH);
-		i2c_delay(CLOCK_HIGH_TIME);
-		i2c_clk(I2C_CLOCK_LOW);
-		i2c_delay(CLOCK_LOW_TIME);
-
-		/* Switch off I2C to get bit */
-		i2c_disable();
-		i2c_dir_in();
-		i2c_delay(CLOCK_HIGH_TIME/2);
-
-		/* Get bit */
-		aBitByte |= i2c_getbit();
-
-		/* Enable I2C */
-		i2c_enable();
-		i2c_delay(CLOCK_LOW_TIME/2);
-	}
-	i2c_clk(I2C_CLOCK_HIGH);
-	i2c_delay(CLOCK_HIGH_TIME);
-
-	/*
-	 * we leave the clock low, getbyte is usually followed
-	 * by sendack/nack, they assume the clock to be low
-	 */
-	i2c_clk(I2C_CLOCK_LOW);
-	return aBitByte;
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_getack
-*#
-*# DESCRIPTION  : checks if ack was received from ic2
-*#
-*#--------------------------------------------------------------------------*/
-
-int
-i2c_getack(void)
-{
-	int ack = 1;
-	/*
-	 * enable output
-	 */
-	i2c_dir_out();
-	/*
-	 * Release data bus by setting
-	 * data high
-	 */
-	i2c_data(I2C_DATA_HIGH);
-	/*
-	 * enable input
-	 */
-	i2c_dir_in();
-	i2c_delay(CLOCK_HIGH_TIME/4);
-	/*
-	 * generate ACK clock pulse
-	 */
-	i2c_clk(I2C_CLOCK_HIGH);
-#if 0
-	/*
-	 * Use PORT PB instead of I2C
-	 * for input. (I2C not working)
-	 */
-	i2c_clk(1);
-	i2c_data(1);
-	/*
-	 * switch off I2C
-	 */
-	i2c_data(1);
-	i2c_disable();
-	i2c_dir_in();
-#endif
-
-	/*
-	 * now wait for ack
-	 */
-	i2c_delay(CLOCK_HIGH_TIME/2);
-	/*
-	 * check for ack
-	 */
-	if (i2c_getbit())
-		ack = 0;
-	i2c_delay(CLOCK_HIGH_TIME/2);
-	if (!ack) {
-		if (!i2c_getbit()) /* receiver pulld SDA low */
-			ack = 1;
-		i2c_delay(CLOCK_HIGH_TIME/2);
-	}
-
-   /*
-    * our clock is high now, make sure data is low
-    * before we enable our output. If we keep data high
-    * and enable output, we would generate a stop condition.
-    */
-#if 0
-   i2c_data(I2C_DATA_LOW);
-
-	/*
-	 * end clock pulse
-	 */
-	i2c_enable();
-	i2c_dir_out();
-#endif
-	i2c_clk(I2C_CLOCK_LOW);
-	i2c_delay(CLOCK_HIGH_TIME/4);
-	/*
-	 * enable output
-	 */
-	i2c_dir_out();
-	/*
-	 * remove ACK clock pulse
-	 */
-	i2c_data(I2C_DATA_HIGH);
-	i2c_delay(CLOCK_LOW_TIME/2);
-	return ack;
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: I2C::sendAck
-*#
-*# DESCRIPTION  : Send ACK on received data
-*#
-*#--------------------------------------------------------------------------*/
-void
-i2c_sendack(void)
-{
-	/*
-	 * enable output
-	 */
-	i2c_delay(CLOCK_LOW_TIME);
-	i2c_dir_out();
-	/*
-	 * set ack pulse high
-	 */
-	i2c_data(I2C_DATA_LOW);
-	/*
-	 * generate clock pulse
-	 */
-	i2c_delay(CLOCK_HIGH_TIME/6);
-	i2c_clk(I2C_CLOCK_HIGH);
-	i2c_delay(CLOCK_HIGH_TIME);
-	i2c_clk(I2C_CLOCK_LOW);
-	i2c_delay(CLOCK_LOW_TIME/6);
-	/*
-	 * reset data out
-	 */
-	i2c_data(I2C_DATA_HIGH);
-	i2c_delay(CLOCK_LOW_TIME);
-
-	i2c_dir_in();
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_sendnack
-*#
-*# DESCRIPTION  : Sends NACK on received data
-*#
-*#--------------------------------------------------------------------------*/
-void
-i2c_sendnack(void)
-{
-	/*
-	 * enable output
-	 */
-	i2c_delay(CLOCK_LOW_TIME);
-	i2c_dir_out();
-	/*
-	 * set data high
-	 */
-	i2c_data(I2C_DATA_HIGH);
-	/*
-	 * generate clock pulse
-	 */
-	i2c_delay(CLOCK_HIGH_TIME/6);
-	i2c_clk(I2C_CLOCK_HIGH);
-	i2c_delay(CLOCK_HIGH_TIME);
-	i2c_clk(I2C_CLOCK_LOW);
-	i2c_delay(CLOCK_LOW_TIME);
-
-	i2c_dir_in();
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_write
-*#
-*# DESCRIPTION  : Writes a value to an I2C device
-*#
-*#--------------------------------------------------------------------------*/
-int
-i2c_write(unsigned char theSlave, void *data, size_t nbytes)
-{
-	int error, cntr = 3;
-	unsigned char bytes_wrote = 0;
-	unsigned char value;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2c_lock, flags);
-
-	do {
-		error = 0;
-
-		i2c_start();
-		/*
-		 * send slave address
-		 */
-		i2c_outbyte((theSlave & 0xfe));
-		/*
-		 * wait for ack
-		 */
-		if (!i2c_getack())
-			error = 1;
-		/*
-		 * send data
-		 */
-		for (bytes_wrote = 0; bytes_wrote < nbytes; bytes_wrote++) {
-			memcpy(&value, data + bytes_wrote, sizeof value);
-			i2c_outbyte(value);
-			/*
-			 * now it's time to wait for ack
-			 */
-			if (!i2c_getack())
-				error |= 4;
-		}
-		/*
-		 * end byte stream
-		 */
-		i2c_stop();
-
-	} while (error && cntr--);
-
-	i2c_delay(CLOCK_LOW_TIME);
-
-	spin_unlock_irqrestore(&i2c_lock, flags);
-
-	return -error;
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_read
-*#
-*# DESCRIPTION  : Reads a value from an I2C device
-*#
-*#--------------------------------------------------------------------------*/
-int
-i2c_read(unsigned char theSlave, void *data, size_t nbytes)
-{
-	unsigned char b = 0;
-	unsigned char bytes_read = 0;
-	int error, cntr = 3;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2c_lock, flags);
-
-	do {
-		error = 0;
-		memset(data, 0, nbytes);
-		/*
-		 * generate start condition
-		 */
-		i2c_start();
-		/*
-		 * send slave address
-		 */
-		i2c_outbyte((theSlave | 0x01));
-		/*
-		 * wait for ack
-		 */
-		if (!i2c_getack())
-			error = 1;
-		/*
-		 * fetch data
-		 */
-		for (bytes_read = 0; bytes_read < nbytes; bytes_read++) {
-			b = i2c_inbyte();
-			memcpy(data + bytes_read, &b, sizeof b);
-
-			if (bytes_read < (nbytes - 1))
-				i2c_sendack();
-		}
-		/*
-		 * last received byte needs to be nacked
-		 * instead of acked
-		 */
-		i2c_sendnack();
-		/*
-		 * end sequence
-		 */
-		i2c_stop();
-	} while (error && cntr--);
-
-	spin_unlock_irqrestore(&i2c_lock, flags);
-
-	return -error;
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_writereg
-*#
-*# DESCRIPTION  : Writes a value to an I2C device
-*#
-*#--------------------------------------------------------------------------*/
-int
-i2c_writereg(unsigned char theSlave, unsigned char theReg,
-	     unsigned char theValue)
-{
-	int error, cntr = 3;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2c_lock, flags);
-
-	do {
-		error = 0;
-
-		i2c_start();
-		/*
-		 * send slave address
-		 */
-		i2c_outbyte((theSlave & 0xfe));
-		/*
-		 * wait for ack
-		 */
-		if(!i2c_getack())
-			error = 1;
-		/*
-		 * now select register
-		 */
-		i2c_dir_out();
-		i2c_outbyte(theReg);
-		/*
-		 * now it's time to wait for ack
-		 */
-		if(!i2c_getack())
-			error |= 2;
-		/*
-		 * send register register data
-		 */
-		i2c_outbyte(theValue);
-		/*
-		 * now it's time to wait for ack
-		 */
-		if(!i2c_getack())
-			error |= 4;
-		/*
-		 * end byte stream
-		 */
-		i2c_stop();
-	} while(error && cntr--);
-
-	i2c_delay(CLOCK_LOW_TIME);
-
-	spin_unlock_irqrestore(&i2c_lock, flags);
-
-	return -error;
-}
-
-/*#---------------------------------------------------------------------------
-*#
-*# FUNCTION NAME: i2c_readreg
-*#
-*# DESCRIPTION  : Reads a value from the decoder registers.
-*#
-*#--------------------------------------------------------------------------*/
-unsigned char
-i2c_readreg(unsigned char theSlave, unsigned char theReg)
-{
-	unsigned char b = 0;
-	int error, cntr = 3;
-	unsigned long flags;
-
-	spin_lock_irqsave(&i2c_lock, flags);
-
-	do {
-		error = 0;
-		/*
-		 * generate start condition
-		 */
-		i2c_start();
-
-		/*
-		 * send slave address
-		 */
-		i2c_outbyte((theSlave & 0xfe));
-		/*
-		 * wait for ack
-		 */
-		if(!i2c_getack())
-			error = 1;
-		/*
-		 * now select register
-		 */
-		i2c_dir_out();
-		i2c_outbyte(theReg);
-		/*
-		 * now it's time to wait for ack
-		 */
-		if(!i2c_getack())
-			error |= 2;
-		/*
-		 * repeat start condition
-		 */
-		i2c_delay(CLOCK_LOW_TIME);
-		i2c_start();
-		/*
-		 * send slave address
-		 */
-		i2c_outbyte(theSlave | 0x01);
-		/*
-		 * wait for ack
-		 */
-		if(!i2c_getack())
-			error |= 4;
-		/*
-		 * fetch register
-		 */
-		b = i2c_inbyte();
-		/*
-		 * last received byte needs to be nacked
-		 * instead of acked
-		 */
-		i2c_sendnack();
-		/*
-		 * end sequence
-		 */
-		i2c_stop();
-
-	} while(error && cntr--);
-
-	spin_unlock_irqrestore(&i2c_lock, flags);
-
-	return b;
-}
-
-static int
-i2c_open(struct inode *inode, struct file *filp)
-{
-	return 0;
-}
-
-static int
-i2c_release(struct inode *inode, struct file *filp)
-{
-	return 0;
-}
-
-/* Main device API. ioctl's to write or read to/from i2c registers.
- */
-
-static long
-i2c_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	int ret;
-	if(_IOC_TYPE(cmd) != ETRAXI2C_IOCTYPE) {
-		return -ENOTTY;
-	}
-
-	switch (_IOC_NR(cmd)) {
-		case I2C_WRITEREG:
-			/* write to an i2c slave */
-			D(printk("i2cw %d %d %d\n",
-				 I2C_ARGSLAVE(arg),
-				 I2C_ARGREG(arg),
-				 I2C_ARGVALUE(arg)));
-
-			mutex_lock(&i2c_mutex);
-			ret = i2c_writereg(I2C_ARGSLAVE(arg),
-					    I2C_ARGREG(arg),
-					    I2C_ARGVALUE(arg));
-			mutex_unlock(&i2c_mutex);
-			return ret;
-
-		case I2C_READREG:
-		{
-			unsigned char val;
-			/* read from an i2c slave */
-			D(printk("i2cr %d %d ",
-				I2C_ARGSLAVE(arg),
-				I2C_ARGREG(arg)));
-			mutex_lock(&i2c_mutex);
-			val = i2c_readreg(I2C_ARGSLAVE(arg), I2C_ARGREG(arg));
-			mutex_unlock(&i2c_mutex);
-			D(printk("= %d\n", val));
-			return val;
-		}
-		default:
-			return -EINVAL;
-
-	}
-
-	return 0;
-}
-
-static const struct file_operations i2c_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl = i2c_ioctl,
-	.open		= i2c_open,
-	.release	= i2c_release,
-	.llseek		= noop_llseek,
-};
-
-static int __init i2c_init(void)
-{
-	static int res;
-	static int first = 1;
-
-	if (!first)
-		return res;
-
-	first = 0;
-
-	/* Setup and enable the DATA and CLK pins */
-
-	res = crisv32_io_get_name(&cris_i2c_data,
-		CONFIG_ETRAX_V32_I2C_DATA_PORT);
-	if (res < 0)
-		return res;
-
-	res = crisv32_io_get_name(&cris_i2c_clk, CONFIG_ETRAX_V32_I2C_CLK_PORT);
-	crisv32_io_set_dir(&cris_i2c_clk, crisv32_io_dir_out);
-
-	return res;
-}
-
-
-static int __init i2c_register(void)
-{
-	int res;
-
-	res = i2c_init();
-	if (res < 0)
-		return res;
-
-	/* register char device */
-
-	res = register_chrdev(I2C_MAJOR, i2c_name, &i2c_fops);
-	if (res < 0) {
-		printk(KERN_ERR "i2c: couldn't get a major number.\n");
-		return res;
-	}
-
-	printk(KERN_INFO
-		"I2C driver v2.2, (c) 1999-2007 Axis Communications AB\n");
-
-	return 0;
-}
-/* this makes sure that i2c_init is called during boot */
-module_init(i2c_register);
-
-/****************** END OF FILE i2c.c ********************************/
diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h
deleted file mode 100644
index d9cc856f89fb..000000000000
--- a/arch/cris/arch-v32/drivers/i2c.h
+++ /dev/null
@@ -1,16 +0,0 @@
-
-#include <linux/init.h>
-
-/* High level I2C actions */
-int i2c_write(unsigned char theSlave, void *data, size_t nbytes);
-int i2c_read(unsigned char theSlave, void *data, size_t nbytes);
-int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue);
-unsigned char i2c_readreg(unsigned char theSlave, unsigned char theReg);
-
-/* Low level I2C */
-void i2c_start(void);
-void i2c_stop(void);
-void i2c_outbyte(unsigned char x);
-unsigned char i2c_inbyte(void);
-int i2c_getack(void);
-void i2c_sendack(void);
diff --git a/arch/cris/arch-v32/drivers/mach-a3/Makefile b/arch/cris/arch-v32/drivers/mach-a3/Makefile
index 5c6d2a2a080e..59028d0b981c 100644
--- a/arch/cris/arch-v32/drivers/mach-a3/Makefile
+++ b/arch/cris/arch-v32/drivers/mach-a3/Makefile
@@ -3,4 +3,3 @@
 #
 
 obj-$(CONFIG_ETRAX_NANDFLASH)   += nandflash.o
-obj-$(CONFIG_ETRAX_GPIO)        += gpio.o
diff --git a/arch/cris/arch-v32/drivers/mach-a3/gpio.c b/arch/cris/arch-v32/drivers/mach-a3/gpio.c
deleted file mode 100644
index c92e1da3684d..000000000000
--- a/arch/cris/arch-v32/drivers/mach-a3/gpio.c
+++ /dev/null
@@ -1,999 +0,0 @@
-/*
- * Artec-3 general port I/O device
- *
- * Copyright (c) 2007 Axis Communications AB
- *
- * Authors:    Bjorn Wesen      (initial version)
- *             Ola Knutsson     (LED handling)
- *             Johan Adolfsson  (read/set directions, write, port G,
- *                               port to ETRAX FS.
- *             Ricard Wanderlof (PWM for Artpec-3)
- *
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-
-#include <asm/etraxgpio.h>
-#include <hwregs/reg_map.h>
-#include <hwregs/reg_rdwr.h>
-#include <hwregs/gio_defs.h>
-#include <hwregs/intr_vect_defs.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <mach/pinmux.h>
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-#include "../i2c.h"
-
-#define VIRT_I2C_ADDR 0x40
-#endif
-
-/* The following gio ports on ARTPEC-3 is available:
- * pa 32 bits
- * pb 32 bits
- * pc 16 bits
- * each port has a rw_px_dout, r_px_din and rw_px_oe register.
- */
-
-#define GPIO_MAJOR 120  /* experimental MAJOR number */
-
-#define I2C_INTERRUPT_BITS 0x300 /* i2c0_done and i2c1_done bits */
-
-#define D(x)
-
-#if 0
-static int dp_cnt;
-#define DP(x) \
-	do { \
-		dp_cnt++; \
-		if (dp_cnt % 1000 == 0) \
-			x; \
-	} while (0)
-#else
-#define DP(x)
-#endif
-
-static DEFINE_MUTEX(gpio_mutex);
-static char gpio_name[] = "etrax gpio";
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static int virtual_gpio_ioctl(struct file *file, unsigned int cmd,
-			      unsigned long arg);
-#endif
-static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-static ssize_t gpio_write(struct file *file, const char __user *buf,
-	size_t count, loff_t *off);
-static int gpio_open(struct inode *inode, struct file *filp);
-static int gpio_release(struct inode *inode, struct file *filp);
-static unsigned int gpio_poll(struct file *filp,
-	struct poll_table_struct *wait);
-
-/* private data per open() of this driver */
-
-struct gpio_private {
-	struct gpio_private *next;
-	/* The IO_CFG_WRITE_MODE_VALUE only support 8 bits: */
-	unsigned char clk_mask;
-	unsigned char data_mask;
-	unsigned char write_msb;
-	unsigned char pad1;
-	/* These fields are generic */
-	unsigned long highalarm, lowalarm;
-	wait_queue_head_t alarm_wq;
-	int minor;
-};
-
-static void gpio_set_alarm(struct gpio_private *priv);
-static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg);
-static int gpio_pwm_ioctl(struct gpio_private *priv, unsigned int cmd,
-	unsigned long arg);
-
-
-/* linked list of alarms to check for */
-
-static struct gpio_private *alarmlist;
-
-static int wanted_interrupts;
-
-static DEFINE_SPINLOCK(gpio_lock);
-
-#define NUM_PORTS (GPIO_MINOR_LAST+1)
-#define GIO_REG_RD_ADDR(reg) \
-	(unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg)
-#define GIO_REG_WR_ADDR(reg) \
-	(unsigned long *)(regi_gio + REG_WR_ADDR_gio_##reg)
-static unsigned long led_dummy;
-static unsigned long port_d_dummy;	/* Only input on Artpec-3 */
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static unsigned long port_e_dummy;	/* Non existent on Artpec-3 */
-static unsigned long virtual_dummy;
-static unsigned long virtual_rw_pv_oe = CONFIG_ETRAX_DEF_GIO_PV_OE;
-static unsigned short cached_virtual_gpio_read;
-#endif
-
-static unsigned long *data_out[NUM_PORTS] = {
-	GIO_REG_WR_ADDR(rw_pa_dout),
-	GIO_REG_WR_ADDR(rw_pb_dout),
-	&led_dummy,
-	GIO_REG_WR_ADDR(rw_pc_dout),
-	&port_d_dummy,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&port_e_dummy,
-	&virtual_dummy,
-#endif
-};
-
-static unsigned long *data_in[NUM_PORTS] = {
-	GIO_REG_RD_ADDR(r_pa_din),
-	GIO_REG_RD_ADDR(r_pb_din),
-	&led_dummy,
-	GIO_REG_RD_ADDR(r_pc_din),
-	GIO_REG_RD_ADDR(r_pd_din),
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&port_e_dummy,
-	&virtual_dummy,
-#endif
-};
-
-static unsigned long changeable_dir[NUM_PORTS] = {
-	CONFIG_ETRAX_PA_CHANGEABLE_DIR,
-	CONFIG_ETRAX_PB_CHANGEABLE_DIR,
-	0,
-	CONFIG_ETRAX_PC_CHANGEABLE_DIR,
-	0,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	0,
-	CONFIG_ETRAX_PV_CHANGEABLE_DIR,
-#endif
-};
-
-static unsigned long changeable_bits[NUM_PORTS] = {
-	CONFIG_ETRAX_PA_CHANGEABLE_BITS,
-	CONFIG_ETRAX_PB_CHANGEABLE_BITS,
-	0,
-	CONFIG_ETRAX_PC_CHANGEABLE_BITS,
-	0,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	0,
-	CONFIG_ETRAX_PV_CHANGEABLE_BITS,
-#endif
-};
-
-static unsigned long *dir_oe[NUM_PORTS] = {
-	GIO_REG_WR_ADDR(rw_pa_oe),
-	GIO_REG_WR_ADDR(rw_pb_oe),
-	&led_dummy,
-	GIO_REG_WR_ADDR(rw_pc_oe),
-	&port_d_dummy,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&port_e_dummy,
-	&virtual_rw_pv_oe,
-#endif
-};
-
-static void gpio_set_alarm(struct gpio_private *priv)
-{
-	int bit;
-	int intr_cfg;
-	int mask;
-	int pins;
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-	intr_cfg = REG_RD_INT(gio, regi_gio, rw_intr_cfg);
-	pins = REG_RD_INT(gio, regi_gio, rw_intr_pins);
-	mask = REG_RD_INT(gio, regi_gio, rw_intr_mask) & I2C_INTERRUPT_BITS;
-
-	for (bit = 0; bit < 32; bit++) {
-		int intr = bit % 8;
-		int pin = bit / 8;
-		if (priv->minor < GPIO_MINOR_LEDS)
-			pin += priv->minor * 4;
-		else
-			pin += (priv->minor - 1) * 4;
-
-		if (priv->highalarm & (1<<bit)) {
-			intr_cfg |= (regk_gio_hi << (intr * 3));
-			mask |= 1 << intr;
-			wanted_interrupts = mask & 0xff;
-			pins |= pin << (intr * 4);
-		} else if (priv->lowalarm & (1<<bit)) {
-			intr_cfg |= (regk_gio_lo << (intr * 3));
-			mask |= 1 << intr;
-			wanted_interrupts = mask & 0xff;
-			pins |= pin << (intr * 4);
-		}
-	}
-
-	REG_WR_INT(gio, regi_gio, rw_intr_cfg, intr_cfg);
-	REG_WR_INT(gio, regi_gio, rw_intr_pins, pins);
-	REG_WR_INT(gio, regi_gio, rw_intr_mask, mask);
-
-	spin_unlock_irqrestore(&gpio_lock, flags);
-}
-
-static unsigned int gpio_poll(struct file *file, struct poll_table_struct *wait)
-{
-	unsigned int mask = 0;
-	struct gpio_private *priv = file->private_data;
-	unsigned long data;
-	unsigned long tmp;
-
-	if (priv->minor >= GPIO_MINOR_PWM0 &&
-	    priv->minor <= GPIO_MINOR_LAST_PWM)
-		return 0;
-
-	poll_wait(file, &priv->alarm_wq, wait);
-	if (priv->minor <= GPIO_MINOR_D) {
-		data = readl(data_in[priv->minor]);
-		REG_WR_INT(gio, regi_gio, rw_ack_intr, wanted_interrupts);
-		tmp = REG_RD_INT(gio, regi_gio, rw_intr_mask);
-		tmp &= I2C_INTERRUPT_BITS;
-		tmp |= wanted_interrupts;
-		REG_WR_INT(gio, regi_gio, rw_intr_mask, tmp);
-	} else
-		return 0;
-
-	if ((data & priv->highalarm) || (~data & priv->lowalarm))
-		mask = POLLIN|POLLRDNORM;
-
-	DP(printk(KERN_DEBUG "gpio_poll ready: mask 0x%08X\n", mask));
-	return mask;
-}
-
-static irqreturn_t gpio_interrupt(int irq, void *dev_id)
-{
-	reg_gio_rw_intr_mask intr_mask;
-	reg_gio_r_masked_intr masked_intr;
-	reg_gio_rw_ack_intr ack_intr;
-	unsigned long flags;
-	unsigned long tmp;
-	unsigned long tmp2;
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	unsigned char enable_gpiov_ack = 0;
-#endif
-
-	/* Find what PA interrupts are active */
-	masked_intr = REG_RD(gio, regi_gio, r_masked_intr);
-	tmp = REG_TYPE_CONV(unsigned long, reg_gio_r_masked_intr, masked_intr);
-
-	/* Find those that we have enabled */
-	spin_lock_irqsave(&gpio_lock, flags);
-	tmp &= wanted_interrupts;
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Something changed on virtual GPIO. Interrupt is acked by
-	 * reading the device.
-	 */
-	if (tmp & (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN)) {
-		i2c_read(VIRT_I2C_ADDR, (void *)&cached_virtual_gpio_read,
-			sizeof(cached_virtual_gpio_read));
-		enable_gpiov_ack = 1;
-	}
-#endif
-
-	/* Ack them */
-	ack_intr = REG_TYPE_CONV(reg_gio_rw_ack_intr, unsigned long, tmp);
-	REG_WR(gio, regi_gio, rw_ack_intr, ack_intr);
-
-	/* Disable those interrupts.. */
-	intr_mask = REG_RD(gio, regi_gio, rw_intr_mask);
-	tmp2 = REG_TYPE_CONV(unsigned long, reg_gio_rw_intr_mask, intr_mask);
-	tmp2 &= ~tmp;
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Do not disable interrupt on virtual GPIO. Changes on virtual
-	 * pins are only noticed by an interrupt.
-	 */
-	if (enable_gpiov_ack)
-		tmp2 |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-	intr_mask = REG_TYPE_CONV(reg_gio_rw_intr_mask, unsigned long, tmp2);
-	REG_WR(gio, regi_gio, rw_intr_mask, intr_mask);
-
-	return IRQ_RETVAL(tmp);
-}
-
-static void gpio_write_bit(unsigned long *port, unsigned char data, int bit,
-	unsigned char clk_mask, unsigned char data_mask)
-{
-	unsigned long shadow = readl(port) & ~clk_mask;
-	writel(shadow, port);
-	if (data & 1 << bit)
-		shadow |= data_mask;
-	else
-		shadow &= ~data_mask;
-	writel(shadow, port);
-	/* For FPGA: min 5.0ns (DCC) before CCLK high */
-	shadow |= clk_mask;
-	writel(shadow, port);
-}
-
-static void gpio_write_byte(struct gpio_private *priv, unsigned long *port,
-		unsigned char data)
-{
-	int i;
-
-	if (priv->write_msb)
-		for (i = 7; i >= 0; i--)
-			gpio_write_bit(port, data, i, priv->clk_mask,
-				priv->data_mask);
-	else
-		for (i = 0; i <= 7; i++)
-			gpio_write_bit(port, data, i, priv->clk_mask,
-				priv->data_mask);
-}
-
-
-static ssize_t gpio_write(struct file *file, const char __user *buf,
-	size_t count, loff_t *off)
-{
-	struct gpio_private *priv = file->private_data;
-	unsigned long flags;
-	ssize_t retval = count;
-	/* Only bits 0-7 may be used for write operations but allow all
-	   devices except leds... */
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	if (priv->minor == GPIO_MINOR_V)
-		return -EFAULT;
-#endif
-	if (priv->minor == GPIO_MINOR_LEDS)
-		return -EFAULT;
-
-	if (priv->minor >= GPIO_MINOR_PWM0 &&
-	    priv->minor <= GPIO_MINOR_LAST_PWM)
-		return -EFAULT;
-
-	if (!access_ok(VERIFY_READ, buf, count))
-		return -EFAULT;
-
-	/* It must have been configured using the IO_CFG_WRITE_MODE */
-	/* Perhaps a better error code? */
-	if (priv->clk_mask == 0 || priv->data_mask == 0)
-		return -EPERM;
-
-	D(printk(KERN_DEBUG "gpio_write: %lu to data 0x%02X clk 0x%02X "
-		"msb: %i\n",
-		count, priv->data_mask, priv->clk_mask, priv->write_msb));
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	while (count--)
-		gpio_write_byte(priv, data_out[priv->minor], *buf++);
-
-	spin_unlock_irqrestore(&gpio_lock, flags);
-	return retval;
-}
-
-static int gpio_open(struct inode *inode, struct file *filp)
-{
-	struct gpio_private *priv;
-	int p = iminor(inode);
-
-	if (p > GPIO_MINOR_LAST_PWM ||
-	    (p > GPIO_MINOR_LAST && p < GPIO_MINOR_PWM0))
-		return -EINVAL;
-
-	priv = kmalloc(sizeof(struct gpio_private), GFP_KERNEL);
-
-	if (!priv)
-		return -ENOMEM;
-
-	mutex_lock(&gpio_mutex);
-	memset(priv, 0, sizeof(*priv));
-
-	priv->minor = p;
-	filp->private_data = priv;
-
-	/* initialize the io/alarm struct, not for PWM ports though  */
-	if (p <= GPIO_MINOR_LAST) {
-
-		priv->clk_mask = 0;
-		priv->data_mask = 0;
-		priv->highalarm = 0;
-		priv->lowalarm = 0;
-
-		init_waitqueue_head(&priv->alarm_wq);
-
-		/* link it into our alarmlist */
-		spin_lock_irq(&gpio_lock);
-		priv->next = alarmlist;
-		alarmlist = priv;
-		spin_unlock_irq(&gpio_lock);
-	}
-
-	mutex_unlock(&gpio_mutex);
-	return 0;
-}
-
-static int gpio_release(struct inode *inode, struct file *filp)
-{
-	struct gpio_private *p;
-	struct gpio_private *todel;
-	/* local copies while updating them: */
-	unsigned long a_high, a_low;
-
-	/* prepare to free private structure */
-	todel = filp->private_data;
-
-	/* unlink from alarmlist - only for non-PWM ports though */
-	if (todel->minor <= GPIO_MINOR_LAST) {
-		spin_lock_irq(&gpio_lock);
-		p = alarmlist;
-
-		if (p == todel)
-			alarmlist = todel->next;
-		 else {
-			while (p->next != todel)
-				p = p->next;
-			p->next = todel->next;
-		}
-
-		/* Check if there are still any alarms set */
-		p = alarmlist;
-		a_high = 0;
-		a_low = 0;
-		while (p) {
-			if (p->minor == GPIO_MINOR_A) {
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-				p->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-				a_high |= p->highalarm;
-				a_low |= p->lowalarm;
-			}
-
-			p = p->next;
-		}
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Variable 'a_low' needs to be set here again
-	 * to ensure that interrupt for virtual GPIO is handled.
-	 */
-		a_low |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-
-		spin_unlock_irq(&gpio_lock);
-	}
-	kfree(todel);
-
-	return 0;
-}
-
-/* Main device API. ioctl's to read/set/clear bits, as well as to
- * set alarms to wait for using a subsequent select().
- */
-
-inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
-{
-	/* Set direction 0=unchanged 1=input,
-	 * return mask with 1=input
-	 */
-	unsigned long flags;
-	unsigned long dir_shadow;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	dir_shadow = readl(dir_oe[priv->minor]) &
-		~(arg & changeable_dir[priv->minor]);
-	writel(dir_shadow, dir_oe[priv->minor]);
-
-	spin_unlock_irqrestore(&gpio_lock, flags);
-
-	if (priv->minor == GPIO_MINOR_C)
-		dir_shadow ^= 0xFFFF;		/* Only 16 bits */
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	else if (priv->minor == GPIO_MINOR_V)
-		dir_shadow ^= 0xFFFF;		/* Only 16 bits */
-#endif
-	else
-		dir_shadow ^= 0xFFFFFFFF;	/* PA, PB and PD 32 bits */
-
-	return dir_shadow;
-
-} /* setget_input */
-
-static inline unsigned long setget_output(struct gpio_private *priv,
-	unsigned long arg)
-{
-	unsigned long flags;
-	unsigned long dir_shadow;
-
-	spin_lock_irqsave(&gpio_lock, flags);
-
-	dir_shadow = readl(dir_oe[priv->minor]) |
-		(arg & changeable_dir[priv->minor]);
-	writel(dir_shadow, dir_oe[priv->minor]);
-
-	spin_unlock_irqrestore(&gpio_lock, flags);
-	return dir_shadow;
-} /* setget_output */
-
-static long gpio_ioctl_unlocked(struct file *file,
-	unsigned int cmd, unsigned long arg)
-{
-	unsigned long flags;
-	unsigned long val;
-	unsigned long shadow;
-	struct gpio_private *priv = file->private_data;
-
-	if (_IOC_TYPE(cmd) != ETRAXGPIO_IOCTYPE)
-		return -ENOTTY;
-
-	/* Check for special ioctl handlers first */
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	if (priv->minor == GPIO_MINOR_V)
-		return virtual_gpio_ioctl(file, cmd, arg);
-#endif
-
-	if (priv->minor == GPIO_MINOR_LEDS)
-		return gpio_leds_ioctl(cmd, arg);
-
-	if (priv->minor >= GPIO_MINOR_PWM0 &&
-	    priv->minor <= GPIO_MINOR_LAST_PWM)
-		return gpio_pwm_ioctl(priv, cmd, arg);
-
-	switch (_IOC_NR(cmd)) {
-	case IO_READBITS: /* Use IO_READ_INBITS and IO_READ_OUTBITS instead */
-		/* Read the port. */
-		return readl(data_in[priv->minor]);
-	case IO_SETBITS:
-		spin_lock_irqsave(&gpio_lock, flags);
-		/* Set changeable bits with a 1 in arg. */
-		shadow = readl(data_out[priv->minor]) |
-			(arg & changeable_bits[priv->minor]);
-		writel(shadow, data_out[priv->minor]);
-		spin_unlock_irqrestore(&gpio_lock, flags);
-		break;
-	case IO_CLRBITS:
-		spin_lock_irqsave(&gpio_lock, flags);
-		/* Clear changeable bits with a 1 in arg. */
-		shadow = readl(data_out[priv->minor]) &
-			~(arg & changeable_bits[priv->minor]);
-		writel(shadow, data_out[priv->minor]);
-		spin_unlock_irqrestore(&gpio_lock, flags);
-		break;
-	case IO_HIGHALARM:
-		/* Set alarm when bits with 1 in arg go high. */
-		priv->highalarm |= arg;
-		gpio_set_alarm(priv);
-		break;
-	case IO_LOWALARM:
-		/* Set alarm when bits with 1 in arg go low. */
-		priv->lowalarm |= arg;
-		gpio_set_alarm(priv);
-		break;
-	case IO_CLRALARM:
-		/* Clear alarm for bits with 1 in arg. */
-		priv->highalarm &= ~arg;
-		priv->lowalarm  &= ~arg;
-		gpio_set_alarm(priv);
-		break;
-	case IO_READDIR: /* Use IO_SETGET_INPUT/OUTPUT instead! */
-		/* Read direction 0=input 1=output */
-		return readl(dir_oe[priv->minor]);
-
-	case IO_SETINPUT: /* Use IO_SETGET_INPUT instead! */
-		/* Set direction 0=unchanged 1=input,
-		 * return mask with 1=input
-		 */
-		return setget_input(priv, arg);
-
-	case IO_SETOUTPUT: /* Use IO_SETGET_OUTPUT instead! */
-		/* Set direction 0=unchanged 1=output,
-		 * return mask with 1=output
-		 */
-		return setget_output(priv, arg);
-
-	case IO_CFG_WRITE_MODE:
-	{
-		int res = -EPERM;
-		unsigned long dir_shadow, clk_mask, data_mask, write_msb;
-
-		clk_mask = arg & 0xFF;
-		data_mask = (arg >> 8) & 0xFF;
-		write_msb = (arg >> 16) & 0x01;
-
-		/* Check if we're allowed to change the bits and
-		 * the direction is correct
-		 */
-		spin_lock_irqsave(&gpio_lock, flags);
-		dir_shadow = readl(dir_oe[priv->minor]);
-		if ((clk_mask & changeable_bits[priv->minor]) &&
-		    (data_mask & changeable_bits[priv->minor]) &&
-		    (clk_mask & dir_shadow) &&
-		    (data_mask & dir_shadow)) {
-			priv->clk_mask = clk_mask;
-			priv->data_mask = data_mask;
-			priv->write_msb = write_msb;
-			res = 0;
-		}
-		spin_unlock_irqrestore(&gpio_lock, flags);
-
-		return res;
-	}
-	case IO_READ_INBITS:
-		/* *arg is result of reading the input pins */
-		val = readl(data_in[priv->minor]);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		return 0;
-	case IO_READ_OUTBITS:
-		 /* *arg is result of reading the output shadow */
-		val = *data_out[priv->minor];
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_INPUT:
-		/* bits set in *arg is set to input,
-		 * *arg updated with current input pins.
-		 */
-		if (copy_from_user(&val, (void __user *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_input(priv, val);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_OUTPUT:
-		/* bits set in *arg is set to output,
-		 * *arg updated with current output pins.
-		 */
-		if (copy_from_user(&val, (void __user *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_output(priv, val);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	default:
-		return -EINVAL;
-	} /* switch */
-
-	return 0;
-}
-
-static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-       long ret;
-
-       mutex_lock(&gpio_mutex);
-       ret = gpio_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&gpio_mutex);
-
-       return ret;
-}
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static int virtual_gpio_ioctl(struct file *file, unsigned int cmd,
-	unsigned long arg)
-{
-	unsigned long flags;
-	unsigned short val;
-	unsigned short shadow;
-	struct gpio_private *priv = file->private_data;
-
-	switch (_IOC_NR(cmd)) {
-	case IO_SETBITS:
-		spin_lock_irqsave(&gpio_lock, flags);
-		/* Set changeable bits with a 1 in arg. */
-		i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		shadow |= ~readl(dir_oe[priv->minor]) |
-			(arg & changeable_bits[priv->minor]);
-		i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		spin_unlock_irqrestore(&gpio_lock, flags);
-		break;
-	case IO_CLRBITS:
-		spin_lock_irqsave(&gpio_lock, flags);
-		/* Clear changeable bits with a 1 in arg. */
-		i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		shadow |= ~readl(dir_oe[priv->minor]) &
-			~(arg & changeable_bits[priv->minor]);
-		i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		spin_unlock_irqrestore(&gpio_lock, flags);
-		break;
-	case IO_HIGHALARM:
-		/* Set alarm when bits with 1 in arg go high. */
-		priv->highalarm |= arg;
-		break;
-	case IO_LOWALARM:
-		/* Set alarm when bits with 1 in arg go low. */
-		priv->lowalarm |= arg;
-		break;
-	case IO_CLRALARM:
-		/* Clear alarm for bits with 1 in arg. */
-		priv->highalarm &= ~arg;
-		priv->lowalarm  &= ~arg;
-		break;
-	case IO_CFG_WRITE_MODE:
-	{
-		unsigned long dir_shadow;
-		dir_shadow = readl(dir_oe[priv->minor]);
-
-		priv->clk_mask = arg & 0xFF;
-		priv->data_mask = (arg >> 8) & 0xFF;
-		priv->write_msb = (arg >> 16) & 0x01;
-		/* Check if we're allowed to change the bits and
-		 * the direction is correct
-		 */
-		if (!((priv->clk_mask & changeable_bits[priv->minor]) &&
-		      (priv->data_mask & changeable_bits[priv->minor]) &&
-		      (priv->clk_mask & dir_shadow) &&
-		      (priv->data_mask & dir_shadow))) {
-			priv->clk_mask = 0;
-			priv->data_mask = 0;
-			return -EPERM;
-		}
-		break;
-	}
-	case IO_READ_INBITS:
-		/* *arg is result of reading the input pins */
-		val = cached_virtual_gpio_read & ~readl(dir_oe[priv->minor]);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		return 0;
-
-	case IO_READ_OUTBITS:
-		 /* *arg is result of reading the output shadow */
-		i2c_read(VIRT_I2C_ADDR, (void *)&val, sizeof(val));
-		val &= readl(dir_oe[priv->minor]);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_INPUT:
-	{
-		/* bits set in *arg is set to input,
-		 * *arg updated with current input pins.
-		 */
-		unsigned short input_mask = ~readl(dir_oe[priv->minor]);
-		if (copy_from_user(&val, (void __user *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_input(priv, val);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		if ((input_mask & val) != input_mask) {
-			/* Input pins changed. All ports desired as input
-			 * should be set to logic 1.
-			 */
-			unsigned short change = input_mask ^ val;
-			i2c_read(VIRT_I2C_ADDR, (void *)&shadow,
-				sizeof(shadow));
-			shadow &= ~change;
-			shadow |= val;
-			i2c_write(VIRT_I2C_ADDR, (void *)&shadow,
-				sizeof(shadow));
-		}
-		break;
-	}
-	case IO_SETGET_OUTPUT:
-		/* bits set in *arg is set to output,
-		 * *arg updated with current output pins.
-		 */
-		if (copy_from_user(&val, (void __user *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_output(priv, val);
-		if (copy_to_user((void __user *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	default:
-		return -EINVAL;
-	} /* switch */
-	return 0;
-}
-#endif /* CONFIG_ETRAX_VIRTUAL_GPIO */
-
-static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg)
-{
-	unsigned char green;
-	unsigned char red;
-
-	switch (_IOC_NR(cmd)) {
-	case IO_LEDACTIVE_SET:
-		green = ((unsigned char) arg) & 1;
-		red   = (((unsigned char) arg) >> 1) & 1;
-		CRIS_LED_ACTIVE_SET_G(green);
-		CRIS_LED_ACTIVE_SET_R(red);
-		break;
-
-	default:
-		return -EINVAL;
-	} /* switch */
-
-	return 0;
-}
-
-static int gpio_pwm_set_mode(unsigned long arg, int pwm_port)
-{
-	int pinmux_pwm = pinmux_pwm0 + pwm_port;
-	int mode;
-	reg_gio_rw_pwm0_ctrl rw_pwm_ctrl = {
-		.ccd_val = 0,
-		.ccd_override = regk_gio_no,
-		.mode = regk_gio_no
-	};
-	int allocstatus;
-
-	if (get_user(mode, &((struct io_pwm_set_mode *) arg)->mode))
-		return -EFAULT;
-	rw_pwm_ctrl.mode = mode;
-	if (mode != PWM_OFF)
-		allocstatus = crisv32_pinmux_alloc_fixed(pinmux_pwm);
-	else
-		allocstatus = crisv32_pinmux_dealloc_fixed(pinmux_pwm);
-	if (allocstatus)
-		return allocstatus;
-	REG_WRITE(reg_gio_rw_pwm0_ctrl, REG_ADDR(gio, regi_gio, rw_pwm0_ctrl) +
-		12 * pwm_port, rw_pwm_ctrl);
-	return 0;
-}
-
-static int gpio_pwm_set_period(unsigned long arg, int pwm_port)
-{
-	struct io_pwm_set_period periods;
-	reg_gio_rw_pwm0_var rw_pwm_widths;
-
-	if (copy_from_user(&periods, (void __user *)arg, sizeof(periods)))
-		return -EFAULT;
-	if (periods.lo > 8191 || periods.hi > 8191)
-		return -EINVAL;
-	rw_pwm_widths.lo = periods.lo;
-	rw_pwm_widths.hi = periods.hi;
-	REG_WRITE(reg_gio_rw_pwm0_var, REG_ADDR(gio, regi_gio, rw_pwm0_var) +
-		12 * pwm_port, rw_pwm_widths);
-	return 0;
-}
-
-static int gpio_pwm_set_duty(unsigned long arg, int pwm_port)
-{
-	unsigned int duty;
-	reg_gio_rw_pwm0_data rw_pwm_duty;
-
-	if (get_user(duty, &((struct io_pwm_set_duty *) arg)->duty))
-		return -EFAULT;
-	if (duty > 255)
-		return -EINVAL;
-	rw_pwm_duty.data = duty;
-	REG_WRITE(reg_gio_rw_pwm0_data, REG_ADDR(gio, regi_gio, rw_pwm0_data) +
-		12 * pwm_port, rw_pwm_duty);
-	return 0;
-}
-
-static int gpio_pwm_ioctl(struct gpio_private *priv, unsigned int cmd,
-	unsigned long arg)
-{
-	int pwm_port = priv->minor - GPIO_MINOR_PWM0;
-
-	switch (_IOC_NR(cmd)) {
-	case IO_PWM_SET_MODE:
-		return gpio_pwm_set_mode(arg, pwm_port);
-	case IO_PWM_SET_PERIOD:
-		return gpio_pwm_set_period(arg, pwm_port);
-	case IO_PWM_SET_DUTY:
-		return gpio_pwm_set_duty(arg, pwm_port);
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static const struct file_operations gpio_fops = {
-	.owner		= THIS_MODULE,
-	.poll		= gpio_poll,
-	.unlocked_ioctl	= gpio_ioctl,
-	.write		= gpio_write,
-	.open		= gpio_open,
-	.release	= gpio_release,
-	.llseek		= noop_llseek,
-};
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static void __init virtual_gpio_init(void)
-{
-	reg_gio_rw_intr_cfg intr_cfg;
-	reg_gio_rw_intr_mask intr_mask;
-	unsigned short shadow;
-
-	shadow = ~virtual_rw_pv_oe; /* Input ports should be set to logic 1 */
-	shadow |= CONFIG_ETRAX_DEF_GIO_PV_OUT;
-	i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-
-	/* Set interrupt mask and on what state the interrupt shall trigger.
-	 * For virtual gpio the interrupt shall trigger on logic '0'.
-	 */
-	intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg);
-	intr_mask = REG_RD(gio, regi_gio, rw_intr_mask);
-
-	switch (CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN) {
-	case 0:
-		intr_cfg.pa0 = regk_gio_lo;
-		intr_mask.pa0 = regk_gio_yes;
-		break;
-	case 1:
-		intr_cfg.pa1 = regk_gio_lo;
-		intr_mask.pa1 = regk_gio_yes;
-		break;
-	case 2:
-		intr_cfg.pa2 = regk_gio_lo;
-		intr_mask.pa2 = regk_gio_yes;
-		break;
-	case 3:
-		intr_cfg.pa3 = regk_gio_lo;
-		intr_mask.pa3 = regk_gio_yes;
-		break;
-	case 4:
-		intr_cfg.pa4 = regk_gio_lo;
-		intr_mask.pa4 = regk_gio_yes;
-		break;
-	case 5:
-		intr_cfg.pa5 = regk_gio_lo;
-		intr_mask.pa5 = regk_gio_yes;
-		break;
-	case 6:
-		intr_cfg.pa6 = regk_gio_lo;
-		intr_mask.pa6 = regk_gio_yes;
-		break;
-	case 7:
-		intr_cfg.pa7 = regk_gio_lo;
-		intr_mask.pa7 = regk_gio_yes;
-		break;
-	}
-
-	REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg);
-	REG_WR(gio, regi_gio, rw_intr_mask, intr_mask);
-}
-#endif
-
-/* main driver initialization routine, called from mem.c */
-
-static int __init gpio_init(void)
-{
-	int res, res2;
-
-	printk(KERN_INFO "ETRAX FS GPIO driver v2.7, (c) 2003-2008 "
-		"Axis Communications AB\n");
-
-	/* do the formalities */
-
-	res = register_chrdev(GPIO_MAJOR, gpio_name, &gpio_fops);
-	if (res < 0) {
-		printk(KERN_ERR "gpio: couldn't get a major number.\n");
-		return res;
-	}
-
-	/* Clear all leds */
-	CRIS_LED_NETWORK_GRP0_SET(0);
-	CRIS_LED_NETWORK_GRP1_SET(0);
-	CRIS_LED_ACTIVE_SET(0);
-	CRIS_LED_DISK_READ(0);
-	CRIS_LED_DISK_WRITE(0);
-
-	res2 = request_irq(GIO_INTR_VECT, gpio_interrupt,
-		IRQF_SHARED, "gpio", &alarmlist);
-	if (res2) {
-		printk(KERN_ERR "err: irq for gpio\n");
-		return res2;
-	}
-
-	/* No IRQs by default. */
-	REG_WR_INT(gio, regi_gio, rw_intr_pins, 0);
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	virtual_gpio_init();
-#endif
-
-	return res;
-}
-
-/* this makes sure that gpio_init is called during kernel boot */
-
-module_init(gpio_init);
diff --git a/arch/cris/arch-v32/drivers/mach-fs/Makefile b/arch/cris/arch-v32/drivers/mach-fs/Makefile
index 5c6d2a2a080e..59028d0b981c 100644
--- a/arch/cris/arch-v32/drivers/mach-fs/Makefile
+++ b/arch/cris/arch-v32/drivers/mach-fs/Makefile
@@ -3,4 +3,3 @@
 #
 
 obj-$(CONFIG_ETRAX_NANDFLASH)   += nandflash.o
-obj-$(CONFIG_ETRAX_GPIO)        += gpio.o
diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c
deleted file mode 100644
index 72968fbf814b..000000000000
--- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c
+++ /dev/null
@@ -1,978 +0,0 @@
-/*
- * ETRAX CRISv32 general port I/O device
- *
- * Copyright (c) 1999-2006 Axis Communications AB
- *
- * Authors:    Bjorn Wesen      (initial version)
- *             Ola Knutsson     (LED handling)
- *             Johan Adolfsson  (read/set directions, write, port G,
- *                               port to ETRAX FS.
- *
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-
-#include <asm/etraxgpio.h>
-#include <hwregs/reg_map.h>
-#include <hwregs/reg_rdwr.h>
-#include <hwregs/gio_defs.h>
-#include <hwregs/intr_vect_defs.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-#include "../i2c.h"
-
-#define VIRT_I2C_ADDR 0x40
-#endif
-
-/* The following gio ports on ETRAX FS is available:
- * pa  8 bits, supports interrupts off, hi, low, set, posedge, negedge anyedge
- * pb 18 bits
- * pc 18 bits
- * pd 18 bits
- * pe 18 bits
- * each port has a rw_px_dout, r_px_din and rw_px_oe register.
- */
-
-#define GPIO_MAJOR 120  /* experimental MAJOR number */
-
-#define D(x)
-
-#if 0
-static int dp_cnt;
-#define DP(x) \
-	do { \
-		dp_cnt++; \
-		if (dp_cnt % 1000 == 0) \
-			x; \
-	} while (0)
-#else
-#define DP(x)
-#endif
-
-static DEFINE_MUTEX(gpio_mutex);
-static char gpio_name[] = "etrax gpio";
-
-#if 0
-static wait_queue_head_t *gpio_wq;
-#endif
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static int virtual_gpio_ioctl(struct file *file, unsigned int cmd,
-	unsigned long arg);
-#endif
-static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-static ssize_t gpio_write(struct file *file, const char *buf, size_t count,
-	loff_t *off);
-static int gpio_open(struct inode *inode, struct file *filp);
-static int gpio_release(struct inode *inode, struct file *filp);
-static unsigned int gpio_poll(struct file *filp,
-	struct poll_table_struct *wait);
-
-/* private data per open() of this driver */
-
-struct gpio_private {
-	struct gpio_private *next;
-	/* The IO_CFG_WRITE_MODE_VALUE only support 8 bits: */
-	unsigned char clk_mask;
-	unsigned char data_mask;
-	unsigned char write_msb;
-	unsigned char pad1;
-	/* These fields are generic */
-	unsigned long highalarm, lowalarm;
-	wait_queue_head_t alarm_wq;
-	int minor;
-};
-
-/* linked list of alarms to check for */
-
-static struct gpio_private *alarmlist;
-
-static int gpio_some_alarms; /* Set if someone uses alarm */
-static unsigned long gpio_pa_high_alarms;
-static unsigned long gpio_pa_low_alarms;
-
-static DEFINE_SPINLOCK(alarm_lock);
-
-#define NUM_PORTS (GPIO_MINOR_LAST+1)
-#define GIO_REG_RD_ADDR(reg) \
-	(volatile unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg)
-#define GIO_REG_WR_ADDR(reg) \
-	(volatile unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg)
-unsigned long led_dummy;
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static unsigned long virtual_dummy;
-static unsigned long virtual_rw_pv_oe = CONFIG_ETRAX_DEF_GIO_PV_OE;
-static unsigned short cached_virtual_gpio_read;
-#endif
-
-static volatile unsigned long *data_out[NUM_PORTS] = {
-	GIO_REG_WR_ADDR(rw_pa_dout),
-	GIO_REG_WR_ADDR(rw_pb_dout),
-	&led_dummy,
-	GIO_REG_WR_ADDR(rw_pc_dout),
-	GIO_REG_WR_ADDR(rw_pd_dout),
-	GIO_REG_WR_ADDR(rw_pe_dout),
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&virtual_dummy,
-#endif
-};
-
-static volatile unsigned long *data_in[NUM_PORTS] = {
-	GIO_REG_RD_ADDR(r_pa_din),
-	GIO_REG_RD_ADDR(r_pb_din),
-	&led_dummy,
-	GIO_REG_RD_ADDR(r_pc_din),
-	GIO_REG_RD_ADDR(r_pd_din),
-	GIO_REG_RD_ADDR(r_pe_din),
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&virtual_dummy,
-#endif
-};
-
-static unsigned long changeable_dir[NUM_PORTS] = {
-	CONFIG_ETRAX_PA_CHANGEABLE_DIR,
-	CONFIG_ETRAX_PB_CHANGEABLE_DIR,
-	0,
-	CONFIG_ETRAX_PC_CHANGEABLE_DIR,
-	CONFIG_ETRAX_PD_CHANGEABLE_DIR,
-	CONFIG_ETRAX_PE_CHANGEABLE_DIR,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	CONFIG_ETRAX_PV_CHANGEABLE_DIR,
-#endif
-};
-
-static unsigned long changeable_bits[NUM_PORTS] = {
-	CONFIG_ETRAX_PA_CHANGEABLE_BITS,
-	CONFIG_ETRAX_PB_CHANGEABLE_BITS,
-	0,
-	CONFIG_ETRAX_PC_CHANGEABLE_BITS,
-	CONFIG_ETRAX_PD_CHANGEABLE_BITS,
-	CONFIG_ETRAX_PE_CHANGEABLE_BITS,
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	CONFIG_ETRAX_PV_CHANGEABLE_BITS,
-#endif
-};
-
-static volatile unsigned long *dir_oe[NUM_PORTS] = {
-	GIO_REG_WR_ADDR(rw_pa_oe),
-	GIO_REG_WR_ADDR(rw_pb_oe),
-	&led_dummy,
-	GIO_REG_WR_ADDR(rw_pc_oe),
-	GIO_REG_WR_ADDR(rw_pd_oe),
-	GIO_REG_WR_ADDR(rw_pe_oe),
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	&virtual_rw_pv_oe,
-#endif
-};
-
-
-
-static unsigned int gpio_poll(struct file *file, struct poll_table_struct *wait)
-{
-	unsigned int mask = 0;
-	struct gpio_private *priv = file->private_data;
-	unsigned long data;
-	poll_wait(file, &priv->alarm_wq, wait);
-	if (priv->minor == GPIO_MINOR_A) {
-		reg_gio_rw_intr_cfg intr_cfg;
-		unsigned long tmp;
-		unsigned long flags;
-
-		local_irq_save(flags);
-		data = REG_TYPE_CONV(unsigned long, reg_gio_r_pa_din,
-			REG_RD(gio, regi_gio, r_pa_din));
-		/* PA has support for interrupt
-		 * lets activate high for those low and with highalarm set
-		 */
-		intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg);
-
-		tmp = ~data & priv->highalarm & 0xFF;
-		if (tmp & (1 << 0))
-			intr_cfg.pa0 = regk_gio_hi;
-		if (tmp & (1 << 1))
-			intr_cfg.pa1 = regk_gio_hi;
-		if (tmp & (1 << 2))
-			intr_cfg.pa2 = regk_gio_hi;
-		if (tmp & (1 << 3))
-			intr_cfg.pa3 = regk_gio_hi;
-		if (tmp & (1 << 4))
-			intr_cfg.pa4 = regk_gio_hi;
-		if (tmp & (1 << 5))
-			intr_cfg.pa5 = regk_gio_hi;
-		if (tmp & (1 << 6))
-			intr_cfg.pa6 = regk_gio_hi;
-		if (tmp & (1 << 7))
-			intr_cfg.pa7 = regk_gio_hi;
-		/*
-		 * lets activate low for those high and with lowalarm set
-		 */
-		tmp = data & priv->lowalarm & 0xFF;
-		if (tmp & (1 << 0))
-			intr_cfg.pa0 = regk_gio_lo;
-		if (tmp & (1 << 1))
-			intr_cfg.pa1 = regk_gio_lo;
-		if (tmp & (1 << 2))
-			intr_cfg.pa2 = regk_gio_lo;
-		if (tmp & (1 << 3))
-			intr_cfg.pa3 = regk_gio_lo;
-		if (tmp & (1 << 4))
-			intr_cfg.pa4 = regk_gio_lo;
-		if (tmp & (1 << 5))
-			intr_cfg.pa5 = regk_gio_lo;
-		if (tmp & (1 << 6))
-			intr_cfg.pa6 = regk_gio_lo;
-		if (tmp & (1 << 7))
-			intr_cfg.pa7 = regk_gio_lo;
-
-		REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg);
-		local_irq_restore(flags);
-	} else if (priv->minor <= GPIO_MINOR_E)
-		data = *data_in[priv->minor];
-	else
-		return 0;
-
-	if ((data & priv->highalarm) || (~data & priv->lowalarm))
-		mask = POLLIN|POLLRDNORM;
-
-	DP(printk(KERN_DEBUG "gpio_poll ready: mask 0x%08X\n", mask));
-	return mask;
-}
-
-int etrax_gpio_wake_up_check(void)
-{
-	struct gpio_private *priv;
-	unsigned long data = 0;
-	unsigned long flags;
-	int ret = 0;
-	spin_lock_irqsave(&alarm_lock, flags);
-	priv = alarmlist;
-	while (priv) {
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-		if (priv->minor == GPIO_MINOR_V)
-			data = (unsigned long)cached_virtual_gpio_read;
-		else {
-			data = *data_in[priv->minor];
-			if (priv->minor == GPIO_MINOR_A)
-				priv->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-		}
-#else
-		data = *data_in[priv->minor];
-#endif
-		if ((data & priv->highalarm) ||
-		    (~data & priv->lowalarm)) {
-			DP(printk(KERN_DEBUG
-				"etrax_gpio_wake_up_check %i\n", priv->minor));
-			wake_up_interruptible(&priv->alarm_wq);
-			ret = 1;
-		}
-		priv = priv->next;
-	}
-	spin_unlock_irqrestore(&alarm_lock, flags);
-	return ret;
-}
-
-static irqreturn_t
-gpio_poll_timer_interrupt(int irq, void *dev_id)
-{
-	if (gpio_some_alarms)
-		return IRQ_RETVAL(etrax_gpio_wake_up_check());
-	return IRQ_NONE;
-}
-
-static irqreturn_t
-gpio_pa_interrupt(int irq, void *dev_id)
-{
-	reg_gio_rw_intr_mask intr_mask;
-	reg_gio_r_masked_intr masked_intr;
-	reg_gio_rw_ack_intr ack_intr;
-	unsigned long tmp;
-	unsigned long tmp2;
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	unsigned char enable_gpiov_ack = 0;
-#endif
-
-	/* Find what PA interrupts are active */
-	masked_intr = REG_RD(gio, regi_gio, r_masked_intr);
-	tmp = REG_TYPE_CONV(unsigned long, reg_gio_r_masked_intr, masked_intr);
-
-	/* Find those that we have enabled */
-	spin_lock(&alarm_lock);
-	tmp &= (gpio_pa_high_alarms | gpio_pa_low_alarms);
-	spin_unlock(&alarm_lock);
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Something changed on virtual GPIO. Interrupt is acked by
-	 * reading the device.
-	 */
-	if (tmp & (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN)) {
-		i2c_read(VIRT_I2C_ADDR, (void *)&cached_virtual_gpio_read,
-			sizeof(cached_virtual_gpio_read));
-		enable_gpiov_ack = 1;
-	}
-#endif
-
-	/* Ack them */
-	ack_intr = REG_TYPE_CONV(reg_gio_rw_ack_intr, unsigned long, tmp);
-	REG_WR(gio, regi_gio, rw_ack_intr, ack_intr);
-
-	/* Disable those interrupts.. */
-	intr_mask = REG_RD(gio, regi_gio, rw_intr_mask);
-	tmp2 = REG_TYPE_CONV(unsigned long, reg_gio_rw_intr_mask, intr_mask);
-	tmp2 &= ~tmp;
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Do not disable interrupt on virtual GPIO. Changes on virtual
-	 * pins are only noticed by an interrupt.
-	 */
-	if (enable_gpiov_ack)
-		tmp2 |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-	intr_mask = REG_TYPE_CONV(reg_gio_rw_intr_mask, unsigned long, tmp2);
-	REG_WR(gio, regi_gio, rw_intr_mask, intr_mask);
-
-	if (gpio_some_alarms)
-		return IRQ_RETVAL(etrax_gpio_wake_up_check());
-	return IRQ_NONE;
-}
-
-
-static ssize_t gpio_write(struct file *file, const char *buf, size_t count,
-	loff_t *off)
-{
-	struct gpio_private *priv = file->private_data;
-	unsigned char data, clk_mask, data_mask, write_msb;
-	unsigned long flags;
-	unsigned long shadow;
-	volatile unsigned long *port;
-	ssize_t retval = count;
-	/* Only bits 0-7 may be used for write operations but allow all
-	   devices except leds... */
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	if (priv->minor == GPIO_MINOR_V)
-		return -EFAULT;
-#endif
-	if (priv->minor == GPIO_MINOR_LEDS)
-		return -EFAULT;
-
-	if (!access_ok(VERIFY_READ, buf, count))
-		return -EFAULT;
-	clk_mask = priv->clk_mask;
-	data_mask = priv->data_mask;
-	/* It must have been configured using the IO_CFG_WRITE_MODE */
-	/* Perhaps a better error code? */
-	if (clk_mask == 0 || data_mask == 0)
-		return -EPERM;
-	write_msb = priv->write_msb;
-	D(printk(KERN_DEBUG "gpio_write: %lu to data 0x%02X clk 0x%02X "
-		"msb: %i\n", count, data_mask, clk_mask, write_msb));
-	port = data_out[priv->minor];
-
-	while (count--) {
-		int i;
-		data = *buf++;
-		if (priv->write_msb) {
-			for (i = 7; i >= 0; i--) {
-				local_irq_save(flags);
-				shadow = *port;
-				*port = shadow &= ~clk_mask;
-				if (data & 1<<i)
-					*port = shadow |= data_mask;
-				else
-					*port = shadow &= ~data_mask;
-			/* For FPGA: min 5.0ns (DCC) before CCLK high */
-				*port = shadow |= clk_mask;
-				local_irq_restore(flags);
-			}
-		} else {
-			for (i = 0; i <= 7; i++) {
-				local_irq_save(flags);
-				shadow = *port;
-				*port = shadow &= ~clk_mask;
-				if (data & 1<<i)
-					*port = shadow |= data_mask;
-				else
-					*port = shadow &= ~data_mask;
-			/* For FPGA: min 5.0ns (DCC) before CCLK high */
-				*port = shadow |= clk_mask;
-				local_irq_restore(flags);
-			}
-		}
-	}
-	return retval;
-}
-
-
-
-static int
-gpio_open(struct inode *inode, struct file *filp)
-{
-	struct gpio_private *priv;
-	int p = iminor(inode);
-
-	if (p > GPIO_MINOR_LAST)
-		return -EINVAL;
-
-	priv = kzalloc(sizeof(struct gpio_private), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	mutex_lock(&gpio_mutex);
-
-	priv->minor = p;
-
-	/* initialize the io/alarm struct */
-
-	priv->clk_mask = 0;
-	priv->data_mask = 0;
-	priv->highalarm = 0;
-	priv->lowalarm = 0;
-	init_waitqueue_head(&priv->alarm_wq);
-
-	filp->private_data = (void *)priv;
-
-	/* link it into our alarmlist */
-	spin_lock_irq(&alarm_lock);
-	priv->next = alarmlist;
-	alarmlist = priv;
-	spin_unlock_irq(&alarm_lock);
-
-	mutex_unlock(&gpio_mutex);
-	return 0;
-}
-
-static int
-gpio_release(struct inode *inode, struct file *filp)
-{
-	struct gpio_private *p;
-	struct gpio_private *todel;
-	/* local copies while updating them: */
-	unsigned long a_high, a_low;
-	unsigned long some_alarms;
-
-	/* unlink from alarmlist and free the private structure */
-
-	spin_lock_irq(&alarm_lock);
-	p = alarmlist;
-	todel = filp->private_data;
-
-	if (p == todel) {
-		alarmlist = todel->next;
-	} else {
-		while (p->next != todel)
-			p = p->next;
-		p->next = todel->next;
-	}
-
-	kfree(todel);
-	/* Check if there are still any alarms set */
-	p = alarmlist;
-	some_alarms = 0;
-	a_high = 0;
-	a_low = 0;
-	while (p) {
-		if (p->minor == GPIO_MINOR_A) {
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-			p->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-			a_high |= p->highalarm;
-			a_low |= p->lowalarm;
-		}
-
-		if (p->highalarm | p->lowalarm)
-			some_alarms = 1;
-		p = p->next;
-	}
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	/* Variables 'some_alarms' and 'a_low' needs to be set here again
-	 * to ensure that interrupt for virtual GPIO is handled.
-	 */
-	some_alarms = 1;
-	a_low |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-#endif
-
-	gpio_some_alarms = some_alarms;
-	gpio_pa_high_alarms = a_high;
-	gpio_pa_low_alarms = a_low;
-	spin_unlock_irq(&alarm_lock);
-
-	return 0;
-}
-
-/* Main device API. ioctl's to read/set/clear bits, as well as to
- * set alarms to wait for using a subsequent select().
- */
-
-inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg)
-{
-	/* Set direction 0=unchanged 1=input,
-	 * return mask with 1=input
-	 */
-	unsigned long flags;
-	unsigned long dir_shadow;
-
-	local_irq_save(flags);
-	dir_shadow = *dir_oe[priv->minor];
-	dir_shadow &= ~(arg & changeable_dir[priv->minor]);
-	*dir_oe[priv->minor] = dir_shadow;
-	local_irq_restore(flags);
-
-	if (priv->minor == GPIO_MINOR_A)
-		dir_shadow ^= 0xFF;    /* Only 8 bits */
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	else if (priv->minor == GPIO_MINOR_V)
-		dir_shadow ^= 0xFFFF;  /* Only 16 bits */
-#endif
-	else
-		dir_shadow ^= 0x3FFFF; /* Only 18 bits */
-	return dir_shadow;
-
-} /* setget_input */
-
-inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg)
-{
-	unsigned long flags;
-	unsigned long dir_shadow;
-
-	local_irq_save(flags);
-	dir_shadow = *dir_oe[priv->minor];
-	dir_shadow |=  (arg & changeable_dir[priv->minor]);
-	*dir_oe[priv->minor] = dir_shadow;
-	local_irq_restore(flags);
-	return dir_shadow;
-} /* setget_output */
-
-static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg);
-
-static int
-gpio_ioctl_unlocked(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	unsigned long flags;
-	unsigned long val;
-	unsigned long shadow;
-	struct gpio_private *priv = file->private_data;
-	if (_IOC_TYPE(cmd) != ETRAXGPIO_IOCTYPE)
-		return -EINVAL;
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	if (priv->minor == GPIO_MINOR_V)
-		return virtual_gpio_ioctl(file, cmd, arg);
-#endif
-
-	switch (_IOC_NR(cmd)) {
-	case IO_READBITS: /* Use IO_READ_INBITS and IO_READ_OUTBITS instead */
-		/* Read the port. */
-		return *data_in[priv->minor];
-		break;
-	case IO_SETBITS:
-		local_irq_save(flags);
-		/* Set changeable bits with a 1 in arg. */
-		shadow = *data_out[priv->minor];
-		shadow |=  (arg & changeable_bits[priv->minor]);
-		*data_out[priv->minor] = shadow;
-		local_irq_restore(flags);
-		break;
-	case IO_CLRBITS:
-		local_irq_save(flags);
-		/* Clear changeable bits with a 1 in arg. */
-		shadow = *data_out[priv->minor];
-		shadow &=  ~(arg & changeable_bits[priv->minor]);
-		*data_out[priv->minor] = shadow;
-		local_irq_restore(flags);
-		break;
-	case IO_HIGHALARM:
-		/* Set alarm when bits with 1 in arg go high. */
-		priv->highalarm |= arg;
-		spin_lock_irqsave(&alarm_lock, flags);
-		gpio_some_alarms = 1;
-		if (priv->minor == GPIO_MINOR_A)
-			gpio_pa_high_alarms |= arg;
-		spin_unlock_irqrestore(&alarm_lock, flags);
-		break;
-	case IO_LOWALARM:
-		/* Set alarm when bits with 1 in arg go low. */
-		priv->lowalarm |= arg;
-		spin_lock_irqsave(&alarm_lock, flags);
-		gpio_some_alarms = 1;
-		if (priv->minor == GPIO_MINOR_A)
-			gpio_pa_low_alarms |= arg;
-		spin_unlock_irqrestore(&alarm_lock, flags);
-		break;
-	case IO_CLRALARM:
-		/* Clear alarm for bits with 1 in arg. */
-		priv->highalarm &= ~arg;
-		priv->lowalarm  &= ~arg;
-		spin_lock_irqsave(&alarm_lock, flags);
-		if (priv->minor == GPIO_MINOR_A) {
-			if (gpio_pa_high_alarms & arg ||
-			    gpio_pa_low_alarms & arg)
-				/* Must update the gpio_pa_*alarms masks */
-				;
-		}
-		spin_unlock_irqrestore(&alarm_lock, flags);
-		break;
-	case IO_READDIR: /* Use IO_SETGET_INPUT/OUTPUT instead! */
-		/* Read direction 0=input 1=output */
-		return *dir_oe[priv->minor];
-	case IO_SETINPUT: /* Use IO_SETGET_INPUT instead! */
-		/* Set direction 0=unchanged 1=input,
-		 * return mask with 1=input
-		 */
-		return setget_input(priv, arg);
-		break;
-	case IO_SETOUTPUT: /* Use IO_SETGET_OUTPUT instead! */
-		/* Set direction 0=unchanged 1=output,
-		 * return mask with 1=output
-		 */
-		return setget_output(priv, arg);
-
-	case IO_CFG_WRITE_MODE:
-	{
-		unsigned long dir_shadow;
-		dir_shadow = *dir_oe[priv->minor];
-
-		priv->clk_mask = arg & 0xFF;
-		priv->data_mask = (arg >> 8) & 0xFF;
-		priv->write_msb = (arg >> 16) & 0x01;
-		/* Check if we're allowed to change the bits and
-		 * the direction is correct
-		 */
-		if (!((priv->clk_mask & changeable_bits[priv->minor]) &&
-		      (priv->data_mask & changeable_bits[priv->minor]) &&
-		      (priv->clk_mask & dir_shadow) &&
-		      (priv->data_mask & dir_shadow))) {
-			priv->clk_mask = 0;
-			priv->data_mask = 0;
-			return -EPERM;
-		}
-		break;
-	}
-	case IO_READ_INBITS:
-		/* *arg is result of reading the input pins */
-		val = *data_in[priv->minor];
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		return 0;
-		break;
-	case IO_READ_OUTBITS:
-		 /* *arg is result of reading the output shadow */
-		val = *data_out[priv->minor];
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_INPUT:
-		/* bits set in *arg is set to input,
-		 * *arg updated with current input pins.
-		 */
-		if (copy_from_user(&val, (unsigned long *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_input(priv, val);
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_OUTPUT:
-		/* bits set in *arg is set to output,
-		 * *arg updated with current output pins.
-		 */
-		if (copy_from_user(&val, (unsigned long *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_output(priv, val);
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	default:
-		if (priv->minor == GPIO_MINOR_LEDS)
-			return gpio_leds_ioctl(cmd, arg);
-		else
-			return -EINVAL;
-	} /* switch */
-
-	return 0;
-}
-
-static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-       long ret;
-
-       mutex_lock(&gpio_mutex);
-       ret = gpio_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&gpio_mutex);
-
-       return ret;
-}
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static int
-virtual_gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	unsigned long flags;
-	unsigned short val;
-	unsigned short shadow;
-	struct gpio_private *priv = file->private_data;
-
-	switch (_IOC_NR(cmd)) {
-	case IO_SETBITS:
-		local_irq_save(flags);
-		/* Set changeable bits with a 1 in arg. */
-		i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		shadow |= ~*dir_oe[priv->minor];
-		shadow |= (arg & changeable_bits[priv->minor]);
-		i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		local_irq_restore(flags);
-		break;
-	case IO_CLRBITS:
-		local_irq_save(flags);
-		/* Clear changeable bits with a 1 in arg. */
-		i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		shadow |= ~*dir_oe[priv->minor];
-		shadow &= ~(arg & changeable_bits[priv->minor]);
-		i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-		local_irq_restore(flags);
-		break;
-	case IO_HIGHALARM:
-		/* Set alarm when bits with 1 in arg go high. */
-		priv->highalarm |= arg;
-		spin_lock(&alarm_lock);
-		gpio_some_alarms = 1;
-		spin_unlock(&alarm_lock);
-		break;
-	case IO_LOWALARM:
-		/* Set alarm when bits with 1 in arg go low. */
-		priv->lowalarm |= arg;
-		spin_lock(&alarm_lock);
-		gpio_some_alarms = 1;
-		spin_unlock(&alarm_lock);
-		break;
-	case IO_CLRALARM:
-		/* Clear alarm for bits with 1 in arg. */
-		priv->highalarm &= ~arg;
-		priv->lowalarm  &= ~arg;
-		spin_lock(&alarm_lock);
-		spin_unlock(&alarm_lock);
-		break;
-	case IO_CFG_WRITE_MODE:
-	{
-		unsigned long dir_shadow;
-		dir_shadow = *dir_oe[priv->minor];
-
-		priv->clk_mask = arg & 0xFF;
-		priv->data_mask = (arg >> 8) & 0xFF;
-		priv->write_msb = (arg >> 16) & 0x01;
-		/* Check if we're allowed to change the bits and
-		 * the direction is correct
-		 */
-		if (!((priv->clk_mask & changeable_bits[priv->minor]) &&
-		      (priv->data_mask & changeable_bits[priv->minor]) &&
-		      (priv->clk_mask & dir_shadow) &&
-		      (priv->data_mask & dir_shadow))) {
-			priv->clk_mask = 0;
-			priv->data_mask = 0;
-			return -EPERM;
-		}
-		break;
-	}
-	case IO_READ_INBITS:
-		/* *arg is result of reading the input pins */
-		val = cached_virtual_gpio_read;
-		val &= ~*dir_oe[priv->minor];
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		return 0;
-		break;
-	case IO_READ_OUTBITS:
-		 /* *arg is result of reading the output shadow */
-		i2c_read(VIRT_I2C_ADDR, (void *)&val, sizeof(val));
-		val &= *dir_oe[priv->minor];
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	case IO_SETGET_INPUT:
-	{
-		/* bits set in *arg is set to input,
-		 * *arg updated with current input pins.
-		 */
-		unsigned short input_mask = ~*dir_oe[priv->minor];
-		if (copy_from_user(&val, (unsigned long *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_input(priv, val);
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		if ((input_mask & val) != input_mask) {
-			/* Input pins changed. All ports desired as input
-			 * should be set to logic 1.
-			 */
-			unsigned short change = input_mask ^ val;
-			i2c_read(VIRT_I2C_ADDR, (void *)&shadow,
-				sizeof(shadow));
-			shadow &= ~change;
-			shadow |= val;
-			i2c_write(VIRT_I2C_ADDR, (void *)&shadow,
-				sizeof(shadow));
-		}
-		break;
-	}
-	case IO_SETGET_OUTPUT:
-		/* bits set in *arg is set to output,
-		 * *arg updated with current output pins.
-		 */
-		if (copy_from_user(&val, (unsigned long *)arg, sizeof(val)))
-			return -EFAULT;
-		val = setget_output(priv, val);
-		if (copy_to_user((unsigned long *)arg, &val, sizeof(val)))
-			return -EFAULT;
-		break;
-	default:
-		return -EINVAL;
-	} /* switch */
-  return 0;
-}
-#endif /* CONFIG_ETRAX_VIRTUAL_GPIO */
-
-static int
-gpio_leds_ioctl(unsigned int cmd, unsigned long arg)
-{
-	unsigned char green;
-	unsigned char red;
-
-	switch (_IOC_NR(cmd)) {
-	case IO_LEDACTIVE_SET:
-		green = ((unsigned char) arg) & 1;
-		red   = (((unsigned char) arg) >> 1) & 1;
-		CRIS_LED_ACTIVE_SET_G(green);
-		CRIS_LED_ACTIVE_SET_R(red);
-		break;
-
-	default:
-		return -EINVAL;
-	} /* switch */
-
-	return 0;
-}
-
-static const struct file_operations gpio_fops = {
-	.owner		= THIS_MODULE,
-	.poll		= gpio_poll,
-	.unlocked_ioctl	= gpio_ioctl,
-	.write		= gpio_write,
-	.open		= gpio_open,
-	.release	= gpio_release,
-	.llseek		= noop_llseek,
-};
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-static void
-virtual_gpio_init(void)
-{
-	reg_gio_rw_intr_cfg intr_cfg;
-	reg_gio_rw_intr_mask intr_mask;
-	unsigned short shadow;
-
-	shadow = ~virtual_rw_pv_oe; /* Input ports should be set to logic 1 */
-	shadow |= CONFIG_ETRAX_DEF_GIO_PV_OUT;
-	i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow));
-
-	/* Set interrupt mask and on what state the interrupt shall trigger.
-	 * For virtual gpio the interrupt shall trigger on logic '0'.
-	 */
-	intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg);
-	intr_mask = REG_RD(gio, regi_gio, rw_intr_mask);
-
-	switch (CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN) {
-	case 0:
-		intr_cfg.pa0 = regk_gio_lo;
-		intr_mask.pa0 = regk_gio_yes;
-		break;
-	case 1:
-		intr_cfg.pa1 = regk_gio_lo;
-		intr_mask.pa1 = regk_gio_yes;
-		break;
-	case 2:
-		intr_cfg.pa2 = regk_gio_lo;
-		intr_mask.pa2 = regk_gio_yes;
-		break;
-	case 3:
-		intr_cfg.pa3 = regk_gio_lo;
-		intr_mask.pa3 = regk_gio_yes;
-		break;
-	case 4:
-		intr_cfg.pa4 = regk_gio_lo;
-		intr_mask.pa4 = regk_gio_yes;
-		break;
-	case 5:
-		intr_cfg.pa5 = regk_gio_lo;
-		intr_mask.pa5 = regk_gio_yes;
-		break;
-	case 6:
-		intr_cfg.pa6 = regk_gio_lo;
-		intr_mask.pa6 = regk_gio_yes;
-		break;
-	case 7:
-		intr_cfg.pa7 = regk_gio_lo;
-		intr_mask.pa7 = regk_gio_yes;
-	break;
-	}
-
-	REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg);
-	REG_WR(gio, regi_gio, rw_intr_mask, intr_mask);
-
-	gpio_pa_low_alarms |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN);
-	gpio_some_alarms = 1;
-}
-#endif
-
-/* main driver initialization routine, called from mem.c */
-
-static __init int
-gpio_init(void)
-{
-	int res;
-
-	/* do the formalities */
-
-	res = register_chrdev(GPIO_MAJOR, gpio_name, &gpio_fops);
-	if (res < 0) {
-		printk(KERN_ERR "gpio: couldn't get a major number.\n");
-		return res;
-	}
-
-	/* Clear all leds */
-	CRIS_LED_NETWORK_GRP0_SET(0);
-	CRIS_LED_NETWORK_GRP1_SET(0);
-	CRIS_LED_ACTIVE_SET(0);
-	CRIS_LED_DISK_READ(0);
-	CRIS_LED_DISK_WRITE(0);
-
-	printk(KERN_INFO "ETRAX FS GPIO driver v2.5, (c) 2003-2007 "
-		"Axis Communications AB\n");
-	/* We call etrax_gpio_wake_up_check() from timer interrupt */
-	if (request_irq(TIMER0_INTR_VECT, gpio_poll_timer_interrupt,
-			IRQF_SHARED, "gpio poll", &alarmlist))
-		printk(KERN_ERR "timer0 irq for gpio\n");
-
-	if (request_irq(GIO_INTR_VECT, gpio_pa_interrupt,
-			IRQF_SHARED, "gpio PA", &alarmlist))
-		printk(KERN_ERR "PA irq for gpio\n");
-
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	virtual_gpio_init();
-#endif
-
-	return res;
-}
-
-/* this makes sure that gpio_init is called during kernel boot */
-
-module_init(gpio_init);
diff --git a/arch/cris/arch-v32/kernel/crisksyms.c b/arch/cris/arch-v32/kernel/crisksyms.c
index bde8d1a10cad..b0566350a840 100644
--- a/arch/cris/arch-v32/kernel/crisksyms.c
+++ b/arch/cris/arch-v32/kernel/crisksyms.c
@@ -3,7 +3,6 @@
 #include <arch/dma.h>
 #include <arch/intmem.h>
 #include <mach/pinmux.h>
-#include <arch/io.h>
 
 /* Functions for allocating DMA channels */
 EXPORT_SYMBOL(crisv32_request_dma);
@@ -20,8 +19,6 @@ EXPORT_SYMBOL(crisv32_pinmux_alloc);
 EXPORT_SYMBOL(crisv32_pinmux_alloc_fixed);
 EXPORT_SYMBOL(crisv32_pinmux_dealloc);
 EXPORT_SYMBOL(crisv32_pinmux_dealloc_fixed);
-EXPORT_SYMBOL(crisv32_io_get_name);
-EXPORT_SYMBOL(crisv32_io_get);
 
 /* Functions masking/unmasking interrupts */
 EXPORT_SYMBOL(crisv32_mask_irq);
diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
index 02e33ebe51ec..d2f3f9c37102 100644
--- a/arch/cris/arch-v32/kernel/debugport.c
+++ b/arch/cris/arch-v32/kernel/debugport.c
@@ -77,8 +77,6 @@ static struct dbg_port *port =
 	&ports[2];
 #elif defined(CONFIG_ETRAX_DEBUG_PORT3)
 	&ports[3];
-#elif defined(CONFIG_ETRAX_DEBUG_PORT4)
-	&ports[4];
 #else
 	NULL;
 #endif
diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S
index 74a66e0e3777..ea6366800df7 100644
--- a/arch/cris/arch-v32/kernel/head.S
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -292,11 +292,7 @@ _no_romfs_in_flash:
 	;; For cramfs, partition starts with magic and length.
 	;; For jffs2, a jhead is prepended which contains with magic and length.
 	;; The jhead is not part of the jffs2 partition however.
-#ifndef CONFIG_ETRAXFS_SIM
 	move.d	__bss_start, $r0
-#else
-	move.d	__end, $r0
-#endif
 	move.d	[$r0], $r1
 	cmp.d	CRAMFS_MAGIC, $r1 ; cramfs magic?
 	beq	2f		  ; yes, jump
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 6a881e0e92b4..6de8db67cb09 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -37,7 +37,7 @@
 #define IGNOREMASK (1 << (SER0_INTR_VECT - FIRST_IRQ))
 #elif defined(CONFIG_ETRAX_KGDB_PORT1)
 #define IGNOREMASK (1 << (SER1_INTR_VECT - FIRST_IRQ))
-#elif defined(CONFIG_ETRAX_KGB_PORT2)
+#elif defined(CONFIG_ETRAX_KGDB_PORT2)
 #define IGNOREMASK (1 << (SER2_INTR_VECT - FIRST_IRQ))
 #elif defined(CONFIG_ETRAX_KGDB_PORT3)
 #define IGNOREMASK (1 << (SER3_INTR_VECT - FIRST_IRQ))
@@ -464,14 +464,14 @@ init_IRQ(void)
 		etrax_irv->v[i] = weird_irq;
 
 	np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc");
-	domain = irq_domain_add_legacy(np, NR_IRQS - FIRST_IRQ,
+	domain = irq_domain_add_legacy(np, NBR_INTR_VECT - FIRST_IRQ,
 				       FIRST_IRQ, FIRST_IRQ,
 				       &crisv32_irq_ops, NULL);
 	BUG_ON(!domain);
 	irq_set_default_host(domain);
 	of_node_put(np);
 
-	for (i = FIRST_IRQ, j = 0; j < NR_IRQS; i++, j++) {
+	for (i = FIRST_IRQ, j = 0; j < NBR_INTR_VECT; i++, j++) {
 		set_exception_vector(i, interrupt[j]);
 	}
 
diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c
index b06813aeb120..e0fdea706eca 100644
--- a/arch/cris/arch-v32/kernel/kgdb.c
+++ b/arch/cris/arch-v32/kernel/kgdb.c
@@ -384,19 +384,11 @@ int getDebugChar(void);
 /* Serial port, writes one character. ETRAX 100 specific. from debugport.c */
 void putDebugChar(int val);
 
-/* Returns the integer equivalent of a hexadecimal character. */
-static int hex(char ch);
-
 /* Convert the memory, pointed to by mem into hexadecimal representation.
    Put the result in buf, and return a pointer to the last character
    in buf (null). */
 static char *mem2hex(char *buf, unsigned char *mem, int count);
 
-/* Convert the array, in hexadecimal representation, pointed to by buf into
-   binary representation. Put the result in mem, and return a pointer to
-   the character after the last byte written. */
-static unsigned char *hex2mem(unsigned char *mem, char *buf, int count);
-
 /* Put the content of the array, in binary representation, pointed to by buf
    into memory pointed to by mem, and return a pointer to
    the character after the last byte written. */
@@ -449,7 +441,7 @@ static char output_buffer[BUFMAX];
 /* Error and warning messages. */
 enum error_type
 {
-	SUCCESS, E01, E02, E03, E04, E05, E06,
+	SUCCESS, E01, E02, E03, E04, E05, E06, E07, E08
 };
 
 static char *error_message[] =
@@ -461,6 +453,8 @@ static char *error_message[] =
 	"E04 The command is not supported - [s,C,S,!,R,d,r] - internal error.",
 	"E05 Change register content - P - the register is not implemented..",
 	"E06 Change memory content - M - internal error.",
+	"E07 Change register content - P - the register is not stored on the stack",
+	"E08 Invalid parameter"
 };
 
 /********************************** Breakpoint *******************************/
@@ -539,7 +533,7 @@ gdb_cris_strtol(const char *s, char **endptr, int base)
 /********************************* Register image ****************************/
 
 /* Write a value to a specified register in the register image of the current
-   thread. Returns status code SUCCESS, E02 or E05. */
+   thread. Returns status code SUCCESS, E02, E05 or E08. */
 static int
 write_register(int regno, char *val)
 {
@@ -547,8 +541,9 @@ write_register(int regno, char *val)
 
         if (regno >= R0 && regno <= ACR) {
 		/* Consecutive 32-bit registers. */
-		hex2mem((unsigned char *)&reg.r0 + (regno - R0) * sizeof(unsigned int),
-			val, sizeof(unsigned int));
+		if (hex2bin((unsigned char *)&reg.r0 + (regno - R0) * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
 
 	} else if (regno == BZ || regno == VR || regno == WZ || regno == DZ) {
 		/* Read-only registers. */
@@ -557,16 +552,19 @@ write_register(int regno, char *val)
 	} else if (regno == PID) {
 		/* 32-bit register. (Even though we already checked SRS and WZ, we cannot
 		   combine this with the EXS - SPC write since SRS and WZ have different size.) */
-		hex2mem((unsigned char *)&reg.pid, val, sizeof(unsigned int));
+		if (hex2bin((unsigned char *)&reg.pid, val, sizeof(unsigned int)))
+			status = E08;
 
 	} else if (regno == SRS) {
 		/* 8-bit register. */
-		hex2mem((unsigned char *)&reg.srs, val, sizeof(unsigned char));
+		if (hex2bin((unsigned char *)&reg.srs, val, sizeof(unsigned char)))
+			status = E08;
 
 	} else if (regno >= EXS && regno <= SPC) {
 		/* Consecutive 32-bit registers. */
-		hex2mem((unsigned char *)&reg.exs + (regno - EXS) * sizeof(unsigned int),
-			 val, sizeof(unsigned int));
+		if (hex2bin((unsigned char *)&reg.exs + (regno - EXS) * sizeof(unsigned int),
+			    val, sizeof(unsigned int)))
+			status = E08;
 
        } else if (regno == PC) {
                /* Pseudo-register. Treat as read-only. */
@@ -574,7 +572,9 @@ write_register(int regno, char *val)
 
        } else if (regno >= S0 && regno <= S15) {
                /* 32-bit registers. */
-               hex2mem((unsigned char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int), val, sizeof(unsigned int));
+               if (hex2bin((unsigned char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int),
+			   val, sizeof(unsigned int)))
+			status = E08;
 	} else {
 		/* Non-existing register. */
 		status = E05;
@@ -630,19 +630,6 @@ read_register(char regno, unsigned int *valptr)
 }
 
 /********************************** Packet I/O ******************************/
-/* Returns the integer equivalent of a hexadecimal character. */
-static int
-hex(char ch)
-{
-	if ((ch >= 'a') && (ch <= 'f'))
-		return (ch - 'a' + 10);
-	if ((ch >= '0') && (ch <= '9'))
-		return (ch - '0');
-	if ((ch >= 'A') && (ch <= 'F'))
-		return (ch - 'A' + 10);
-	return -1;
-}
-
 /* Convert the memory, pointed to by mem into hexadecimal representation.
    Put the result in buf, and return a pointer to the last character
    in buf (null). */
@@ -689,22 +676,6 @@ mem2hex_nbo(char *buf, unsigned char *mem, int count)
 	return buf;
 }
 
-/* Convert the array, in hexadecimal representation, pointed to by buf into
-   binary representation. Put the result in mem, and return a pointer to
-   the character after the last byte written. */
-static unsigned char*
-hex2mem(unsigned char *mem, char *buf, int count)
-{
-	int i;
-	unsigned char ch;
-	for (i = 0; i < count; i++) {
-		ch = hex (*buf++) << 4;
-		ch = ch + hex (*buf++);
-		*mem++ = ch;
-	}
-	return mem;
-}
-
 /* Put the content of the array, in binary representation, pointed to by buf
    into memory pointed to by mem, and return a pointer to the character after
    the last byte written.
@@ -763,8 +734,8 @@ getpacket(char *buffer)
 		buffer[count] = 0;
 
 		if (ch == '#') {
-			xmitcsum = hex(getDebugChar()) << 4;
-			xmitcsum += hex(getDebugChar());
+			xmitcsum = hex_to_bin(getDebugChar()) << 4;
+			xmitcsum += hex_to_bin(getDebugChar());
 			if (checksum != xmitcsum) {
 				/* Wrong checksum */
 				putDebugChar('-');
@@ -1304,14 +1275,17 @@ handle_exception(int sigval)
 				/* Write registers. GXX..XX
 				   Each byte of register data  is described by two hex digits.
 				   Success: OK
-				   Failure: void. */
+				   Failure: E08. */
 				/* General and special registers. */
-				hex2mem((char *)&reg, &input_buffer[1], sizeof(registers));
+				if (hex2bin((char *)&reg, &input_buffer[1], sizeof(registers)))
+					gdb_cris_strcpy(output_buffer, error_message[E08]);
 				/* Support registers. */
-				hex2mem((char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)),
+				else if (hex2bin((char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)),
 					&input_buffer[1] + sizeof(registers),
-					16 * sizeof(unsigned int));
-				gdb_cris_strcpy(output_buffer, "OK");
+					16 * sizeof(unsigned int)))
+					gdb_cris_strcpy(output_buffer, error_message[E08]);
+				else
+					gdb_cris_strcpy(output_buffer, "OK");
 				break;
 
 			case 'P':
@@ -1338,6 +1312,10 @@ handle_exception(int sigval)
 							/* Do not support non-existing registers. */
 							gdb_cris_strcpy(output_buffer, error_message[E05]);
 							break;
+						case E08:
+							/* Invalid parameter. */
+							gdb_cris_strcpy(output_buffer, error_message[E08]);
+							break;
 						default:
 							/* Valid register number. */
 							gdb_cris_strcpy(output_buffer, "OK");
@@ -1380,7 +1358,7 @@ handle_exception(int sigval)
 				   AA..AA is the start address,  LLLL is the number of bytes, and
 				   XX..XX is the hexadecimal data.
 				   Success: OK
-				   Failure: void. */
+				   Failure: E08. */
 				{
 					char *lenptr;
 					char *dataptr;
@@ -1389,13 +1367,15 @@ handle_exception(int sigval)
 					int len = gdb_cris_strtol(lenptr+1, &dataptr, 16);
 					if (*lenptr == ',' && *dataptr == ':') {
 						if (input_buffer[0] == 'M') {
-							hex2mem(addr, dataptr + 1, len);
+							if (hex2bin(addr, dataptr + 1, len))
+								gdb_cris_strcpy(output_buffer, error_message[E08]);
+							else
+								gdb_cris_strcpy(output_buffer, "OK");
 						} else /* X */ {
 							bin2mem(addr, dataptr + 1, len);
+							gdb_cris_strcpy(output_buffer, "OK");
 						}
-						gdb_cris_strcpy(output_buffer, "OK");
-					}
-					else {
+					} else {
 						gdb_cris_strcpy(output_buffer, error_message[E06]);
 					}
 				}
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c
index cd1865d68b2e..fe50287aa928 100644
--- a/arch/cris/arch-v32/kernel/setup.c
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -129,10 +129,6 @@ static struct i2c_board_info __initdata i2c_info[] = {
 #ifdef CONFIG_RTC_DRV_PCF8563
 	{I2C_BOARD_INFO("pcf8563", 0x51)},
 #endif
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	{I2C_BOARD_INFO("vgpio", 0x20)},
-	{I2C_BOARD_INFO("vgpio", 0x21)},
-#endif
 	{I2C_BOARD_INFO("pca9536", 0x41)},
 	{I2C_BOARD_INFO("fnp300", 0x40)},
 	{I2C_BOARD_INFO("fnp300", 0x42)},
@@ -146,10 +142,6 @@ static struct i2c_board_info __initdata i2c_info2[] = {
 	{I2C_BOARD_INFO("tmp100", 0x4C)},
 	{I2C_BOARD_INFO("tmp100", 0x4D)},
 	{I2C_BOARD_INFO("tmp100", 0x4E)},
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-	{I2C_BOARD_INFO("vgpio", 0x20)},
-	{I2C_BOARD_INFO("vgpio", 0x21)},
-#endif
 	{I2C_BOARD_INFO("pca9536", 0x41)},
 	{I2C_BOARD_INFO("fnp300", 0x40)},
 	{I2C_BOARD_INFO("fnp300", 0x42)},
diff --git a/arch/cris/arch-v32/mach-a3/Makefile b/arch/cris/arch-v32/mach-a3/Makefile
index 18a227196a41..0cc6eebacbed 100644
--- a/arch/cris/arch-v32/mach-a3/Makefile
+++ b/arch/cris/arch-v32/mach-a3/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y   := dma.o pinmux.o io.o arbiter.o
+obj-y   := dma.o pinmux.o arbiter.o
 
 clean:
 
diff --git a/arch/cris/arch-v32/mach-a3/io.c b/arch/cris/arch-v32/mach-a3/io.c
deleted file mode 100644
index 090ceb99ef0b..000000000000
--- a/arch/cris/arch-v32/mach-a3/io.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Helper functions for I/O pins.
- *
- * Copyright (c) 2005-2007 Axis Communications AB.
- */
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <mach/pinmux.h>
-#include <hwregs/gio_defs.h>
-
-struct crisv32_ioport crisv32_ioports[] = {
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pa_din),
-		32
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pb_din),
-		32
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pc_din),
-		16
-	},
-};
-
-#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
-
-struct crisv32_iopin crisv32_led_net0_green;
-struct crisv32_iopin crisv32_led_net0_red;
-struct crisv32_iopin crisv32_led2_green;
-struct crisv32_iopin crisv32_led2_red;
-struct crisv32_iopin crisv32_led3_green;
-struct crisv32_iopin crisv32_led3_red;
-
-/* Dummy port used when green LED and red LED is on the same bit */
-static unsigned long io_dummy;
-static struct crisv32_ioport dummy_port = {
-	&io_dummy,
-	&io_dummy,
-	&io_dummy,
-	32
-};
-static struct crisv32_iopin dummy_led = {
-	&dummy_port,
-	0
-};
-
-static int __init crisv32_io_init(void)
-{
-	int ret = 0;
-
-	u32 i;
-
-	/* Locks *should* be dynamically initialized. */
-	for (i = 0; i < ARRAY_SIZE(crisv32_ioports); i++)
-		spin_lock_init(&crisv32_ioports[i].lock);
-	spin_lock_init(&dummy_port.lock);
-
-	/* Initialize LEDs */
-#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO))
-	ret += crisv32_io_get_name(&crisv32_led_net0_green,
-		CONFIG_ETRAX_LED_G_NET0);
-	crisv32_io_set_dir(&crisv32_led_net0_green, crisv32_io_dir_out);
-	if (strcmp(CONFIG_ETRAX_LED_G_NET0, CONFIG_ETRAX_LED_R_NET0)) {
-		ret += crisv32_io_get_name(&crisv32_led_net0_red,
-			CONFIG_ETRAX_LED_R_NET0);
-		crisv32_io_set_dir(&crisv32_led_net0_red, crisv32_io_dir_out);
-	} else
-		crisv32_led_net0_red = dummy_led;
-#endif
-
-	ret += crisv32_io_get_name(&crisv32_led2_green, CONFIG_ETRAX_V32_LED2G);
-	ret += crisv32_io_get_name(&crisv32_led2_red, CONFIG_ETRAX_V32_LED2R);
-	ret += crisv32_io_get_name(&crisv32_led3_green, CONFIG_ETRAX_V32_LED3G);
-	ret += crisv32_io_get_name(&crisv32_led3_red, CONFIG_ETRAX_V32_LED3R);
-
-	crisv32_io_set_dir(&crisv32_led2_green, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led2_red, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led3_green, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led3_red, crisv32_io_dir_out);
-
-	return ret;
-}
-
-__initcall(crisv32_io_init);
-
-int crisv32_io_get(struct crisv32_iopin *iopin,
-	unsigned int port, unsigned int pin)
-{
-	if (port > NBR_OF_PORTS)
-		return -EINVAL;
-	if (port > crisv32_ioports[port].pin_count)
-		return -EINVAL;
-
-	iopin->bit = 1 << pin;
-	iopin->port = &crisv32_ioports[port];
-
-	if (crisv32_pinmux_alloc(port, pin, pin, pinmux_gpio))
-		return -EIO;
-
-	return 0;
-}
-
-int crisv32_io_get_name(struct crisv32_iopin *iopin, const char *name)
-{
-	int port;
-	int pin;
-
-	if (toupper(*name) == 'P')
-		name++;
-
-	if (toupper(*name) < 'A' || toupper(*name) > 'E')
-		return -EINVAL;
-
-	port = toupper(*name) - 'A';
-	name++;
-	pin = simple_strtoul(name, NULL, 10);
-
-	if (pin < 0 || pin > crisv32_ioports[port].pin_count)
-		return -EINVAL;
-
-	iopin->bit = 1 << pin;
-	iopin->port = &crisv32_ioports[port];
-
-	if (crisv32_pinmux_alloc(port, pin, pin, pinmux_gpio))
-		return -EIO;
-
-	return 0;
-}
-
-#ifdef CONFIG_PCI
-/* PCI I/O access stuff */
-struct cris_io_operations *cris_iops = NULL;
-EXPORT_SYMBOL(cris_iops);
-#endif
-
diff --git a/arch/cris/arch-v32/mach-fs/Kconfig b/arch/cris/arch-v32/mach-fs/Kconfig
index 774de82abef6..7d1ab972bc0f 100644
--- a/arch/cris/arch-v32/mach-fs/Kconfig
+++ b/arch/cris/arch-v32/mach-fs/Kconfig
@@ -192,25 +192,6 @@ config ETRAX_DEF_GIO_PE_OUT
 	  Configures the initial data for the general port E bits.  Most
 	  products should use 00000 here.
 
-config ETRAX_DEF_GIO_PV_OE
-	hex "GIO_PV_OE"
-	depends on ETRAX_VIRTUAL_GPIO
-	default "0000"
-	help
-	  Configures the direction of virtual general port V bits. 1 is out,
-	  0 is in. This is often totally different depending on the product
-	  used. These bits are used for all kinds of stuff. If you don't know
-	  what to use, it is always safe to put all as inputs, although
-	  floating inputs isn't good.
-
-config ETRAX_DEF_GIO_PV_OUT
-	hex "GIO_PV_OUT"
-	depends on ETRAX_VIRTUAL_GPIO
-	default "0000"
-	help
-	  Configures the initial data for the virtual general port V bits.
-	  Most products should use 0000 here.
-
 endmenu
 
 endif
diff --git a/arch/cris/arch-v32/mach-fs/Makefile b/arch/cris/arch-v32/mach-fs/Makefile
index 18a227196a41..0cc6eebacbed 100644
--- a/arch/cris/arch-v32/mach-fs/Makefile
+++ b/arch/cris/arch-v32/mach-fs/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y   := dma.o pinmux.o io.o arbiter.o
+obj-y   := dma.o pinmux.o arbiter.o
 
 clean:
 
diff --git a/arch/cris/arch-v32/mach-fs/io.c b/arch/cris/arch-v32/mach-fs/io.c
deleted file mode 100644
index a6958661fa8e..000000000000
--- a/arch/cris/arch-v32/mach-fs/io.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Helper functions for I/O pins.
- *
- * Copyright (c) 2004-2007 Axis Communications AB.
- */
-
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <mach/pinmux.h>
-#include <hwregs/gio_defs.h>
-
-#ifndef DEBUG
-#define DEBUG(x)
-#endif
-
-struct crisv32_ioport crisv32_ioports[] = {
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pa_din),
-		8
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pb_din),
-		18
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pc_din),
-		18
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pd_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pd_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pd_din),
-		18
-	},
-	{
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pe_oe),
-		(unsigned long *)REG_ADDR(gio, regi_gio, rw_pe_dout),
-		(unsigned long *)REG_ADDR(gio, regi_gio, r_pe_din),
-		18
-	}
-};
-
-#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
-
-struct crisv32_iopin crisv32_led_net0_green;
-struct crisv32_iopin crisv32_led_net0_red;
-struct crisv32_iopin crisv32_led_net1_green;
-struct crisv32_iopin crisv32_led_net1_red;
-struct crisv32_iopin crisv32_led2_green;
-struct crisv32_iopin crisv32_led2_red;
-struct crisv32_iopin crisv32_led3_green;
-struct crisv32_iopin crisv32_led3_red;
-
-/* Dummy port used when green LED and red LED is on the same bit */
-static unsigned long io_dummy;
-static struct crisv32_ioport dummy_port = {
-	&io_dummy,
-	&io_dummy,
-	&io_dummy,
-	18
-};
-static struct crisv32_iopin dummy_led = {
-	&dummy_port,
-	0
-};
-
-static int __init crisv32_io_init(void)
-{
-	int ret = 0;
-
-	u32 i;
-
-	/* Locks *should* be dynamically initialized. */
-	for (i = 0; i < ARRAY_SIZE(crisv32_ioports); i++)
-		spin_lock_init(&crisv32_ioports[i].lock);
-	spin_lock_init(&dummy_port.lock);
-
-	/* Initialize LEDs */
-#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO))
-	ret +=
-	    crisv32_io_get_name(&crisv32_led_net0_green,
-				CONFIG_ETRAX_LED_G_NET0);
-	crisv32_io_set_dir(&crisv32_led_net0_green, crisv32_io_dir_out);
-	if (strcmp(CONFIG_ETRAX_LED_G_NET0, CONFIG_ETRAX_LED_R_NET0)) {
-		ret +=
-		    crisv32_io_get_name(&crisv32_led_net0_red,
-					CONFIG_ETRAX_LED_R_NET0);
-		crisv32_io_set_dir(&crisv32_led_net0_red, crisv32_io_dir_out);
-	} else
-		crisv32_led_net0_red = dummy_led;
-#endif
-
-#ifdef CONFIG_ETRAX_NBR_LED_GRP_TWO
-	ret +=
-	    crisv32_io_get_name(&crisv32_led_net1_green,
-				CONFIG_ETRAX_LED_G_NET1);
-	crisv32_io_set_dir(&crisv32_led_net1_green, crisv32_io_dir_out);
-	if (strcmp(CONFIG_ETRAX_LED_G_NET1, CONFIG_ETRAX_LED_R_NET1)) {
-		crisv32_io_get_name(&crisv32_led_net1_red,
-				    CONFIG_ETRAX_LED_R_NET1);
-		crisv32_io_set_dir(&crisv32_led_net1_red, crisv32_io_dir_out);
-	} else
-		crisv32_led_net1_red = dummy_led;
-#endif
-
-	ret += crisv32_io_get_name(&crisv32_led2_green, CONFIG_ETRAX_V32_LED2G);
-	ret += crisv32_io_get_name(&crisv32_led2_red, CONFIG_ETRAX_V32_LED2R);
-	ret += crisv32_io_get_name(&crisv32_led3_green, CONFIG_ETRAX_V32_LED3G);
-	ret += crisv32_io_get_name(&crisv32_led3_red, CONFIG_ETRAX_V32_LED3R);
-
-	crisv32_io_set_dir(&crisv32_led2_green, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led2_red, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led3_green, crisv32_io_dir_out);
-	crisv32_io_set_dir(&crisv32_led3_red, crisv32_io_dir_out);
-
-	return ret;
-}
-
-__initcall(crisv32_io_init);
-
-int crisv32_io_get(struct crisv32_iopin *iopin,
-		   unsigned int port, unsigned int pin)
-{
-	if (port > NBR_OF_PORTS)
-		return -EINVAL;
-	if (port > crisv32_ioports[port].pin_count)
-		return -EINVAL;
-
-	iopin->bit = 1 << pin;
-	iopin->port = &crisv32_ioports[port];
-
-	/* Only allocate pinmux gpiopins if port != PORT_A (port 0) */
-	/* NOTE! crisv32_pinmux_alloc thinks PORT_B is port 0 */
-	if (port != 0 && crisv32_pinmux_alloc(port - 1, pin, pin, pinmux_gpio))
-		return -EIO;
-	DEBUG(printk(KERN_DEBUG "crisv32_io_get: Allocated pin %d on port %d\n",
-		pin, port));
-
-	return 0;
-}
-
-int crisv32_io_get_name(struct crisv32_iopin *iopin, const char *name)
-{
-	int port;
-	int pin;
-
-	if (toupper(*name) == 'P')
-		name++;
-
-	if (toupper(*name) < 'A' || toupper(*name) > 'E')
-		return -EINVAL;
-
-	port = toupper(*name) - 'A';
-	name++;
-	pin = simple_strtoul(name, NULL, 10);
-
-	if (pin < 0 || pin > crisv32_ioports[port].pin_count)
-		return -EINVAL;
-
-	iopin->bit = 1 << pin;
-	iopin->port = &crisv32_ioports[port];
-
-	/* Only allocate pinmux gpiopins if port != PORT_A (port 0) */
-	/* NOTE! crisv32_pinmux_alloc thinks PORT_B is port 0 */
-	if (port != 0 && crisv32_pinmux_alloc(port - 1, pin, pin, pinmux_gpio))
-		return -EIO;
-
-	DEBUG(printk(KERN_DEBUG
-		"crisv32_io_get_name: Allocated pin %d on port %d\n",
-		pin, port));
-
-	return 0;
-}
-
-#ifdef CONFIG_PCI
-/* PCI I/O access stuff */
-struct cris_io_operations *cris_iops = NULL;
-EXPORT_SYMBOL(cris_iops);
-#endif
diff --git a/arch/cris/boot/dts/artpec3.dtsi b/arch/cris/boot/dts/artpec3.dtsi
new file mode 100644
index 000000000000..be15be67b653
--- /dev/null
+++ b/arch/cris/boot/dts/artpec3.dtsi
@@ -0,0 +1,46 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "axis,crisv32";
+			reg = <0>;
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "artpec3";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		intc: interrupt-controller {
+			compatible = "axis,crisv32-intc";
+			reg = <0xb002a000 0x1000>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		gio: gpio@b0020000 {
+			compatible = "axis,artpec3-gio";
+			reg = <0xb0020000 0x1000>;
+			interrupts = <61>;
+			gpio-controller;
+			#gpio-cells = <3>;
+		};
+
+		serial@b003e000 {
+			compatible = "axis,etraxfs-uart";
+			reg = <0xb003e000 0x1000>;
+			interrupts = <64>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/cris/boot/dts/dev88.dts b/arch/cris/boot/dts/dev88.dts
index 4fa5a3f9d0ec..b9a230d10874 100644
--- a/arch/cris/boot/dts/dev88.dts
+++ b/arch/cris/boot/dts/dev88.dts
@@ -1,5 +1,7 @@
 /dts-v1/;
 
+#include <dt-bindings/gpio/gpio.h>
+
 /include/ "etraxfs.dtsi"
 
 / {
@@ -15,4 +17,51 @@
 			status = "okay";
 		};
 	};
+
+	spi {
+		compatible = "spi-gpio";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		gpio-sck = <&gio 1 0 0xd>;
+		gpio-miso = <&gio 4 0 0xd>;
+		gpio-mosi = <&gio 0 0 0xd>;
+		cs-gpios = <&gio 3 0 0xd>;
+		num-chipselects = <1>;
+
+		temp-sensor@0 {
+			compatible = "ti,lm70";
+			reg = <0>;
+
+			spi-max-frequency = <100000>;
+		};
+	};
+
+	i2c {
+		compatible = "i2c-gpio";
+		gpios = <&gio 5 0 0xd>, <&gio 6 0 0xd>;
+		i2c-gpio,delay-us = <2>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		rtc@51 {
+			compatible = "nxp,pcf8563";
+			reg = <0x51>;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		network {
+			label = "network";
+			gpios = <&gio 2 GPIO_ACTIVE_LOW 0xa>;
+		};
+
+		status {
+			label = "status";
+			gpios = <&gio 3 GPIO_ACTIVE_LOW 0xa>;
+			linux,default-trigger = "heartbeat";
+		};
+	};
 };
diff --git a/arch/cris/boot/dts/etraxfs.dtsi b/arch/cris/boot/dts/etraxfs.dtsi
index 909bcedc3565..bf1b8582d4d8 100644
--- a/arch/cris/boot/dts/etraxfs.dtsi
+++ b/arch/cris/boot/dts/etraxfs.dtsi
@@ -28,6 +28,14 @@
 			#interrupt-cells = <1>;
 		};
 
+		gio: gpio@b001a000 {
+			compatible = "axis,etraxfs-gio";
+			reg = <0xb001a000 0x1000>;
+			interrupts = <50>;
+			gpio-controller;
+			#gpio-cells = <3>;
+		};
+
 		serial@b00260000 {
 			compatible = "axis,etraxfs-uart";
 			reg = <0xb0026000 0x1000>;
diff --git a/arch/cris/boot/dts/include/dt-bindings b/arch/cris/boot/dts/include/dt-bindings
new file mode 120000
index 000000000000..08c00e4972fa
--- /dev/null
+++ b/arch/cris/boot/dts/include/dt-bindings
@@ -0,0 +1 @@
+../../../../../include/dt-bindings
+\ No newline at end of file
diff --git a/arch/cris/boot/dts/p1343.dts b/arch/cris/boot/dts/p1343.dts
new file mode 100644
index 000000000000..fab7bdbd0f15
--- /dev/null
+++ b/arch/cris/boot/dts/p1343.dts
@@ -0,0 +1,76 @@
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/include/ "artpec3.dtsi"
+
+/ {
+	model = "Axis P1343 Network Camera";
+	compatible = "axis,p1343";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	soc {
+		uart0: serial@b003e000 {
+			status = "okay";
+		};
+	};
+
+	i2c {
+		compatible = "i2c-gpio";
+		gpios = <&gio 3 0 0xa>, <&gio 2 0 0xa>;
+		i2c-gpio,delay-us = <2>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		rtc@51 {
+			compatible = "nxp,pcf8563";
+			reg = <0x51>;
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+
+		status_green {
+			label = "status:green";
+			gpios = <&gio 0 GPIO_ACTIVE_LOW 0xc>;
+			linux,default-trigger = "heartbeat";
+		};
+
+		status_red {
+			label = "status:red";
+			gpios = <&gio 1 GPIO_ACTIVE_LOW 0xc>;
+		};
+
+		network_green {
+			label = "network:green";
+			gpios = <&gio 2 GPIO_ACTIVE_LOW 0xc>;
+		};
+
+		network_red {
+			label = "network:red";
+			gpios = <&gio 3 GPIO_ACTIVE_LOW 0xc>;
+		};
+
+		power_red {
+			label = "power:red";
+			gpios = <&gio 4 GPIO_ACTIVE_LOW 0xc>;
+		};
+	};
+
+	gpio_keys {
+		compatible = "gpio-keys";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		activity-button@0 {
+			label = "Activity Button";
+			linux,code = <KEY_FN>;
+			gpios = <&gio 13 GPIO_ACTIVE_LOW 0xd>;
+		};
+	};
+};
diff --git a/arch/cris/boot/rescue/head_v10.S b/arch/cris/boot/rescue/head_v10.S
index af55df0994b3..1c05492f3eb2 100644
--- a/arch/cris/boot/rescue/head_v10.S
+++ b/arch/cris/boot/rescue/head_v10.S
@@ -281,9 +281,6 @@ wait_ser:
 #ifdef CONFIG_ETRAX_PB_LEDS
 	move.b	$r2, [R_PORT_PB_DATA]
 #endif
-#ifdef CONFIG_ETRAX_90000000_LEDS
-	move.b	$r2, [0x90000000]
-#endif
 #endif
 
 	;; check if we got something on the serial port
diff --git a/arch/cris/include/arch-v32/arch/io.h b/arch/cris/include/arch-v32/arch/io.h
deleted file mode 100644
index adc5484351bf..000000000000
--- a/arch/cris/include/arch-v32/arch/io.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _ASM_ARCH_CRIS_IO_H
-#define _ASM_ARCH_CRIS_IO_H
-
-#include <linux/spinlock.h>
-#include <hwregs/reg_map.h>
-#include <hwregs/reg_rdwr.h>
-#include <hwregs/gio_defs.h>
-
-enum crisv32_io_dir
-{
-  crisv32_io_dir_in = 0,
-  crisv32_io_dir_out = 1
-};
-
-struct crisv32_ioport
-{
-  volatile unsigned long *oe;
-  volatile unsigned long *data;
-  volatile unsigned long *data_in;
-  unsigned int pin_count;
-  spinlock_t lock;
-};
-
-struct crisv32_iopin
-{
-  struct crisv32_ioport* port;
-  int bit;
-};
-
-extern struct crisv32_ioport crisv32_ioports[];
-
-extern struct crisv32_iopin crisv32_led1_green;
-extern struct crisv32_iopin crisv32_led1_red;
-extern struct crisv32_iopin crisv32_led2_green;
-extern struct crisv32_iopin crisv32_led2_red;
-extern struct crisv32_iopin crisv32_led3_green;
-extern struct crisv32_iopin crisv32_led3_red;
-
-extern struct crisv32_iopin crisv32_led_net0_green;
-extern struct crisv32_iopin crisv32_led_net0_red;
-extern struct crisv32_iopin crisv32_led_net1_green;
-extern struct crisv32_iopin crisv32_led_net1_red;
-
-static inline void crisv32_io_set(struct crisv32_iopin *iopin, int val)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&iopin->port->lock, flags);
-
-	if (iopin->port->data) {
-		if (val)
-			*iopin->port->data |= iopin->bit;
-		else
-			*iopin->port->data &= ~iopin->bit;
-	}
-
-	spin_unlock_irqrestore(&iopin->port->lock, flags);
-}
-
-static inline void crisv32_io_set_dir(struct crisv32_iopin* iopin,
-			       enum crisv32_io_dir dir)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&iopin->port->lock, flags);
-
-	if (iopin->port->oe) {
-		if (dir == crisv32_io_dir_in)
-			*iopin->port->oe &= ~iopin->bit;
-		else
-			*iopin->port->oe |= iopin->bit;
-	}
-
-	spin_unlock_irqrestore(&iopin->port->lock, flags);
-}
-
-static inline int crisv32_io_rd(struct crisv32_iopin* iopin)
-{
-	return ((*iopin->port->data_in & iopin->bit) ? 1 : 0);
-}
-
-int crisv32_io_get(struct crisv32_iopin* iopin,
-                   unsigned int port, unsigned int pin);
-int crisv32_io_get_name(struct crisv32_iopin* iopin,
-			const char *name);
-
-#define CRIS_LED_OFF    0x00
-#define CRIS_LED_GREEN  0x01
-#define CRIS_LED_RED    0x02
-#define CRIS_LED_ORANGE (CRIS_LED_GREEN | CRIS_LED_RED)
-
-#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO))
-#define CRIS_LED_NETWORK_GRP0_SET(x)                          \
-	do {                                             \
-		CRIS_LED_NETWORK_GRP0_SET_G((x) & CRIS_LED_GREEN); \
-		CRIS_LED_NETWORK_GRP0_SET_R((x) & CRIS_LED_RED);   \
-	} while (0)
-#else
-#define CRIS_LED_NETWORK_GRP0_SET(x) while (0) {}
-#endif
-
-#define CRIS_LED_NETWORK_GRP0_SET_G(x) \
-	crisv32_io_set(&crisv32_led_net0_green, !(x));
-
-#define CRIS_LED_NETWORK_GRP0_SET_R(x) \
-	crisv32_io_set(&crisv32_led_net0_red, !(x));
-
-#if defined(CONFIG_ETRAX_NBR_LED_GRP_TWO)
-#define CRIS_LED_NETWORK_GRP1_SET(x)                          \
-	do {                                             \
-		CRIS_LED_NETWORK_GRP1_SET_G((x) & CRIS_LED_GREEN); \
-		CRIS_LED_NETWORK_GRP1_SET_R((x) & CRIS_LED_RED);   \
-	} while (0)
-#else
-#define CRIS_LED_NETWORK_GRP1_SET(x) while (0) {}
-#endif
-
-#define CRIS_LED_NETWORK_GRP1_SET_G(x) \
-	crisv32_io_set(&crisv32_led_net1_green, !(x));
-
-#define CRIS_LED_NETWORK_GRP1_SET_R(x) \
-	crisv32_io_set(&crisv32_led_net1_red, !(x));
-
-#define CRIS_LED_ACTIVE_SET(x)                           \
-	do {                                        \
-		CRIS_LED_ACTIVE_SET_G((x) & CRIS_LED_GREEN);  \
-		CRIS_LED_ACTIVE_SET_R((x) & CRIS_LED_RED);    \
-	} while (0)
-
-#define CRIS_LED_ACTIVE_SET_G(x) \
-	crisv32_io_set(&crisv32_led2_green, !(x));
-#define CRIS_LED_ACTIVE_SET_R(x) \
-	crisv32_io_set(&crisv32_led2_red, !(x));
-#define CRIS_LED_DISK_WRITE(x) \
-         do{\
-		crisv32_io_set(&crisv32_led3_green, !(x)); \
-		crisv32_io_set(&crisv32_led3_red, !(x));   \
-        }while(0)
-#define CRIS_LED_DISK_READ(x) \
-	crisv32_io_set(&crisv32_led3_green, !(x));
-
-#endif
diff --git a/arch/cris/include/arch-v32/arch/irq.h b/arch/cris/include/arch-v32/arch/irq.h
index 0c1b4d3a34e7..8270a1bbfdb6 100644
--- a/arch/cris/include/arch-v32/arch/irq.h
+++ b/arch/cris/include/arch-v32/arch/irq.h
@@ -4,7 +4,7 @@
 #include <hwregs/intr_vect.h>
 
 /* Number of non-cpu interrupts. */
-#define NR_IRQS NBR_INTR_VECT /* Exceptions + IRQs */
+#define NR_IRQS (NBR_INTR_VECT + 256) /* Exceptions + IRQs */
 #define FIRST_IRQ 0x31 /* Exception number for first IRQ */
 #define NR_REAL_IRQS (NBR_INTR_VECT - FIRST_IRQ) /* IRQs */
 #if NR_REAL_IRQS > 32
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index b7f68192d15b..1778805f6380 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -43,4 +43,5 @@ generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/cris/include/asm/eshlibld.h b/arch/cris/include/asm/eshlibld.h
index 10ce36cf79a9..70aa448256b0 100644
--- a/arch/cris/include/asm/eshlibld.h
+++ b/arch/cris/include/asm/eshlibld.h
@@ -45,8 +45,7 @@
    assumed that we want to share code when debugging (exposes more
    trouble). */
 #ifndef SHARE_LIB_CORE
-# if (defined(__KERNEL__) || !defined(RELOC_DEBUG)) \
-     && !defined(CONFIG_SHARE_SHLIB_CORE)
+# if (defined(__KERNEL__) || !defined(RELOC_DEBUG))
 #  define SHARE_LIB_CORE 0
 # else
 #  define SHARE_LIB_CORE 1
diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h
index 752a3f45df60..cce8664d5dd6 100644
--- a/arch/cris/include/asm/io.h
+++ b/arch/cris/include/asm/io.h
@@ -2,7 +2,9 @@
 #define _ASM_CRIS_IO_H
 
 #include <asm/page.h>   /* for __va, __pa */
+#ifdef CONFIG_ETRAX_ARCH_V10
 #include <arch/io.h>
+#endif
 #include <asm-generic/iomap.h>
 #include <linux/kernel.h>
 
diff --git a/arch/cris/include/uapi/asm/etraxgpio.h b/arch/cris/include/uapi/asm/etraxgpio.h
index 461c089db765..c6e7d57c8b24 100644
--- a/arch/cris/include/uapi/asm/etraxgpio.h
+++ b/arch/cris/include/uapi/asm/etraxgpio.h
@@ -11,26 +11,6 @@
  *       g1-g7 and g25-g31 is both input and outputs but on different pins
  *       Also note that some bits change pins depending on what interfaces
  *       are enabled.
- *
- * For ETRAX FS (CONFIG_ETRAXFS):
- * /dev/gpioa  minor 0,  8 bit GPIO, each bit can change direction
- * /dev/gpiob  minor 1, 18 bit GPIO, each bit can change direction
- * /dev/gpioc  minor 3, 18 bit GPIO, each bit can change direction
- * /dev/gpiod  minor 4, 18 bit GPIO, each bit can change direction
- * /dev/gpioe  minor 5, 18 bit GPIO, each bit can change direction
- * /dev/leds   minor 2, Access to leds depending on kernelconfig
- *
- * For ARTPEC-3 (CONFIG_CRIS_MACH_ARTPEC3):
- * /dev/gpioa  minor 0, 32 bit GPIO, each bit can change direction
- * /dev/gpiob  minor 1, 32 bit GPIO, each bit can change direction
- * /dev/gpioc  minor 3, 16 bit GPIO, each bit can change direction
- * /dev/gpiod  minor 4, 32 bit GPIO, input only
- * /dev/leds   minor 2, Access to leds depending on kernelconfig
- * /dev/pwm0   minor 16, PWM channel 0 on PA30
- * /dev/pwm1   minor 17, PWM channel 1 on PA31
- * /dev/pwm2   minor 18, PWM channel 2 on PB26
- * /dev/ppwm   minor 19, PPWM channel
- *
  */
 #ifndef _ASM_ETRAXGPIO_H
 #define _ASM_ETRAXGPIO_H
@@ -40,52 +20,12 @@
 #define ETRAXGPIO_IOCTYPE 43
 
 /* etraxgpio _IOC_TYPE, bits 8 to 15 in ioctl cmd */
-#ifdef CONFIG_ETRAX_ARCH_V10
 #define GPIO_MINOR_A 0
 #define GPIO_MINOR_B 1
 #define GPIO_MINOR_LEDS 2
 #define GPIO_MINOR_G 3
 #define GPIO_MINOR_LAST 3
 #define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST
-#endif
-
-#ifdef CONFIG_ETRAXFS
-#define GPIO_MINOR_A 0
-#define GPIO_MINOR_B 1
-#define GPIO_MINOR_LEDS 2
-#define GPIO_MINOR_C 3
-#define GPIO_MINOR_D 4
-#define GPIO_MINOR_E 5
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-#define GPIO_MINOR_V 6
-#define GPIO_MINOR_LAST 6
-#else
-#define GPIO_MINOR_LAST 5
-#endif
-#define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST
-#endif
-
-#ifdef CONFIG_CRIS_MACH_ARTPEC3
-#define GPIO_MINOR_A 0
-#define GPIO_MINOR_B 1
-#define GPIO_MINOR_LEDS 2
-#define GPIO_MINOR_C 3
-#define GPIO_MINOR_D 4
-#ifdef CONFIG_ETRAX_VIRTUAL_GPIO
-#define GPIO_MINOR_V 6
-#define GPIO_MINOR_LAST 6
-#else
-#define GPIO_MINOR_LAST 4
-#endif
-#define GPIO_MINOR_FIRST_PWM 16
-#define GPIO_MINOR_PWM0 (GPIO_MINOR_FIRST_PWM+0)
-#define GPIO_MINOR_PWM1 (GPIO_MINOR_FIRST_PWM+1)
-#define GPIO_MINOR_PWM2 (GPIO_MINOR_FIRST_PWM+2)
-#define GPIO_MINOR_PPWM (GPIO_MINOR_FIRST_PWM+3)
-#define GPIO_MINOR_LAST_PWM GPIO_MINOR_PPWM
-#define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST_PWM
-#endif
-
 
 
 /* supported ioctl _IOC_NR's */
@@ -139,101 +79,4 @@
 #define IO_SETGET_OUTPUT 0x13 /* bits set in *arg is set to output, */
 			      /* *arg updated with current output pins. */
 
-/* The following ioctl's are applicable to the PWM channels only */
-
-#define IO_PWM_SET_MODE     0x20
-
-enum io_pwm_mode {
-	PWM_OFF = 0,		/* disabled, deallocated */
-	PWM_STANDARD = 1,	/* 390 kHz, duty cycle 0..255/256 */
-	PWM_FAST = 2,		/* variable freq, w/ 10ns active pulse len */
-	PWM_VARFREQ = 3,	/* individually configurable high/low periods */
-	PWM_SOFT = 4		/* software generated */
-};
-
-struct io_pwm_set_mode {
-	enum io_pwm_mode mode;
-};
-
-/* Only for mode PWM_VARFREQ. Period lo/high set in increments of 10ns
- * from 10ns (value = 0) to 81920ns (value = 8191)
- * (Resulting frequencies range from 50 MHz (10ns + 10ns) down to
- * 6.1 kHz (81920ns + 81920ns) at 50% duty cycle, to 12.2 kHz at min/max duty
- * cycle (81920 + 10ns or 10ns + 81920ns, respectively).)
- */
-#define IO_PWM_SET_PERIOD   0x21
-
-struct io_pwm_set_period {
-	unsigned int lo;		/* 0..8191 */
-	unsigned int hi;		/* 0..8191 */
-};
-
-/* Only for modes PWM_STANDARD and PWM_FAST.
- * For PWM_STANDARD, set duty cycle of 390 kHz PWM output signal, from
- * 0 (value = 0) to 255/256 (value = 255).
- * For PWM_FAST, set duty cycle of PWM output signal from
- * 0% (value = 0) to 100% (value = 255). Output signal in this mode
- * is a 10ns pulse surrounded by a high or low level depending on duty
- * cycle (except for 0% and 100% which result in a constant output).
- * Resulting output frequency varies from 50 MHz at 50% duty cycle,
- * down to 390 kHz at min/max duty cycle.
- */
-#define IO_PWM_SET_DUTY     0x22
-
-struct io_pwm_set_duty {
-	int duty;		/* 0..255 */
-};
-
-/* Returns information about the latest PWM pulse.
- * lo: Length of the latest low period, in units of 10ns.
- * hi: Length of the latest high period, in units of 10ns.
- * cnt: Time since last detected edge, in units of 10ns.
- *
- * The input source to PWM is decied by IO_PWM_SET_INPUT_SRC.
- *
- * NOTE: All PWM devices is connected to the same input source.
- */
-#define IO_PWM_GET_PERIOD   0x23
-
-struct io_pwm_get_period {
-	unsigned int lo;
-	unsigned int hi;
-	unsigned int cnt;
-};
-
-/* Sets the input source for the PWM input. For the src value see the
- * register description for gio:rw_pwm_in_cfg.
- *
- * NOTE: All PWM devices is connected to the same input source.
- */
-#define IO_PWM_SET_INPUT_SRC   0x24
-struct io_pwm_set_input_src {
-	unsigned int src;	/* 0..7 */
-};
-
-/* Sets the duty cycles in steps of 1/256, 0 = 0%, 255 = 100% duty cycle */
-#define IO_PPWM_SET_DUTY     0x25
-
-struct io_ppwm_set_duty {
-	int duty;		/* 0..255 */
-};
-
-/* Configuraton struct for the IO_PWMCLK_SET_CONFIG ioctl to configure
- * PWM capable gpio pins:
- */
-#define IO_PWMCLK_SETGET_CONFIG 0x26
-struct gpio_pwmclk_conf {
-  unsigned int gpiopin; /* The pin number based on the opened device */
-  unsigned int baseclk; /* The base clock to use, or sw will select one close*/
-  unsigned int low;     /* The number of low periods of the baseclk */
-  unsigned int high;    /* The number of high periods of the baseclk */
-};
-
-/* Examples:
- * To get a symmetric 12 MHz clock without knowing anything about the hardware:
- * baseclk = 12000000, low = 0, high = 0
- * To just get info of current setting:
- * baseclk = 0, low = 0, high = 0, the values will be updated by driver.
- */
-
 #endif
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index e704f81f85cc..31b4bd288cad 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -18,7 +18,6 @@
 #include <asm/pgtable.h>
 #include <asm/fasttimer.h>
 
-extern unsigned long get_cmos_time(void);
 extern void __Udiv(void);
 extern void __Umod(void);
 extern void __Div(void);
@@ -30,7 +29,6 @@ extern void __negdi2(void);
 extern void iounmap(volatile void * __iomem);
 
 /* Platform dependent support */
-EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(loops_per_usec);
 
 /* Math functions */
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index 7780d379522f..2dda6da71521 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -39,31 +39,6 @@
 extern unsigned long loops_per_jiffy; /* init/main.c */
 unsigned long loops_per_usec;
 
-int set_rtc_mmss(unsigned long nowtime)
-{
-	D(printk(KERN_DEBUG "set_rtc_mmss(%lu)\n", nowtime));
-	return 0;
-}
-
-/* grab the time from the RTC chip */
-unsigned long get_cmos_time(void)
-{
-	return 0;
-}
-
-
-int update_persistent_clock(struct timespec now)
-{
-	return set_rtc_mmss(now.tv_sec);
-}
-
-void read_persistent_clock(struct timespec *ts)
-{
-	ts->tv_sec = 0;
-	ts->tv_nsec = 0;
-}
-
-
 extern void cris_profile_sample(struct pt_regs* regs);
 
 void
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 8e47b832cc76..1fa084cf1a43 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 0da689def4cc..64f02d451aa8 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -32,8 +32,8 @@
  */
 
 #define ATOMIC_INIT(i)		{ (i) }
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = (i))
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 static inline int atomic_inc_return(atomic_t *v)
 {
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 70e6ae1e7006..373cb23301e3 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -73,4 +73,5 @@ generic-y += uaccess.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 702ee539f87d..4435a445ae7e 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -11,8 +11,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/kernel.h>
 
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index daee37bd0999..db8ddabc6bd2 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -58,4 +58,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 811d61f6422d..55696c4100d4 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -48,7 +48,7 @@ static inline void atomic_set(atomic_t *v, int new)
  *
  * Assumes all word reads on our architecture are atomic.
  */
-#define atomic_read(v)		((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
  * atomic_xchg - atomic
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 9de3ba12f6b9..502a91d8dbbd 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -8,3 +8,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += vtime.h
+generic-y += word-at-a-time.h
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index be4beeb77d57..8dfb5f6f6c35 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -21,11 +21,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		(((v)->counter) = (i))
-#define atomic64_set(v,i)	(((v)->counter) = (i))
+#define atomic_set(v,i)		WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op, c_op)						\
 static __inline__ int							\
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 36d2c1e3928b..07039d168f37 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -64,11 +64,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
 #define pci_legacy_read platform_pci_legacy_read
 #define pci_legacy_write platform_pci_legacy_write
 
-struct iospace_resource {
-	struct list_head list;
-	struct resource res;
-};
-
 struct pci_controller {
 	struct acpi_device *companion;
 	void *iommu;
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 99c96a5e6016..db73390568c8 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -11,7 +11,7 @@
 
 
 
-#define NR_syscalls			321 /* length of syscall table */
+#define NR_syscalls			322 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h
index 98e94e19a5a0..9038726e7d26 100644
--- a/arch/ia64/include/uapi/asm/unistd.h
+++ b/arch/ia64/include/uapi/asm/unistd.h
@@ -334,5 +334,6 @@
 #define __NR_execveat			1342
 #define __NR_userfaultfd		1343
 #define __NR_membarrier			1344
+#define __NR_kcmp			1345
 
 #endif /* _UAPI_ASM_IA64_UNISTD_H */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 37cc7a65cd3e..dcd97f84d065 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1770,5 +1770,6 @@ sys_call_table:
 	data8 sys_execveat
 	data8 sys_userfaultfd
 	data8 sys_membarrier
+	data8 sys_kcmp				// 1345
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7cc3be9fa7c6..8f6ac2f8ae4c 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -115,33 +115,13 @@ struct pci_ops pci_root_ops = {
 	.write = pci_write,
 };
 
-/* Called by ACPI when it finds a new root bus.  */
-
-static struct pci_controller *alloc_pci_controller(int seg)
-{
-	struct pci_controller *controller;
-
-	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
-	if (!controller)
-		return NULL;
-
-	controller->segment = seg;
-	return controller;
-}
-
 struct pci_root_info {
-	struct acpi_device *bridge;
-	struct pci_controller *controller;
-	struct list_head resources;
-	struct resource *res;
-	resource_size_t *res_offset;
-	unsigned int res_num;
+	struct acpi_pci_root_info common;
+	struct pci_controller controller;
 	struct list_head io_resources;
-	char *name;
 };
 
-static unsigned int
-new_space (u64 phys_base, int sparse)
+static unsigned int new_space(u64 phys_base, int sparse)
 {
 	u64 mmio_base;
 	int i;
@@ -168,39 +148,36 @@ new_space (u64 phys_base, int sparse)
 	return i;
 }
 
-static u64 add_io_space(struct pci_root_info *info,
-			struct acpi_resource_address64 *addr)
+static int add_io_space(struct device *dev, struct pci_root_info *info,
+			struct resource_entry *entry)
 {
-	struct iospace_resource *iospace;
-	struct resource *resource;
+	struct resource_entry *iospace;
+	struct resource *resource, *res = entry->res;
 	char *name;
 	unsigned long base, min, max, base_port;
 	unsigned int sparse = 0, space_nr, len;
 
-	len = strlen(info->name) + 32;
-	iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL);
+	len = strlen(info->common.name) + 32;
+	iospace = resource_list_create_entry(NULL, len);
 	if (!iospace) {
-		dev_err(&info->bridge->dev,
-				"PCI: No memory for %s I/O port space\n",
-				info->name);
-		goto out;
+		dev_err(dev, "PCI: No memory for %s I/O port space\n",
+			info->common.name);
+		return -ENOMEM;
 	}
 
-	name = (char *)(iospace + 1);
-
-	min = addr->address.minimum;
-	max = min + addr->address.address_length - 1;
-	if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION)
+	if (res->flags & IORESOURCE_IO_SPARSE)
 		sparse = 1;
-
-	space_nr = new_space(addr->address.translation_offset, sparse);
+	space_nr = new_space(entry->offset, sparse);
 	if (space_nr == ~0)
 		goto free_resource;
 
+	name = (char *)(iospace + 1);
+	min = res->start - entry->offset;
+	max = res->end - entry->offset;
 	base = __pa(io_space[space_nr].mmio_base);
 	base_port = IO_SPACE_BASE(space_nr);
-	snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name,
-		base_port + min, base_port + max);
+	snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->common.name,
+		 base_port + min, base_port + max);
 
 	/*
 	 * The SDM guarantees the legacy 0-64K space is sparse, but if the
@@ -210,270 +187,125 @@ static u64 add_io_space(struct pci_root_info *info,
 	if (space_nr == 0)
 		sparse = 1;
 
-	resource = &iospace->res;
+	resource = iospace->res;
 	resource->name  = name;
 	resource->flags = IORESOURCE_MEM;
 	resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min);
 	resource->end   = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max);
 	if (insert_resource(&iomem_resource, resource)) {
-		dev_err(&info->bridge->dev,
-				"can't allocate host bridge io space resource  %pR\n",
-				resource);
+		dev_err(dev,
+			"can't allocate host bridge io space resource  %pR\n",
+			resource);
 		goto free_resource;
 	}
 
-	list_add_tail(&iospace->list, &info->io_resources);
-	return base_port;
+	entry->offset = base_port;
+	res->start = min + base_port;
+	res->end = max + base_port;
+	resource_list_add_tail(iospace, &info->io_resources);
 
-free_resource:
-	kfree(iospace);
-out:
-	return ~0;
-}
-
-static acpi_status resource_to_window(struct acpi_resource *resource,
-				      struct acpi_resource_address64 *addr)
-{
-	acpi_status status;
+	return 0;
 
-	/*
-	 * We're only interested in _CRS descriptors that are
-	 *	- address space descriptors for memory or I/O space
-	 *	- non-zero size
-	 */
-	status = acpi_resource_to_address64(resource, addr);
-	if (ACPI_SUCCESS(status) &&
-	    (addr->resource_type == ACPI_MEMORY_RANGE ||
-	     addr->resource_type == ACPI_IO_RANGE) &&
-	    addr->address.address_length)
-		return AE_OK;
-
-	return AE_ERROR;
+free_resource:
+	resource_list_free_entry(iospace);
+	return -ENOSPC;
 }
 
-static acpi_status count_window(struct acpi_resource *resource, void *data)
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ *     to access PCI configuration space.
+ *
+ * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
 {
-	unsigned int *windows = (unsigned int *) data;
-	struct acpi_resource_address64 addr;
-	acpi_status status;
-
-	status = resource_to_window(resource, &addr);
-	if (ACPI_SUCCESS(status))
-		(*windows)++;
-
-	return AE_OK;
+	return (res->flags & IORESOURCE_IO) &&
+		res->start == 0xCF8 && res->end == 0xCFF;
 }
 
-static acpi_status add_window(struct acpi_resource *res, void *data)
+static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
 {
-	struct pci_root_info *info = data;
-	struct resource *resource;
-	struct acpi_resource_address64 addr;
-	acpi_status status;
-	unsigned long flags, offset = 0;
-	struct resource *root;
-
-	/* Return AE_OK for non-window resources to keep scanning for more */
-	status = resource_to_window(res, &addr);
-	if (!ACPI_SUCCESS(status))
-		return AE_OK;
-
-	if (addr.resource_type == ACPI_MEMORY_RANGE) {
-		flags = IORESOURCE_MEM;
-		root = &iomem_resource;
-		offset = addr.address.translation_offset;
-	} else if (addr.resource_type == ACPI_IO_RANGE) {
-		flags = IORESOURCE_IO;
-		root = &ioport_resource;
-		offset = add_io_space(info, &addr);
-		if (offset == ~0)
-			return AE_OK;
-	} else
-		return AE_OK;
-
-	resource = &info->res[info->res_num];
-	resource->name = info->name;
-	resource->flags = flags;
-	resource->start = addr.address.minimum + offset;
-	resource->end = resource->start + addr.address.address_length - 1;
-	info->res_offset[info->res_num] = offset;
-
-	if (insert_resource(root, resource)) {
-		dev_err(&info->bridge->dev,
-			"can't allocate host bridge window %pR\n",
-			resource);
-	} else {
-		if (offset)
-			dev_info(&info->bridge->dev, "host bridge window %pR "
-				 "(PCI address [%#llx-%#llx])\n",
-				 resource,
-				 resource->start - offset,
-				 resource->end - offset);
-		else
-			dev_info(&info->bridge->dev,
-				 "host bridge window %pR\n", resource);
+	struct device *dev = &ci->bridge->dev;
+	struct pci_root_info *info;
+	struct resource *res;
+	struct resource_entry *entry, *tmp;
+	int status;
+
+	status = acpi_pci_probe_root_resources(ci);
+	if (status > 0) {
+		info = container_of(ci, struct pci_root_info, common);
+		resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
+			res = entry->res;
+			if (res->flags & IORESOURCE_MEM) {
+				/*
+				 * HP's firmware has a hack to work around a
+				 * Windows bug. Ignore these tiny memory ranges.
+				 */
+				if (resource_size(res) <= 16) {
+					resource_list_del(entry);
+					insert_resource(&iomem_resource,
+							entry->res);
+					resource_list_add_tail(entry,
+							&info->io_resources);
+				}
+			} else if (res->flags & IORESOURCE_IO) {
+				if (resource_is_pcicfg_ioport(entry->res))
+					resource_list_destroy_entry(entry);
+				else if (add_io_space(dev, info, entry))
+					resource_list_destroy_entry(entry);
+			}
+		}
 	}
-	/* HP's firmware has a hack to work around a Windows bug.
-	 * Ignore these tiny memory ranges */
-	if (!((resource->flags & IORESOURCE_MEM) &&
-	      (resource->end - resource->start < 16)))
-		pci_add_resource_offset(&info->resources, resource,
-					info->res_offset[info->res_num]);
-
-	info->res_num++;
-	return AE_OK;
-}
 
-static void free_pci_root_info_res(struct pci_root_info *info)
-{
-	struct iospace_resource *iospace, *tmp;
-
-	list_for_each_entry_safe(iospace, tmp, &info->io_resources, list)
-		kfree(iospace);
-
-	kfree(info->name);
-	kfree(info->res);
-	info->res = NULL;
-	kfree(info->res_offset);
-	info->res_offset = NULL;
-	info->res_num = 0;
-	kfree(info->controller);
-	info->controller = NULL;
+	return status;
 }
 
-static void __release_pci_root_info(struct pci_root_info *info)
+static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci)
 {
-	int i;
-	struct resource *res;
-	struct iospace_resource *iospace;
+	struct pci_root_info *info;
+	struct resource_entry *entry, *tmp;
 
-	list_for_each_entry(iospace, &info->io_resources, list)
-		release_resource(&iospace->res);
-
-	for (i = 0; i < info->res_num; i++) {
-		res = &info->res[i];
-
-		if (!res->parent)
-			continue;
-
-		if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
-			continue;
-
-		release_resource(res);
+	info = container_of(ci, struct pci_root_info, common);
+	resource_list_for_each_entry_safe(entry, tmp, &info->io_resources) {
+		release_resource(entry->res);
+		resource_list_destroy_entry(entry);
 	}
-
-	free_pci_root_info_res(info);
 	kfree(info);
 }
 
-static void release_pci_root_info(struct pci_host_bridge *bridge)
-{
-	struct pci_root_info *info = bridge->release_data;
-
-	__release_pci_root_info(info);
-}
-
-static int
-probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
-		int busnum, int domain)
-{
-	char *name;
-
-	name = kmalloc(16, GFP_KERNEL);
-	if (!name)
-		return -ENOMEM;
-
-	sprintf(name, "PCI Bus %04x:%02x", domain, busnum);
-	info->bridge = device;
-	info->name = name;
-
-	acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
-			&info->res_num);
-	if (info->res_num) {
-		info->res =
-			kzalloc_node(sizeof(*info->res) * info->res_num,
-				     GFP_KERNEL, info->controller->node);
-		if (!info->res) {
-			kfree(name);
-			return -ENOMEM;
-		}
-
-		info->res_offset =
-			kzalloc_node(sizeof(*info->res_offset) * info->res_num,
-					GFP_KERNEL, info->controller->node);
-		if (!info->res_offset) {
-			kfree(name);
-			kfree(info->res);
-			info->res = NULL;
-			return -ENOMEM;
-		}
-
-		info->res_num = 0;
-		acpi_walk_resources(device->handle, METHOD_NAME__CRS,
-			add_window, info);
-	} else
-		kfree(name);
-
-	return 0;
-}
+static struct acpi_pci_root_ops pci_acpi_root_ops = {
+	.pci_ops = &pci_root_ops,
+	.release_info = pci_acpi_root_release_info,
+	.prepare_resources = pci_acpi_root_prepare_resources,
+};
 
 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 {
 	struct acpi_device *device = root->device;
-	int domain = root->segment;
-	int bus = root->secondary.start;
-	struct pci_controller *controller;
-	struct pci_root_info *info = NULL;
-	int busnum = root->secondary.start;
-	struct pci_bus *pbus;
-	int ret;
-
-	controller = alloc_pci_controller(domain);
-	if (!controller)
-		return NULL;
-
-	controller->companion = device;
-	controller->node = acpi_get_node(device->handle);
+	struct pci_root_info *info;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		dev_err(&device->dev,
-				"pci_bus %04x:%02x: ignored (out of memory)\n",
-				domain, busnum);
-		kfree(controller);
+			"pci_bus %04x:%02x: ignored (out of memory)\n",
+			root->segment, (int)root->secondary.start);
 		return NULL;
 	}
 
-	info->controller = controller;
+	info->controller.segment = root->segment;
+	info->controller.companion = device;
+	info->controller.node = acpi_get_node(device->handle);
 	INIT_LIST_HEAD(&info->io_resources);
-	INIT_LIST_HEAD(&info->resources);
-
-	ret = probe_pci_root_info(info, device, busnum, domain);
-	if (ret) {
-		kfree(info->controller);
-		kfree(info);
-		return NULL;
-	}
-	/* insert busn resource at first */
-	pci_add_resource(&info->resources, &root->secondary);
-	/*
-	 * See arch/x86/pci/acpi.c.
-	 * The desired pci bus might already be scanned in a quirk. We
-	 * should handle the case here, but it appears that IA64 hasn't
-	 * such quirk. So we just ignore the case now.
-	 */
-	pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller,
-				   &info->resources);
-	if (!pbus) {
-		pci_free_resource_list(&info->resources);
-		__release_pci_root_info(info);
-		return NULL;
-	}
-
-	pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge),
-			release_pci_root_info, info);
-	pci_scan_child_bus(pbus);
-	return pbus;
+	return acpi_pci_root_create(root, &pci_acpi_root_ops,
+				    &info->common, &info->controller);
 }
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index e0eb704ca1fa..fd104bd221ce 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += module.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 025e2a170493..ea35160d632b 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -28,7 +28,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)	READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -37,7 +37,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)	(((v)->counter) = (i))
+#define atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_CHIP_M32700_TS1
 #define __ATOMIC_CLOBBER	, "r4"
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 0b6b40d37b95..5b4ec541ba7c 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -57,7 +58,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -67,10 +67,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -179,6 +181,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -206,6 +209,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -370,6 +375,7 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -537,6 +543,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index eeb3a8991fc4..6e5198e2c124 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -344,6 +349,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -495,6 +501,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 3a7006654ce9..f75600b0ca23 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -355,6 +360,7 @@ CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -517,6 +523,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 0586b323a673..a42d91c389a6 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -343,6 +348,7 @@ CONFIG_BVME6000_NET=y
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index ad1dbce07aa4..77f4a11083e9 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -345,6 +350,7 @@ CONFIG_HPLANCE=y
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -497,6 +503,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b44acacaecf4..5a329f77329b 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -54,7 +55,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -64,10 +64,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -176,6 +178,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -203,6 +206,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -364,6 +369,7 @@ CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -519,6 +525,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 8afca3753db1..83c80d2030ec 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -64,7 +65,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -74,10 +74,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -186,6 +188,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -213,6 +216,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -281,6 +285,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -410,6 +415,7 @@ CONFIG_ZORRO8390=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 CONFIG_SMC91X=y
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PLIP=m
@@ -599,6 +605,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index ef00875994d9..6cb42c3bf5a2 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -52,7 +53,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -62,10 +62,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -174,6 +176,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -201,6 +204,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -266,6 +270,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -343,6 +348,7 @@ CONFIG_MVME147_NET=y
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 387c2bd90ff1..c7508c30330c 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -343,6 +348,7 @@ CONFIG_MVME16x_NET=y
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 35355c1bc714..64b71664a303 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -354,6 +359,7 @@ CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PLIP=m
@@ -510,6 +516,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 8442d267b877..9a4cab78a2ea 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -341,6 +346,7 @@ CONFIG_SUN3_82586=y
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
 # CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_MANAGER=y
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index 0e1b542e1555..1a2eaac13dbd 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16
 # CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_BLK_DEV_INITRD=y
+CONFIG_USERFAULTFD=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
@@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPVTI=m
 CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_GENEVE_CORE=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
@@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m
 # CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
+CONFIG_IPV6=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_ILA=m
 CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
@@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m
 CONFIG_IP_SET_LIST_SET=m
 CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_DUP_IPV4=m
 CONFIG_NF_TABLES_ARP=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
@@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m
 CONFIG_MPLS=y
 CONFIG_NET_MPLS_GSO=m
 CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
 CONFIG_DEVTMPFS=y
@@ -341,6 +346,7 @@ CONFIG_SUN3LANCE=y
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
 # CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 CONFIG_PPP=m
@@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
 CONFIG_TEST_FIRMWARE=m
 CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_ENCRYPTED_KEYS=m
 CONFIG_CRYPTO_RSA=m
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 039fac120cc0..4858178260f9 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -17,8 +17,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 /*
  * The ColdFire parts cannot do some immediate to memory operations,
diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h
index 5a822bb790f7..066e74f666ae 100644
--- a/arch/m68k/include/asm/linkage.h
+++ b/arch/m68k/include/asm/linkage.h
@@ -4,4 +4,34 @@
 #define __ALIGN .align 4
 #define __ALIGN_STR ".align 4"
 
+/*
+ * Make sure the compiler doesn't do anything stupid with the
+ * arguments on the stack - they are owned by the *caller*, not
+ * the callee. This just fools gcc into not spilling into them,
+ * and keeps it from doing tailcall recursion and/or using the
+ * stack slots for temporaries, since they are live and "used"
+ * all the way to the end of the function.
+ */
+#define asmlinkage_protect(n, ret, args...) \
+	__asmlinkage_protect##n(ret, ##args)
+#define __asmlinkage_protect_n(ret, args...) \
+	__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)
+#define __asmlinkage_protect0(ret) \
+	__asmlinkage_protect_n(ret)
+#define __asmlinkage_protect1(ret, arg1) \
+	__asmlinkage_protect_n(ret, "m" (arg1))
+#define __asmlinkage_protect2(ret, arg1, arg2) \
+	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
+#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
+	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
+#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
+	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+			      "m" (arg4))
+#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
+	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+			      "m" (arg4), "m" (arg5))
+#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
+	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+			      "m" (arg4), "m" (arg5), "m" (arg6))
+
 #endif
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 244e0dbe45db..0793a7f17417 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		356
+#define NR_syscalls		375
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 61fb6cb9d2ae..5e6fae6c275f 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -361,5 +361,24 @@
 #define __NR_memfd_create	353
 #define __NR_bpf		354
 #define __NR_execveat		355
+#define __NR_socket		356
+#define __NR_socketpair		357
+#define __NR_bind		358
+#define __NR_connect		359
+#define __NR_listen		360
+#define __NR_accept4		361
+#define __NR_getsockopt		362
+#define __NR_setsockopt		363
+#define __NR_getsockname	364
+#define __NR_getpeername	365
+#define __NR_sendto		366
+#define __NR_sendmsg		367
+#define __NR_recvfrom		368
+#define __NR_recvmsg		369
+#define __NR_shutdown		370
+#define __NR_recvmmsg		371
+#define __NR_sendmmsg		372
+#define __NR_userfaultfd	373
+#define __NR_membarrier		374
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index a0ec4303f2c8..5dd0e80042f5 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -376,4 +376,22 @@ ENTRY(sys_call_table)
 	.long sys_memfd_create
 	.long sys_bpf
 	.long sys_execveat		/* 355 */
-
+	.long sys_socket
+	.long sys_socketpair
+	.long sys_bind
+	.long sys_connect
+	.long sys_listen		/* 360 */
+	.long sys_accept4
+	.long sys_getsockopt
+	.long sys_setsockopt
+	.long sys_getsockname
+	.long sys_getpeername		/* 365 */
+	.long sys_sendto
+	.long sys_sendmsg
+	.long sys_recvfrom
+	.long sys_recvmsg
+	.long sys_shutdown		/* 370 */
+	.long sys_recvmmsg
+	.long sys_sendmmsg
+	.long sys_userfaultfd
+	.long sys_membarrier
diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c
index cd38f29955c8..2290c0cae48b 100644
--- a/arch/m68k/mac/psc.c
+++ b/arch/m68k/mac/psc.c
@@ -29,6 +29,7 @@
 
 int psc_present;
 volatile __u8 *psc;
+EXPORT_SYMBOL_GPL(psc);
 
 /*
  * Debugging dump, used in various places to see what's going on.
diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c
index c86ac37d1983..cfe9aa422343 100644
--- a/arch/m68k/sun3/idprom.c
+++ b/arch/m68k/sun3/idprom.c
@@ -125,8 +125,5 @@ void __init idprom_init(void)
 
 	display_system_type(idprom->id_machtype);
 
-	printk("Ethernet address: %x:%x:%x:%x:%x:%x\n",
-		    idprom->id_ethaddr[0], idprom->id_ethaddr[1],
-		    idprom->id_ethaddr[2], idprom->id_ethaddr[3],
-		    idprom->id_ethaddr[4], idprom->id_ethaddr[5]);
+	printk("Ethernet address: %pM\n", idprom->id_ethaddr);
 }
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index df31353fd200..29acb89daaaa 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -54,4 +54,5 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 21c4c268b86c..a62581815624 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -3,7 +3,7 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_set(v, i)		((v)->counter = (i))
+#define atomic_set(v, i)		WRITE_ONCE((v)->counter, (i))
 
 #include <linux/compiler.h>
 
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index f8efe380fe8b..0295d9b8d5bf 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -10,7 +10,7 @@
 
 static inline int atomic_read(const atomic_t *v)
 {
-	return (v)->counter;
+	return READ_ONCE((v)->counter);
 }
 
 /*
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 2f222f355c4b..b0ae88c9fed9 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -10,3 +10,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index ab5b488e1fde..89a2a9394927 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -194,7 +194,7 @@ void __init time_init(void)
 {
 	of_clk_init(NULL);
 	setup_cpuinfo_clk();
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index 15ecb4831e12..eeb3953ed8ac 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -293,8 +293,26 @@ static int __init ath79_misc_intc_of_init(
 
 	return 0;
 }
-IRQCHIP_DECLARE(ath79_misc_intc, "qca,ar7100-misc-intc",
-		ath79_misc_intc_of_init);
+
+static int __init ar7100_misc_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
+	return ath79_misc_intc_of_init(node, parent);
+}
+
+IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc",
+		ar7100_misc_intc_of_init);
+
+static int __init ar7240_misc_intc_of_init(
+	struct device_node *node, struct device_node *parent)
+{
+	ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack;
+	return ath79_misc_intc_of_init(node, parent);
+}
+
+IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc",
+		ar7240_misc_intc_of_init);
 
 static int __init ar79_cpu_intc_of_init(
 	struct device_node *node, struct device_node *parent)
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 51ed599cc894..e970fd9cf769 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -4,6 +4,7 @@ config BCM47XX_SSB
 	bool "SSB Support for Broadcom BCM47XX"
 	select SYS_HAS_CPU_BMIPS32_3300
 	select SSB
+	select SSB_HOST_SOC
 	select SSB_DRIVER_MIPS
 	select SSB_DRIVER_EXTIF
 	select SSB_EMBEDDED
diff --git a/arch/mips/bcm63xx/dev-spi.c b/arch/mips/bcm63xx/dev-spi.c
index ad448e41e3bd..232385441e46 100644
--- a/arch/mips/bcm63xx/dev-spi.c
+++ b/arch/mips/bcm63xx/dev-spi.c
@@ -18,29 +18,6 @@
 #include <bcm63xx_dev_spi.h>
 #include <bcm63xx_regs.h>
 
-/*
- * register offsets
- */
-static const unsigned long bcm6348_regs_spi[] = {
-	__GEN_SPI_REGS_TABLE(6348)
-};
-
-static const unsigned long bcm6358_regs_spi[] = {
-	__GEN_SPI_REGS_TABLE(6358)
-};
-
-const unsigned long *bcm63xx_regs_spi;
-EXPORT_SYMBOL(bcm63xx_regs_spi);
-
-static __init void bcm63xx_spi_regs_init(void)
-{
-	if (BCMCPU_IS_6338() || BCMCPU_IS_6348())
-		bcm63xx_regs_spi = bcm6348_regs_spi;
-	if (BCMCPU_IS_3368() || BCMCPU_IS_6358() ||
-		BCMCPU_IS_6362() || BCMCPU_IS_6368())
-		bcm63xx_regs_spi = bcm6358_regs_spi;
-}
-
 static struct resource spi_resources[] = {
 	{
 		.start		= -1, /* filled at runtime */
@@ -53,19 +30,10 @@ static struct resource spi_resources[] = {
 	},
 };
 
-static struct bcm63xx_spi_pdata spi_pdata = {
-	.bus_num		= 0,
-	.num_chipselect		= 8,
-};
-
 static struct platform_device bcm63xx_spi_device = {
-	.name		= "bcm63xx-spi",
 	.id		= -1,
 	.num_resources	= ARRAY_SIZE(spi_resources),
 	.resource	= spi_resources,
-	.dev		= {
-		.platform_data = &spi_pdata,
-	},
 };
 
 int __init bcm63xx_spi_register(void)
@@ -78,21 +46,15 @@ int __init bcm63xx_spi_register(void)
 	spi_resources[1].start = bcm63xx_get_irq_number(IRQ_SPI);
 
 	if (BCMCPU_IS_6338() || BCMCPU_IS_6348()) {
+		bcm63xx_spi_device.name = "bcm6348-spi",
 		spi_resources[0].end += BCM_6348_RSET_SPI_SIZE - 1;
-		spi_pdata.fifo_size = SPI_6348_MSG_DATA_SIZE;
-		spi_pdata.msg_type_shift = SPI_6348_MSG_TYPE_SHIFT;
-		spi_pdata.msg_ctl_width = SPI_6348_MSG_CTL_WIDTH;
 	}
 
 	if (BCMCPU_IS_3368() || BCMCPU_IS_6358() || BCMCPU_IS_6362() ||
 		BCMCPU_IS_6368()) {
+		bcm63xx_spi_device.name = "bcm6358-spi",
 		spi_resources[0].end += BCM_6358_RSET_SPI_SIZE - 1;
-		spi_pdata.fifo_size = SPI_6358_MSG_DATA_SIZE;
-		spi_pdata.msg_type_shift = SPI_6358_MSG_TYPE_SHIFT;
-		spi_pdata.msg_ctl_width = SPI_6358_MSG_CTL_WIDTH;
 	}
 
-	bcm63xx_spi_regs_init();
-
 	return platform_device_register(&bcm63xx_spi_device);
 }
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 0352bc8d56b3..4f9eb0576884 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1094,7 +1094,7 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d,
 	unsigned int pin;
 	unsigned int trigger;
 
-	if (d->of_node != node)
+	if (irq_domain_get_of_node(d) != node)
 		return -EINVAL;
 
 	if (intsize < 2)
@@ -2163,7 +2163,7 @@ static int octeon_irq_cib_map(struct irq_domain *d,
 
 	if (hw >= host_data->max_bits) {
 		pr_err("ERROR: %s mapping %u is to big!\n",
-		       d->of_node->name, (unsigned)hw);
+		       irq_domain_get_of_node(d)->name, (unsigned)hw);
 		return -EINVAL;
 	}
 
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index 89a628455bc2..bd634259eab9 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -933,7 +933,7 @@ void __init plat_mem_setup(void)
 	while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
 		&& (total < MAX_MEMORY)) {
 		memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
-						__pa_symbol(&__init_end), -1,
+						__pa_symbol(&_end), -1,
 						0x100000,
 						CVMX_BOOTMEM_FLAG_NO_LOCKING);
 		if (memory >= 0) {
diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig
index 642b50946943..8b7429127a1d 100644
--- a/arch/mips/configs/pistachio_defconfig
+++ b/arch/mips/configs/pistachio_defconfig
@@ -257,7 +257,6 @@ CONFIG_MMC=y
 CONFIG_MMC_BLOCK_MINORS=16
 CONFIG_MMC_TEST=m
 CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_RTC_CLASS=y
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 40ec4ca3f946..c7fe4d01e79c 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -17,4 +17,5 @@ generic-y += segment.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += user.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 4c42fd9af777..f82d3af07931 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -30,7 +30,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /*
  * atomic_set - set atomic variable
@@ -39,7 +39,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)		((v)->counter = (i))
+#define atomic_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC_OP(op, c_op, asm_op)					      \
 static __inline__ void atomic_##op(int i, atomic_t * v)			      \
@@ -315,14 +315,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @v: pointer of type atomic64_t
  *
  */
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
 /*
  * atomic64_set - set atomic variable
  * @v: pointer of type atomic64_t
  * @i: required value
  */
-#define atomic64_set(v, i)	((v)->counter = (i))
+#define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC64_OP(op, c_op, asm_op)					      \
 static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 9801ac982655..fe67f12ac239 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -20,6 +20,9 @@
 #ifndef cpu_has_tlb
 #define cpu_has_tlb		(cpu_data[0].options & MIPS_CPU_TLB)
 #endif
+#ifndef cpu_has_ftlb
+#define cpu_has_ftlb		(cpu_data[0].options & MIPS_CPU_FTLB)
+#endif
 #ifndef cpu_has_tlbinv
 #define cpu_has_tlbinv		(cpu_data[0].options & MIPS_CPU_TLBINV)
 #endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index cd89e9855775..82ad15f11049 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -385,6 +385,7 @@ enum cpu_type_enum {
 #define MIPS_CPU_CDMM		0x4000000000ull	/* CPU has Common Device Memory Map */
 #define MIPS_CPU_BP_GHIST	0x8000000000ull /* R12K+ Branch Prediction Global History */
 #define MIPS_CPU_SP		0x10000000000ull /* Small (1KB) page support */
+#define MIPS_CPU_FTLB		0x20000000000ull /* CPU has Fixed-page-size TLB */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 9e777cd42b67..d10fd80dbb7e 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -256,6 +256,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si
  */
 #define ioremap_nocache(offset, size)					\
 	__ioremap_mode((offset), (size), _CACHE_UNCACHED)
+#define ioremap_uc ioremap_nocache
 
 /*
  * ioremap_cachable -	map bus memory into CPU space
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 5a1a882e0a75..6ded8d347af9 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h
index b02891f9caaf..21d9607c80d7 100644
--- a/arch/mips/include/asm/maar.h
+++ b/arch/mips/include/asm/maar.h
@@ -66,6 +66,15 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower,
 }
 
 /**
+ * maar_init() - initialise MAARs
+ *
+ * Performs initialisation of MAARs for the current CPU, making use of the
+ * platforms implementation of platform_maar_init where necessary and
+ * duplicating the setup it provides on secondary CPUs.
+ */
+extern void maar_init(void);
+
+/**
  * struct maar_config - MAAR configuration data
  * @lower:	The lowest address that the MAAR pair will affect. Must be
  *		aligned to a 2^16 byte boundary.
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h
index 25737655d141..dd299548860d 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h
@@ -7,48 +7,4 @@
 
 int __init bcm63xx_spi_register(void);
 
-struct bcm63xx_spi_pdata {
-	unsigned int	fifo_size;
-	unsigned int	msg_type_shift;
-	unsigned int	msg_ctl_width;
-	int		bus_num;
-	int		num_chipselect;
-};
-
-enum bcm63xx_regs_spi {
-	SPI_CMD,
-	SPI_INT_STATUS,
-	SPI_INT_MASK_ST,
-	SPI_INT_MASK,
-	SPI_ST,
-	SPI_CLK_CFG,
-	SPI_FILL_BYTE,
-	SPI_MSG_TAIL,
-	SPI_RX_TAIL,
-	SPI_MSG_CTL,
-	SPI_MSG_DATA,
-	SPI_RX_DATA,
-};
-
-#define __GEN_SPI_REGS_TABLE(__cpu)					\
-	[SPI_CMD]		= SPI_## __cpu ##_CMD,			\
-	[SPI_INT_STATUS]	= SPI_## __cpu ##_INT_STATUS,		\
-	[SPI_INT_MASK_ST]	= SPI_## __cpu ##_INT_MASK_ST,		\
-	[SPI_INT_MASK]		= SPI_## __cpu ##_INT_MASK,		\
-	[SPI_ST]		= SPI_## __cpu ##_ST,			\
-	[SPI_CLK_CFG]		= SPI_## __cpu ##_CLK_CFG,		\
-	[SPI_FILL_BYTE]		= SPI_## __cpu ##_FILL_BYTE,		\
-	[SPI_MSG_TAIL]		= SPI_## __cpu ##_MSG_TAIL,		\
-	[SPI_RX_TAIL]		= SPI_## __cpu ##_RX_TAIL,		\
-	[SPI_MSG_CTL]		= SPI_## __cpu ##_MSG_CTL,		\
-	[SPI_MSG_DATA]		= SPI_## __cpu ##_MSG_DATA,		\
-	[SPI_RX_DATA]		= SPI_## __cpu ##_RX_DATA,
-
-static inline unsigned long bcm63xx_spireg(enum bcm63xx_regs_spi reg)
-{
-	extern const unsigned long *bcm63xx_regs_spi;
-
-	return bcm63xx_regs_spi[reg];
-}
-
 #endif /* BCM63XX_DEV_SPI_H */
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index d75b75e78ebb..1f1927ab4269 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -194,6 +194,7 @@ BUILD_CM_RW(reg3_mask,		MIPS_CM_GCB_OFS + 0xc8)
 BUILD_CM_R_(gic_status,		MIPS_CM_GCB_OFS + 0xd0)
 BUILD_CM_R_(cpc_status,		MIPS_CM_GCB_OFS + 0xf0)
 BUILD_CM_RW(l2_config,		MIPS_CM_GCB_OFS + 0x130)
+BUILD_CM_RW(sys_config2,	MIPS_CM_GCB_OFS + 0x150)
 
 /* Core Local & Core Other register accessor functions */
 BUILD_CM_Cx_RW(reset_release,	0x00)
@@ -316,6 +317,10 @@ BUILD_CM_Cx_R_(tcid_8_priority,	0x80)
 #define CM_GCR_L2_CONFIG_ASSOC_SHF		0
 #define CM_GCR_L2_CONFIG_ASSOC_MSK		(_ULCAST_(0xff) << 0)
 
+/* GCR_SYS_CONFIG2 register fields */
+#define CM_GCR_SYS_CONFIG2_MAXVPW_SHF		0
+#define CM_GCR_SYS_CONFIG2_MAXVPW_MSK		(_ULCAST_(0xf) << 0)
+
 /* GCR_Cx_COHERENCE register fields */
 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_SHF	0
 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK	(_ULCAST_(0xff) << 0)
@@ -405,4 +410,38 @@ static inline int mips_cm_revision(void)
 	return read_gcr_rev();
 }
 
+/**
+ * mips_cm_max_vp_width() - return the width in bits of VP indices
+ *
+ * Return: the width, in bits, of VP indices in fields that combine core & VP
+ * indices.
+ */
+static inline unsigned int mips_cm_max_vp_width(void)
+{
+	extern int smp_num_siblings;
+
+	if (mips_cm_revision() >= CM_REV_CM3)
+		return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
+
+	return smp_num_siblings;
+}
+
+/**
+ * mips_cm_vp_id() - calculate the hardware VP ID for a CPU
+ * @cpu: the CPU whose VP ID to calculate
+ *
+ * Hardware such as the GIC uses identifiers for VPs which may not match the
+ * CPU numbers used by Linux. This function calculates the hardware VP
+ * identifier corresponding to a given CPU.
+ *
+ * Return: the VP ID for the CPU.
+ */
+static inline unsigned int mips_cm_vp_id(unsigned int cpu)
+{
+	unsigned int core = cpu_data[cpu].core;
+	unsigned int vp = cpu_vpe_id(&cpu_data[cpu]);
+
+	return (core * mips_cm_max_vp_width()) + vp;
+}
+
 #endif /* __MIPS_ASM_MIPS_CM_H__ */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index d3cd8eac81e3..c64781cf649f 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -487,6 +487,8 @@
 
 /* Bits specific to the MIPS32/64 PRA.	*/
 #define MIPS_CONF_MT		(_ULCAST_(7) <<	 7)
+#define MIPS_CONF_MT_TLB	(_ULCAST_(1) <<  7)
+#define MIPS_CONF_MT_FTLB	(_ULCAST_(4) <<  7)
 #define MIPS_CONF_AR		(_ULCAST_(7) << 10)
 #define MIPS_CONF_AT		(_ULCAST_(3) << 13)
 #define MIPS_CONF_M		(_ULCAST_(1) << 31)
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index cfcb876cae6b..97c03f468924 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -61,6 +61,12 @@
  */
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
 
 #define MADV_NORMAL	0		/* no further special treatment */
 #define MADV_RANDOM	1		/* expect random page references */
diff --git a/arch/mips/include/uapi/asm/swab.h b/arch/mips/include/uapi/asm/swab.h
index c4ddc4f0d2dc..23cd9b118c9e 100644
--- a/arch/mips/include/uapi/asm/swab.h
+++ b/arch/mips/include/uapi/asm/swab.h
@@ -13,16 +13,15 @@
 
 #define __SWAB_64_THRU_32__
 
-#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) ||		\
-    defined(_MIPS_ARCH_LOONGSON3A)
+#if !defined(__mips16) &&					\
+	((defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) ||	\
+	 defined(_MIPS_ARCH_LOONGSON3A))
 
-static inline __attribute__((nomips16)) __attribute_const__
-		__u16 __arch_swab16(__u16 x)
+static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
 {
 	__asm__(
 	"	.set	push			\n"
 	"	.set	arch=mips32r2		\n"
-	"	.set	nomips16		\n"
 	"	wsbh	%0, %1			\n"
 	"	.set	pop			\n"
 	: "=r" (x)
@@ -32,13 +31,11 @@ static inline __attribute__((nomips16)) __attribute_const__
 }
 #define __arch_swab16 __arch_swab16
 
-static inline __attribute__((nomips16)) __attribute_const__
-		__u32 __arch_swab32(__u32 x)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 {
 	__asm__(
 	"	.set	push			\n"
 	"	.set	arch=mips32r2		\n"
-	"	.set	nomips16		\n"
 	"	wsbh	%0, %1			\n"
 	"	rotr	%0, %0, 16		\n"
 	"	.set	pop			\n"
@@ -54,13 +51,11 @@ static inline __attribute__((nomips16)) __attribute_const__
  * 64-bit kernel on r2 CPUs.
  */
 #ifdef __mips64
-static inline __attribute__((nomips16)) __attribute_const__
-		__u64 __arch_swab64(__u64 x)
+static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
 {
 	__asm__(
 	"	.set	push			\n"
 	"	.set	arch=mips64r2		\n"
-	"	.set	nomips16		\n"
 	"	dsbh	%0, %1			\n"
 	"	dshd	%0, %0			\n"
 	"	.set	pop			\n"
@@ -71,5 +66,5 @@ static inline __attribute__((nomips16)) __attribute_const__
 }
 #define __arch_swab64 __arch_swab64
 #endif /* __mips64 */
-#endif /* MIPS R2 or newer or Loongson 3A */
+#endif /* (not __mips16) and (MIPS R2 or newer or Loongson 3A) */
 #endif /* _ASM_SWAB_H */
diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index c03088f9f514..cfabadb135d9 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -377,16 +377,18 @@
 #define __NR_memfd_create		(__NR_Linux + 354)
 #define __NR_bpf			(__NR_Linux + 355)
 #define __NR_execveat			(__NR_Linux + 356)
+#define __NR_userfaultfd		(__NR_Linux + 357)
+#define __NR_membarrier			(__NR_Linux + 358)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		356
+#define __NR_Linux_syscalls		358
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		356
+#define __NR_O32_Linux_syscalls		358
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -711,16 +713,18 @@
 #define __NR_memfd_create		(__NR_Linux + 314)
 #define __NR_bpf			(__NR_Linux + 315)
 #define __NR_execveat			(__NR_Linux + 316)
+#define __NR_userfaultfd		(__NR_Linux + 317)
+#define __NR_membarrier			(__NR_Linux + 318)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		316
+#define __NR_Linux_syscalls		318
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		316
+#define __NR_64_Linux_syscalls		318
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1049,15 +1053,17 @@
 #define __NR_memfd_create		(__NR_Linux + 318)
 #define __NR_bpf			(__NR_Linux + 319)
 #define __NR_execveat			(__NR_Linux + 320)
+#define __NR_userfaultfd		(__NR_Linux + 321)
+#define __NR_membarrier			(__NR_Linux + 322)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		320
+#define __NR_Linux_syscalls		322
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		320
+#define __NR_N32_Linux_syscalls		322
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 4e62bf85d0b0..459cb017306c 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -26,6 +26,7 @@
 #include <linux/power/jz4740-battery.h>
 #include <linux/power/gpio-charger.h>
 
+#include <asm/mach-jz4740/gpio.h>
 #include <asm/mach-jz4740/jz4740_fb.h>
 #include <asm/mach-jz4740/jz4740_mmc.h>
 #include <asm/mach-jz4740/jz4740_nand.h>
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index a74e181058b0..8c6d76c9b2d6 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -28,6 +28,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/mach-jz4740/base.h>
+#include <asm/mach-jz4740/gpio.h>
 
 #define JZ4740_GPIO_BASE_A (32*0)
 #define JZ4740_GPIO_BASE_B (32*1)
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 9f71c06aebf6..209ded16806b 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -39,6 +39,7 @@
 	 mfc0	\dest, CP0_CONFIG, 3
 	andi	\dest, \dest, MIPS_CONF3_MT
 	beqz	\dest, \nomt
+	 nop
 	.endm
 
 .section .text.cps-vec
@@ -223,10 +224,9 @@ LEAF(excep_ejtag)
 	END(excep_ejtag)
 
 LEAF(mips_cps_core_init)
-#ifdef CONFIG_MIPS_MT
+#ifdef CONFIG_MIPS_MT_SMP
 	/* Check that the core implements the MT ASE */
 	has_mt	t0, 3f
-	 nop
 
 	.set	push
 	.set	mips64r2
@@ -310,8 +310,9 @@ LEAF(mips_cps_boot_vpes)
 	PTR_ADDU t0, t0, t1
 
 	/* Calculate this VPEs ID. If the core doesn't support MT use 0 */
+	li	t9, 0
+#ifdef CONFIG_MIPS_MT_SMP
 	has_mt	ta2, 1f
-	 li	t9, 0
 
 	/* Find the number of VPEs present in the core */
 	mfc0	t1, CP0_MVPCONF0
@@ -330,6 +331,7 @@ LEAF(mips_cps_boot_vpes)
 	/* Retrieve the VPE ID from EBase.CPUNum */
 	mfc0	t9, $15, 1
 	and	t9, t9, t1
+#endif
 
 1:	/* Calculate a pointer to this VPEs struct vpe_boot_config */
 	li	t1, VPEBOOTCFG_SIZE
@@ -337,7 +339,7 @@ LEAF(mips_cps_boot_vpes)
 	PTR_L	ta3, COREBOOTCFG_VPECONFIG(t0)
 	PTR_ADDU v0, v0, ta3
 
-#ifdef CONFIG_MIPS_MT
+#ifdef CONFIG_MIPS_MT_SMP
 
 	/* If the core doesn't support MT then return */
 	bnez	ta2, 1f
@@ -451,7 +453,7 @@ LEAF(mips_cps_boot_vpes)
 
 2:	.set	pop
 
-#endif /* CONFIG_MIPS_MT */
+#endif /* CONFIG_MIPS_MT_SMP */
 
 	/* Return */
 	jr	ra
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 571a8e6ea5bd..09a51d091941 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -410,16 +410,18 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable)
 static inline unsigned int decode_config0(struct cpuinfo_mips *c)
 {
 	unsigned int config0;
-	int isa;
+	int isa, mt;
 
 	config0 = read_c0_config();
 
 	/*
 	 * Look for Standard TLB or Dual VTLB and FTLB
 	 */
-	if ((((config0 & MIPS_CONF_MT) >> 7) == 1) ||
-	    (((config0 & MIPS_CONF_MT) >> 7) == 4))
+	mt = config0 & MIPS_CONF_MT;
+	if (mt == MIPS_CONF_MT_TLB)
 		c->options |= MIPS_CPU_TLB;
+	else if (mt == MIPS_CONF_MT_FTLB)
+		c->options |= MIPS_CPU_TLB | MIPS_CPU_FTLB;
 
 	isa = (config0 & MIPS_CONF_AT) >> 13;
 	switch (isa) {
@@ -559,15 +561,18 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c)
 	if (cpu_has_tlb) {
 		if (((config4 & MIPS_CONF4_IE) >> 29) == 2)
 			c->options |= MIPS_CPU_TLBINV;
+
 		/*
-		 * This is a bit ugly. R6 has dropped that field from
-		 * config4 and the only valid configuration is VTLB+FTLB so
-		 * set a good value for mmuextdef for that case.
+		 * R6 has dropped the MMUExtDef field from config4.
+		 * On R6 the fields always describe the FTLB, and only if it is
+		 * present according to Config.MT.
 		 */
-		if (cpu_has_mips_r6)
+		if (!cpu_has_mips_r6)
+			mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF;
+		else if (cpu_has_ftlb)
 			mmuextdef = MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT;
 		else
-			mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF;
+			mmuextdef = 0;
 
 		switch (mmuextdef) {
 		case MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT:
diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S
index 423ae83af1fb..3375745b9198 100644
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@@ -18,7 +18,7 @@
 	.set pop
 /*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *		       struct thread_info *next_ti, int usedfpu)
+ *		       struct thread_info *next_ti)
  */
 	.align	7
 	LEAF(resume)
@@ -28,30 +28,6 @@
 	cpu_save_nonscratch a0
 	LONG_S	ra, THREAD_REG31(a0)
 
-	/*
-	 * check if we need to save FPU registers
-	 */
-	.set push
-	.set noreorder
-	beqz	a3, 1f
-	 PTR_L	t3, TASK_THREAD_INFO(a0)
-	.set pop
-
-	/*
-	 * clear saved user stack CU1 bit
-	 */
-	LONG_L	t0, ST_OFF(t3)
-	li	t1, ~ST0_CU1
-	and	t0, t0, t1
-	LONG_S	t0, ST_OFF(t3)
-
-	.set push
-	.set arch=mips64r2
-	fpu_save_double a0 t0 t1		# c0_status passed in t0
-						# clobbers t1
-	.set pop
-1:
-
 #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
 	/* Check if we need to store CVMSEG state */
 	dmfc0	t0, $11,7	/* CvmMemCtl */
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 5087a4b72e6b..ac27ef7d4d0e 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -31,18 +31,8 @@
 #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
 
 /*
- * FPU context is saved iff the process has used it's FPU in the current
- * time slice as indicated by TIF_USEDFPU.  In any case, the CU1 bit for user
- * space STATUS register should be 0, so that a process *always* starts its
- * userland with FPU disabled after each context switch.
- *
- * FPU will be enabled as soon as the process accesses FPU again, through
- * do_cpu() trap.
- */
-
-/*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *		       struct thread_info *next_ti, int usedfpu)
+ *		       struct thread_info *next_ti)
  */
 LEAF(resume)
 	mfc0	t1, CP0_STATUS
@@ -50,22 +40,6 @@ LEAF(resume)
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)
 
-	beqz	a3, 1f
-
-	PTR_L	t3, TASK_THREAD_INFO(a0)
-
-	/*
-	 * clear saved user stack CU1 bit
-	 */
-	lw	t0, ST_OFF(t3)
-	li	t1, ~ST0_CU1
-	and	t0, t0, t1
-	sw	t0, ST_OFF(t3)
-
-	fpu_save_single a0, t0			# clobbers t0
-
-1:
-
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	PTR_LA	t8, __stack_chk_guard
 	LONG_L	t9, TASK_STACK_CANARY(a1)
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 4cc13508d967..65a74e4f0f45 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -36,16 +36,8 @@ NESTED(handle_sys, PT_SIZE, sp)
 	lw	t1, PT_EPC(sp)		# skip syscall on return
 
 	subu	v0, v0, __NR_O32_Linux	# check syscall number
-	sltiu	t0, v0, __NR_O32_Linux_syscalls + 1
 	addiu	t1, 4			# skip to next instruction
 	sw	t1, PT_EPC(sp)
-	beqz	t0, illegal_syscall
-
-	sll	t0, v0, 2
-	la	t1, sys_call_table
-	addu	t1, t0
-	lw	t2, (t1)		# syscall routine
-	beqz	t2, illegal_syscall
 
 	sw	a3, PT_R26(sp)		# save a3 for syscall restarting
 
@@ -96,6 +88,16 @@ loads_done:
 	li	t1, _TIF_WORK_SYSCALL_ENTRY
 	and	t0, t1
 	bnez	t0, syscall_trace_entry # -> yes
+syscall_common:
+	sltiu	t0, v0, __NR_O32_Linux_syscalls + 1
+	beqz	t0, illegal_syscall
+
+	sll	t0, v0, 2
+	la	t1, sys_call_table
+	addu	t1, t0
+	lw	t2, (t1)		# syscall routine
+
+	beqz	t2, illegal_syscall
 
 	jalr	t2			# Do The Real Thing (TM)
 
@@ -116,7 +118,7 @@ o32_syscall_exit:
 
 syscall_trace_entry:
 	SAVE_STATIC
-	move	s0, t2
+	move	s0, v0
 	move	a0, sp
 
 	/*
@@ -129,27 +131,18 @@ syscall_trace_entry:
 
 1:	jal	syscall_trace_enter
 
-	bltz	v0, 2f			# seccomp failed? Skip syscall
+	bltz	v0, 1f			# seccomp failed? Skip syscall
+
+	move	v0, s0			# restore syscall
 
-	move	t0, s0
 	RESTORE_STATIC
 	lw	a0, PT_R4(sp)		# Restore argument registers
 	lw	a1, PT_R5(sp)
 	lw	a2, PT_R6(sp)
 	lw	a3, PT_R7(sp)
-	jalr	t0
-
-	li	t0, -EMAXERRNO - 1	# error?
-	sltu	t0, t0, v0
-	sw	t0, PT_R7(sp)		# set error flag
-	beqz	t0, 1f
-
-	lw	t1, PT_R2(sp)		# syscall number
-	negu	v0			# error
-	sw	t1, PT_R0(sp)		# save it for syscall restarting
-1:	sw	v0, PT_R2(sp)		# result
+	j	syscall_common
 
-2:	j	syscall_exit
+1:	j	syscall_exit
 
 /* ------------------------------------------------------------------------ */
 
@@ -599,3 +592,5 @@ EXPORT(sys_call_table)
 	PTR	sys_memfd_create
 	PTR	sys_bpf				/* 4355 */
 	PTR	sys_execveat
+	PTR	sys_userfaultfd
+	PTR	sys_membarrier
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index a6f6b762c47a..e732981cf99f 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -39,18 +39,11 @@ NESTED(handle_sys64, PT_SIZE, sp)
 	.set	at
 #endif
 
-	dsubu	t0, v0, __NR_64_Linux	# check syscall number
-	sltiu	t0, t0, __NR_64_Linux_syscalls + 1
 #if !defined(CONFIG_MIPS32_O32) && !defined(CONFIG_MIPS32_N32)
 	ld	t1, PT_EPC(sp)		# skip syscall on return
 	daddiu	t1, 4			# skip to next instruction
 	sd	t1, PT_EPC(sp)
 #endif
-	beqz	t0, illegal_syscall
-
-	dsll	t0, v0, 3		# offset into table
-	ld	t2, (sys_call_table - (__NR_64_Linux * 8))(t0)
-					# syscall routine
 
 	sd	a3, PT_R26(sp)		# save a3 for syscall restarting
 
@@ -59,6 +52,17 @@ NESTED(handle_sys64, PT_SIZE, sp)
 	and	t0, t1, t0
 	bnez	t0, syscall_trace_entry
 
+syscall_common:
+	dsubu	t2, v0, __NR_64_Linux
+	sltiu   t0, t2, __NR_64_Linux_syscalls + 1
+	beqz	t0, illegal_syscall
+
+	dsll	t0, t2, 3		# offset into table
+	dla	t2, sys_call_table
+	daddu	t0, t2, t0
+	ld	t2, (t0)		# syscall routine
+	beqz	t2, illegal_syscall
+
 	jalr	t2			# Do The Real Thing (TM)
 
 	li	t0, -EMAXERRNO - 1	# error?
@@ -78,14 +82,14 @@ n64_syscall_exit:
 
 syscall_trace_entry:
 	SAVE_STATIC
-	move	s0, t2
+	move	s0, v0
 	move	a0, sp
 	move	a1, v0
 	jal	syscall_trace_enter
 
-	bltz	v0, 2f			# seccomp failed? Skip syscall
+	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	t0, s0
+	move	v0, s0
 	RESTORE_STATIC
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
@@ -93,19 +97,9 @@ syscall_trace_entry:
 	ld	a3, PT_R7(sp)
 	ld	a4, PT_R8(sp)
 	ld	a5, PT_R9(sp)
-	jalr	t0
-
-	li	t0, -EMAXERRNO - 1	# error?
-	sltu	t0, t0, v0
-	sd	t0, PT_R7(sp)		# set error flag
-	beqz	t0, 1f
-
-	ld	t1, PT_R2(sp)		# syscall number
-	dnegu	v0			# error
-	sd	t1, PT_R0(sp)		# save it for syscall restarting
-1:	sd	v0, PT_R2(sp)		# result
+	j	syscall_common
 
-2:	j	syscall_exit
+1:	j	syscall_exit
 
 illegal_syscall:
 	/* This also isn't a 64-bit syscall, throw an error.  */
@@ -436,4 +430,6 @@ EXPORT(sys_call_table)
 	PTR	sys_memfd_create
 	PTR	sys_bpf				/* 5315 */
 	PTR	sys_execveat
+	PTR	sys_userfaultfd
+	PTR	sys_membarrier
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 4b2010654c46..c79484397584 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -52,6 +52,7 @@ NESTED(handle_sysn32, PT_SIZE, sp)
 	and	t0, t1, t0
 	bnez	t0, n32_syscall_trace_entry
 
+syscall_common:
 	jalr	t2			# Do The Real Thing (TM)
 
 	li	t0, -EMAXERRNO - 1	# error?
@@ -75,9 +76,9 @@ n32_syscall_trace_entry:
 	move	a1, v0
 	jal	syscall_trace_enter
 
-	bltz	v0, 2f			# seccomp failed? Skip syscall
+	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	t0, s0
+	move	t2, s0
 	RESTORE_STATIC
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
@@ -85,19 +86,9 @@ n32_syscall_trace_entry:
 	ld	a3, PT_R7(sp)
 	ld	a4, PT_R8(sp)
 	ld	a5, PT_R9(sp)
-	jalr	t0
+	j	syscall_common
 
-	li	t0, -EMAXERRNO - 1	# error?
-	sltu	t0, t0, v0
-	sd	t0, PT_R7(sp)		# set error flag
-	beqz	t0, 1f
-
-	ld	t1, PT_R2(sp)		# syscall number
-	dnegu	v0			# error
-	sd	t1, PT_R0(sp)		# save it for syscall restarting
-1:	sd	v0, PT_R2(sp)		# result
-
-2:	j	syscall_exit
+1:	j	syscall_exit
 
 not_n32_scall:
 	/* This is not an n32 compatibility syscall, pass it on to
@@ -429,4 +420,6 @@ EXPORT(sysn32_call_table)
 	PTR	sys_memfd_create
 	PTR	sys_bpf
 	PTR	compat_sys_execveat		/* 6320 */
+	PTR	sys_userfaultfd
+	PTR	sys_membarrier
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f543ff4feef9..6369cfd390c6 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -87,6 +87,7 @@ loads_done:
 	and	t0, t1, t0
 	bnez	t0, trace_a_syscall
 
+syscall_common:
 	jalr	t2			# Do The Real Thing (TM)
 
 	li	t0, -EMAXERRNO - 1	# error?
@@ -130,9 +131,9 @@ trace_a_syscall:
 
 1:	jal	syscall_trace_enter
 
-	bltz	v0, 2f			# seccomp failed? Skip syscall
+	bltz	v0, 1f			# seccomp failed? Skip syscall
 
-	move	t0, s0
+	move	t2, s0
 	RESTORE_STATIC
 	ld	a0, PT_R4(sp)		# Restore argument registers
 	ld	a1, PT_R5(sp)
@@ -142,19 +143,9 @@ trace_a_syscall:
 	ld	a5, PT_R9(sp)
 	ld	a6, PT_R10(sp)
 	ld	a7, PT_R11(sp)		# For indirect syscalls
-	jalr	t0
+	j	syscall_common
 
-	li	t0, -EMAXERRNO - 1	# error?
-	sltu	t0, t0, v0
-	sd	t0, PT_R7(sp)		# set error flag
-	beqz	t0, 1f
-
-	ld	t1, PT_R2(sp)		# syscall number
-	dnegu	v0			# error
-	sd	t1, PT_R0(sp)		# save it for syscall restarting
-1:	sd	v0, PT_R2(sp)		# result
-
-2:	j	syscall_exit
+1:	j	syscall_exit
 
 /* ------------------------------------------------------------------------ */
 
@@ -584,4 +575,6 @@ EXPORT(sys32_call_table)
 	PTR	sys_memfd_create
 	PTR	sys_bpf				/* 4355 */
 	PTR	compat_sys_execveat
+	PTR	sys_userfaultfd
+	PTR	sys_membarrier
 	.size	sys32_call_table,.-sys32_call_table
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 35b8316002f8..479515109e5b 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -338,7 +338,7 @@ static void __init bootmem_init(void)
 		if (end <= reserved_end)
 			continue;
 #ifdef CONFIG_BLK_DEV_INITRD
-		/* mapstart should be after initrd_end */
+		/* Skip zones before initrd and initrd itself */
 		if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
 			continue;
 #endif
@@ -371,6 +371,14 @@ static void __init bootmem_init(void)
 		max_low_pfn = PFN_DOWN(HIGHMEM_START);
 	}
 
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * mapstart should be after initrd_end
+	 */
+	if (initrd_end)
+		mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end)));
+#endif
+
 	/*
 	 * Initialize the boot-time allocator with low memory only.
 	 */
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index a31896c33716..bd4385a8e6e8 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -42,6 +42,7 @@
 #include <asm/mmu_context.h>
 #include <asm/time.h>
 #include <asm/setup.h>
+#include <asm/maar.h>
 
 cpumask_t cpu_callin_map;		/* Bitmask of started secondaries */
 
@@ -157,6 +158,7 @@ asmlinkage void start_secondary(void)
 	mips_clockevent_init();
 	mp_ops->init_secondary();
 	cpu_report();
+	maar_init();
 
 	/*
 	 * XXX parity protection should be folded in here when it's converted
diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c
index f6c44dd332e2..d6d07ad56180 100644
--- a/arch/mips/loongson64/common/env.c
+++ b/arch/mips/loongson64/common/env.c
@@ -64,6 +64,9 @@ void __init prom_init_env(void)
 	}
 	if (memsize == 0)
 		memsize = 256;
+
+	loongson_sysconf.nr_uarts = 1;
+
 	pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize);
 #else
 	struct boot_params *boot_p;
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index a914dc1cb6d1..d8117be729a2 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
 	else
 #endif
 #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32)
-	     if (dev->coherent_dma_mask < DMA_BIT_MASK(64))
+	     if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8))
 		dma_flag = __GFP_DMA;
 	else
 #endif
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 66d0f49c5bec..8770e619185e 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -44,6 +44,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
+#include <asm/maar.h>
 
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
@@ -252,6 +253,119 @@ void __init fixrange_init(unsigned long start, unsigned long end,
 #endif
 }
 
+unsigned __weak platform_maar_init(unsigned num_pairs)
+{
+	struct maar_config cfg[BOOT_MEM_MAP_MAX];
+	unsigned i, num_configured, num_cfg = 0;
+	phys_addr_t skip;
+
+	for (i = 0; i < boot_mem_map.nr_map; i++) {
+		switch (boot_mem_map.map[i].type) {
+		case BOOT_MEM_RAM:
+		case BOOT_MEM_INIT_RAM:
+			break;
+		default:
+			continue;
+		}
+
+		skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff);
+
+		cfg[num_cfg].lower = boot_mem_map.map[i].addr;
+		cfg[num_cfg].lower += skip;
+
+		cfg[num_cfg].upper = cfg[num_cfg].lower;
+		cfg[num_cfg].upper += boot_mem_map.map[i].size - 1;
+		cfg[num_cfg].upper -= skip;
+
+		cfg[num_cfg].attrs = MIPS_MAAR_S;
+		num_cfg++;
+	}
+
+	num_configured = maar_config(cfg, num_cfg, num_pairs);
+	if (num_configured < num_cfg)
+		pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n",
+			num_pairs, num_cfg);
+
+	return num_configured;
+}
+
+void maar_init(void)
+{
+	unsigned num_maars, used, i;
+	phys_addr_t lower, upper, attr;
+	static struct {
+		struct maar_config cfgs[3];
+		unsigned used;
+	} recorded = { { { 0 } }, 0 };
+
+	if (!cpu_has_maar)
+		return;
+
+	/* Detect the number of MAARs */
+	write_c0_maari(~0);
+	back_to_back_c0_hazard();
+	num_maars = read_c0_maari() + 1;
+
+	/* MAARs should be in pairs */
+	WARN_ON(num_maars % 2);
+
+	/* Set MAARs using values we recorded already */
+	if (recorded.used) {
+		used = maar_config(recorded.cfgs, recorded.used, num_maars / 2);
+		BUG_ON(used != recorded.used);
+	} else {
+		/* Configure the required MAARs */
+		used = platform_maar_init(num_maars / 2);
+	}
+
+	/* Disable any further MAARs */
+	for (i = (used * 2); i < num_maars; i++) {
+		write_c0_maari(i);
+		back_to_back_c0_hazard();
+		write_c0_maar(0);
+		back_to_back_c0_hazard();
+	}
+
+	if (recorded.used)
+		return;
+
+	pr_info("MAAR configuration:\n");
+	for (i = 0; i < num_maars; i += 2) {
+		write_c0_maari(i);
+		back_to_back_c0_hazard();
+		upper = read_c0_maar();
+
+		write_c0_maari(i + 1);
+		back_to_back_c0_hazard();
+		lower = read_c0_maar();
+
+		attr = lower & upper;
+		lower = (lower & MIPS_MAAR_ADDR) << 4;
+		upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
+
+		pr_info("  [%d]: ", i / 2);
+		if (!(attr & MIPS_MAAR_V)) {
+			pr_cont("disabled\n");
+			continue;
+		}
+
+		pr_cont("%pa-%pa", &lower, &upper);
+
+		if (attr & MIPS_MAAR_S)
+			pr_cont(" speculate");
+
+		pr_cont("\n");
+
+		/* Record the setup for use on secondary CPUs */
+		if (used <= ARRAY_SIZE(recorded.cfgs)) {
+			recorded.cfgs[recorded.used].lower = lower;
+			recorded.cfgs[recorded.used].upper = upper;
+			recorded.cfgs[recorded.used].attrs = attr;
+			recorded.used++;
+		}
+	}
+}
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 int page_is_ram(unsigned long pagenr)
 {
@@ -334,69 +448,6 @@ static inline void mem_init_free_highmem(void)
 #endif
 }
 
-unsigned __weak platform_maar_init(unsigned num_pairs)
-{
-	struct maar_config cfg[BOOT_MEM_MAP_MAX];
-	unsigned i, num_configured, num_cfg = 0;
-	phys_addr_t skip;
-
-	for (i = 0; i < boot_mem_map.nr_map; i++) {
-		switch (boot_mem_map.map[i].type) {
-		case BOOT_MEM_RAM:
-		case BOOT_MEM_INIT_RAM:
-			break;
-		default:
-			continue;
-		}
-
-		skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff);
-
-		cfg[num_cfg].lower = boot_mem_map.map[i].addr;
-		cfg[num_cfg].lower += skip;
-
-		cfg[num_cfg].upper = cfg[num_cfg].lower;
-		cfg[num_cfg].upper += boot_mem_map.map[i].size - 1;
-		cfg[num_cfg].upper -= skip;
-
-		cfg[num_cfg].attrs = MIPS_MAAR_S;
-		num_cfg++;
-	}
-
-	num_configured = maar_config(cfg, num_cfg, num_pairs);
-	if (num_configured < num_cfg)
-		pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n",
-			num_pairs, num_cfg);
-
-	return num_configured;
-}
-
-static void maar_init(void)
-{
-	unsigned num_maars, used, i;
-
-	if (!cpu_has_maar)
-		return;
-
-	/* Detect the number of MAARs */
-	write_c0_maari(~0);
-	back_to_back_c0_hazard();
-	num_maars = read_c0_maari() + 1;
-
-	/* MAARs should be in pairs */
-	WARN_ON(num_maars % 2);
-
-	/* Configure the required MAARs */
-	used = platform_maar_init(num_maars / 2);
-
-	/* Disable any further MAARs */
-	for (i = (used * 2); i < num_maars; i++) {
-		write_c0_maari(i);
-		back_to_back_c0_hazard();
-		write_c0_maar(0);
-		back_to_back_c0_hazard();
-	}
-}
-
 void __init mem_init(void)
 {
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
index 2e52cbd20ceb..7a584e0bf933 100644
--- a/arch/mips/mti-sead3/Makefile
+++ b/arch/mips/mti-sead3/Makefile
@@ -12,6 +12,4 @@ obj-y				:= sead3-lcd.o sead3-display.o sead3-init.o \
 				   sead3-int.o sead3-platform.o sead3-reset.o \
 				   sead3-setup.o sead3-time.o
 
-obj-y				+= leds-sead3.o
-
 obj-$(CONFIG_EARLY_PRINTK)	+= sead3-console.o
diff --git a/arch/mips/mti-sead3/leds-sead3.c b/arch/mips/mti-sead3/leds-sead3.c
deleted file mode 100644
index c938ceeb8848..000000000000
--- a/arch/mips/mti-sead3/leds-sead3.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- * Copyright (C) 2015 Imagination Technologies, Inc.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/leds.h>
-#include <linux/err.h>
-#include <linux/io.h>
-
-#include <asm/mips-boards/sead3-addr.h>
-
-static void sead3_pled_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
-{
-	writel(value, (void __iomem *)SEAD3_CPLD_P_LED);
-}
-
-static void sead3_fled_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
-{
-	writel(value, (void __iomem *)SEAD3_CPLD_F_LED);
-}
-
-static struct led_classdev sead3_pled = {
-	.name		= "sead3::pled",
-	.brightness_set = sead3_pled_set,
-	.flags		= LED_CORE_SUSPENDRESUME,
-};
-
-static struct led_classdev sead3_fled = {
-	.name		= "sead3::fled",
-	.brightness_set = sead3_fled_set,
-	.flags		= LED_CORE_SUSPENDRESUME,
-};
-
-static int sead3_led_probe(struct platform_device *pdev)
-{
-	int ret;
-
-	ret = led_classdev_register(&pdev->dev, &sead3_pled);
-	if (ret < 0)
-		return ret;
-
-	ret = led_classdev_register(&pdev->dev, &sead3_fled);
-	if (ret < 0)
-		led_classdev_unregister(&sead3_pled);
-
-	return ret;
-}
-
-static int sead3_led_remove(struct platform_device *pdev)
-{
-	led_classdev_unregister(&sead3_pled);
-	led_classdev_unregister(&sead3_fled);
-	return 0;
-}
-
-static struct platform_driver sead3_led_driver = {
-	.probe		= sead3_led_probe,
-	.remove		= sead3_led_remove,
-	.driver		= {
-		.name		= "sead3-led",
-	},
-};
-
-module_platform_driver(sead3_led_driver);
-
-MODULE_AUTHOR("Kristian Kielhofner <kris@krisk.org>");
-MODULE_DESCRIPTION("SEAD3 LED driver");
-MODULE_LICENSE("GPL");
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 0c4a133f6216..77cb27309db2 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -1251,7 +1251,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
 
 	fp->bpf_func = (void *)ctx.target;
-	fp->jited = true;
+	fp->jited = 1;
 
 out:
 	kfree(ctx.offsets);
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index e92726099be0..5d2e0c8d29c0 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -57,15 +57,28 @@
 
 LEAF(sk_load_word)
 	is_offset_negative(word)
-	.globl sk_load_word_positive
-sk_load_word_positive:
+FEXPORT(sk_load_word_positive)
 	is_offset_in_header(4, word)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
+	.set	reorder
 	lw	$r_A, 0(t1)
+	.set	noreorder
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
 	wsbh	t0, $r_A
 	rotr	$r_A, t0, 16
+# else
+	sll	t0, $r_A, 24
+	srl	t1, $r_A, 24
+	srl	t2, $r_A, 8
+	or	t0, t0, t1
+	andi	t2, t2, 0xff00
+	andi	t1, $r_A, 0xff00
+	or	t0, t0, t2
+	sll	t1, t1, 8
+	or	$r_A, t0, t1
+# endif
 #endif
 	jr	$r_ra
 	 move	$r_ret, zero
@@ -73,15 +86,24 @@ sk_load_word_positive:
 
 LEAF(sk_load_half)
 	is_offset_negative(half)
-	.globl sk_load_half_positive
-sk_load_half_positive:
+FEXPORT(sk_load_half_positive)
 	is_offset_in_header(2, half)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
+	.set	reorder
 	lh	$r_A, 0(t1)
+	.set	noreorder
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
 	wsbh	t0, $r_A
 	seh	$r_A, t0
+# else
+	sll	t0, $r_A, 24
+	andi	t1, $r_A, 0xff00
+	sra	t0, t0, 16
+	srl	t1, t1, 8
+	or	$r_A, t0, t1
+# endif
 #endif
 	jr	$r_ra
 	 move	$r_ret, zero
@@ -89,8 +111,7 @@ sk_load_half_positive:
 
 LEAF(sk_load_byte)
 	is_offset_negative(byte)
-	.globl sk_load_byte_positive
-sk_load_byte_positive:
+FEXPORT(sk_load_byte_positive)
 	is_offset_in_header(1, byte)
 	/* Offset within header boundaries */
 	PTR_ADDU t1, $r_skb_data, offset
@@ -148,23 +169,47 @@ sk_load_byte_positive:
 NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(4)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
 	wsbh	t0, $r_s0
 	jr	$r_ra
 	 rotr	$r_A, t0, 16
-#endif
+# else
+	sll	t0, $r_s0, 24
+	srl	t1, $r_s0, 24
+	srl	t2, $r_s0, 8
+	or	t0, t0, t1
+	andi	t2, t2, 0xff00
+	andi	t1, $r_s0, 0xff00
+	or	t0, t0, t2
+	sll	t1, t1, 8
+	jr	$r_ra
+	 or	$r_A, t0, t1
+# endif
+#else
 	jr	$r_ra
-	move	$r_A, $r_s0
+	 move	$r_A, $r_s0
+#endif
 
 	END(bpf_slow_path_word)
 
 NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(2)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
 	jr	$r_ra
 	 wsbh	$r_A, $r_s0
-#endif
+# else
+	sll	t0, $r_s0, 8
+	andi	t1, $r_s0, 0xff00
+	andi	t0, t0, 0xff00
+	srl	t1, t1, 8
+	jr	$r_ra
+	 or	$r_A, t0, t1
+# endif
+#else
 	jr	$r_ra
 	 move	$r_A, $r_s0
+#endif
 
 	END(bpf_slow_path_half)
 
diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c
index 8a377346f0ca..1022201b2beb 100644
--- a/arch/mips/pistachio/time.c
+++ b/arch/mips/pistachio/time.c
@@ -39,7 +39,7 @@ void __init plat_time_init(void)
 	struct clk *clk;
 
 	of_clk_init(NULL);
-	clocksource_of_init();
+	clocksource_probe();
 
 	np = of_get_cpu_node(0, NULL);
 	if (!np) {
diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c
index feb5a9bf98b4..25c4a61779f1 100644
--- a/arch/mips/ralink/clk.c
+++ b/arch/mips/ralink/clk.c
@@ -75,5 +75,5 @@ void __init plat_time_init(void)
 	pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
 	mips_hpt_frequency = clk_get_rate(clk) / 2;
 	clk_put(clk);
-	clocksource_of_init();
+	clocksource_probe();
 }
diff --git a/arch/mips/txx9/generic/spi_eeprom.c b/arch/mips/txx9/generic/spi_eeprom.c
index 3dbad99d5611..d833dd2c9b55 100644
--- a/arch/mips/txx9/generic/spi_eeprom.c
+++ b/arch/mips/txx9/generic/spi_eeprom.c
@@ -80,7 +80,6 @@ static int __init early_seeprom_probe(struct spi_device *spi)
 static struct spi_driver early_seeprom_driver __initdata = {
 	.driver = {
 		.name	= "at25",
-		.owner	= THIS_MODULE,
 	},
 	.probe	= early_seeprom_probe,
 };
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 6edb9ee6128e..1c8dd0f5cd5d 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 375e59140c9c..ce318d5ab23b 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -34,7 +34,7 @@
  *
  * Atomically reads the value of @v.  Note that the guaranteed
  */
-#define atomic_read(v)	(ACCESS_ONCE((v)->counter))
+#define atomic_read(v)	READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -43,7 +43,7 @@
  *
  * Atomically sets the value of @v to @i.  Note that the guaranteed
  */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 914864eb5a25..d63330e88379 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -61,4 +61,5 @@ generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index bbc3f9157f9c..e835dda2bfe2 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -324,7 +324,7 @@ void __init time_init(void)
 	if (count < 2)
 		panic("%d timer is found, it needs 2 timers in system\n", count);
 
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init);
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 2536965d00ea..1d109990a022 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -67,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 
 static __inline__ int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /* exported interface */
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 47f11c707b65..3d0e17bcc8e9 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -7,20 +7,12 @@
 
 
 /*
- * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have
- * 32-byte cachelines.  The default configuration is not for SMP anyway,
- * so if you're building for SMP, you should select the appropriate
- * processor type.  There is a potential livelock danger when running
- * a machine with this value set too small, but it's more probable you'll
- * just ruin performance.
+ * PA 2.0 processors have 64 and 128-byte L2 cachelines; PA 1.1 processors
+ * have 32-byte cachelines.  The L1 length appears to be 16 bytes but this
+ * is not clearly documented.
  */
-#ifdef CONFIG_PA20
-#define L1_CACHE_BYTES 64
-#define L1_CACHE_SHIFT 6
-#else
-#define L1_CACHE_BYTES 32
-#define L1_CACHE_SHIFT 5
-#endif
+#define L1_CACHE_BYTES 16
+#define L1_CACHE_SHIFT 4
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 294d251ca7b2..ecc3ae1ca28e 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -31,6 +31,9 @@
 
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
 
 #define MADV_NORMAL     0               /* no further special treatment */
 #define MADV_RANDOM     1               /* expect random page references */
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 2e639d7604f6..33170384d3ac 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -358,8 +358,10 @@
 #define __NR_memfd_create	(__NR_Linux + 340)
 #define __NR_bpf		(__NR_Linux + 341)
 #define __NR_execveat		(__NR_Linux + 342)
+#define __NR_membarrier		(__NR_Linux + 343)
+#define __NR_userfaultfd	(__NR_Linux + 344)
 
-#define __NR_Linux_syscalls	(__NR_execveat + 1)
+#define __NR_Linux_syscalls	(__NR_userfaultfd + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 8eefb12d1d33..78c3ef8c348d 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -438,6 +438,8 @@
 	ENTRY_SAME(memfd_create)	/* 340 */
 	ENTRY_SAME(bpf)
 	ENTRY_COMP(execveat)
+	ENTRY_SAME(membarrier)
+	ENTRY_SAME(userfaultfd)
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
index 426bf4103b9e..5f51b7bfc064 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -224,10 +224,12 @@
 
 /include/ "pq3-etsec2-0.dtsi"
 	enet0: enet0_grp2: ethernet@b0000 {
+		fsl,wake-on-filer;
 	};
 
 /include/ "pq3-etsec2-1.dtsi"
 	enet1: enet1_grp2: ethernet@b1000 {
+		fsl,wake-on-filer;
 	};
 
 	global-utilities@e0000 {
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 6bc0ee4b1070..2c041b535a64 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -111,7 +111,7 @@ CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
 CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_ALUA=m
 CONFIG_ATA=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 7991f37e5fe2..36871a4bfa54 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -114,7 +114,7 @@ CONFIG_SCSI_QLA_FC=m
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_LPFC=m
 CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_ALUA=m
 CONFIG_ATA=y
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 0dc42c5082b7..5f8229e24fe6 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -3,7 +3,6 @@
 
 #ifdef __KERNEL__
 
-#include <asm/reg.h>
 
 /* bytes per L1 cache line */
 #if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
@@ -40,12 +39,6 @@ struct ppc64_caches {
 };
 
 extern struct ppc64_caches ppc64_caches;
-
-static inline void logmpp(u64 x)
-{
-	asm volatile(PPC_LOGMPP(R1) : : "r" (x));
-}
-
 #endif /* __powerpc64__ && ! __ASSEMBLY__ */
 
 #if defined(__ASSEMBLY__)
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 6330a61b875a..4852e849128b 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst)
 	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
 }
 
+static inline unsigned int get_tmrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
 static inline unsigned int get_rt(u32 inst)
 {
 	return (inst >> 21) & 0x1f;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 827a38d7a9db..cfa758c6b4f6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -297,8 +297,6 @@ struct kvmppc_vcore {
 	u32 arch_compat;
 	ulong pcr;
 	ulong dpdes;		/* doorbell state (POWER8) */
-	void *mpp_buffer; /* Micro Partition Prefetch buffer */
-	bool mpp_buffer_is_valid;
 	ulong conferring_threads;
 };
 
@@ -718,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index cab6753f1be5..3f191f573d4f 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -61,8 +61,13 @@ struct machdep_calls {
 					       unsigned long addr,
 					       unsigned char *hpte_slot_array,
 					       int psize, int ssize, int local);
-	/* special for kexec, to be called in real mode, linear mapping is
-	 * destroyed as well */
+	/*
+	 * Special for kexec.
+	 * To be called in real mode with interrupts disabled. No locks are
+	 * taken as such, concurrent access on pre POWER5 hardware could result
+	 * in a deadlock.
+	 * The linear mapping is destroyed as well.
+	 */
 	void		(*hpte_clear_all)(void);
 
 	void __iomem *	(*ioremap)(phys_addr_t addr, unsigned long size,
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 790f5d1d9a46..7ab04fc59e24 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -141,7 +141,6 @@
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
-#define PPC_INST_LOGMPP			0x7c0007e4
 #define PPC_INST_LSWI			0x7c0004aa
 #define PPC_INST_LSWX			0x7c00042a
 #define PPC_INST_LWARX			0x7c000028
@@ -285,20 +284,6 @@
 #define __PPC_EH(eh)	0
 #endif
 
-/* POWER8 Micro Partition Prefetch (MPP) parameters */
-/* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL
-
-/* Bits 60 and 61 of MPP SPR should be set to one of the following */
-/* Aborting the fetch is indeed setting 00 in the table size bits */
-#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60)
-#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60)
-
-/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */
-#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54)
-#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54)
-#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54)
-
 /* Deal with instructions that older assemblers aren't aware of */
 #define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_INST_DCBAL | \
 					__PPC_RA(a) | __PPC_RB(b))
@@ -307,8 +292,6 @@
 #define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LDARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_LOGMPP(b)		stringify_in_c(.long PPC_INST_LOGMPP | \
-					__PPC_RB(b))
 #define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LWARX | \
 					___PPC_RT(t) | ___PPC_RA(a) | \
 					___PPC_RB(b) | __PPC_EH(eh))
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index aa1cc5f015ee..a908ada8e0a5 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -226,7 +226,6 @@
 #define   CTRL_TE	0x00c00000	/* thread enable */
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DAWR	0xB4
-#define SPRN_MPPR	0xB8	/* Micro Partition Prefetch Register */
 #define SPRN_RPR	0xBA	/* Relative Priority Register */
 #define SPRN_CIABR	0xBB
 #define   CIABR_PRIV		0x3
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 16547efa2d5a..2fef74b474f0 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -742,6 +742,12 @@
 #define MMUBE1_VBE4		0x00000002
 #define MMUBE1_VBE5		0x00000001
 
+#define TMRN_TMCFG0      16	/* Thread Management Configuration Register 0 */
+#define TMRN_TMCFG0_NPRIBITS       0x003f0000 /* Bits of thread priority */
+#define TMRN_TMCFG0_NPRIBITS_SHIFT 16
+#define TMRN_TMCFG0_NATHRD         0x00003f00 /* Number of active threads */
+#define TMRN_TMCFG0_NATHRD_SHIFT   8
+#define TMRN_TMCFG0_NTHRD          0x0000003f /* Number of threads */
 #define TMRN_IMSR0	0x120	/* Initial MSR Register 0 (e6500) */
 #define TMRN_IMSR1	0x121	/* Initial MSR Register 1 (e6500) */
 #define TMRN_INIA0	0x140	/* Next Instruction Address Register 0 */
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 5b3a903adae6..e4396a7d0f7c 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -40,6 +40,11 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 	return (val + c->high_bits) & ~rhs;
 }
 
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+	return ~1ul << __fls(mask);
+}
+
 #else
 
 #ifdef CONFIG_64BIT
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 6ea26df0a73c..03c06ba7464f 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -22,6 +22,7 @@
 
 #define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
 #define MCL_FUTURE      0x4000          /* lock all additions to address space */
+#define MCL_ONFAULT	0x8000		/* lock all pages that are faulted in */
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 59503ed98e5f..3f1472a78f39 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -303,7 +303,7 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 	dev->coherent_dma_mask = mask;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dma_set_coherent_mask);
+EXPORT_SYMBOL(dma_set_coherent_mask);
 
 #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
 
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 15099c41622e..92dea8df6b26 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1425,27 +1425,45 @@ static void __init prom_instantiate_sml(void)
 {
 	phandle ibmvtpm_node;
 	ihandle ibmvtpm_inst;
-	u32 entry = 0, size = 0;
+	u32 entry = 0, size = 0, succ = 0;
 	u64 base;
+	__be32 val;
 
 	prom_debug("prom_instantiate_sml: start...\n");
 
-	ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/ibm,vtpm"));
+	ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/vdevice/vtpm"));
 	prom_debug("ibmvtpm_node: %x\n", ibmvtpm_node);
 	if (!PHANDLE_VALID(ibmvtpm_node))
 		return;
 
-	ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/ibm,vtpm"));
+	ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/vdevice/vtpm"));
 	if (!IHANDLE_VALID(ibmvtpm_inst)) {
 		prom_printf("opening vtpm package failed (%x)\n", ibmvtpm_inst);
 		return;
 	}
 
-	if (call_prom_ret("call-method", 2, 2, &size,
-			  ADDR("sml-get-handover-size"),
-			  ibmvtpm_inst) != 0 || size == 0) {
-		prom_printf("SML get handover size failed\n");
-		return;
+	if (prom_getprop(ibmvtpm_node, "ibm,sml-efi-reformat-supported",
+			 &val, sizeof(val)) != PROM_ERROR) {
+		if (call_prom_ret("call-method", 2, 2, &succ,
+				  ADDR("reformat-sml-to-efi-alignment"),
+				  ibmvtpm_inst) != 0 || succ == 0) {
+			prom_printf("Reformat SML to EFI alignment failed\n");
+			return;
+		}
+
+		if (call_prom_ret("call-method", 2, 2, &size,
+				  ADDR("sml-get-allocated-size"),
+				  ibmvtpm_inst) != 0 || size == 0) {
+			prom_printf("SML get allocated size failed\n");
+			return;
+		}
+	} else {
+		if (call_prom_ret("call-method", 2, 2, &size,
+				  ADDR("sml-get-handover-size"),
+				  ibmvtpm_inst) != 0 || size == 0) {
+			prom_printf("SML get handover size failed\n");
+			return;
+		}
 	}
 
 	base = alloc_down(size, PAGE_SIZE, 0);
@@ -1454,6 +1472,8 @@ static void __init prom_instantiate_sml(void)
 
 	prom_printf("instantiating sml at 0x%x...", base);
 
+	memset((void *)base, 0, size);
+
 	if (call_prom_ret("call-method", 4, 2, &entry,
 			  ADDR("sml-handover"),
 			  ibmvtpm_inst, size, base) != 0 || entry == 0) {
@@ -1464,9 +1484,9 @@ static void __init prom_instantiate_sml(void)
 
 	reserve_mem(base, size);
 
-	prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-base",
+	prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base",
 		     &base, sizeof(base));
-	prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-size",
+	prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size",
 		     &size, sizeof(size));
 
 	prom_debug("sml base     = 0x%x\n", base);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 84bf934cf748..5a753fae8265 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1043,6 +1043,9 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	if (!rtas.entry)
+		return -EINVAL;
+
 	if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
 		return -EFAULT;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3fc2ba784a71..fb37290a57b4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
-	while (!hpt && order > PPC_MIN_HPT_ORDER) {
+	/* Only do this if userspace didn't specify a size via ioctl */
+	while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) {
 		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
 				       __GFP_NOWARN, order - PAGE_SHIFT);
 		if (!hpt)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 228049786888..9c26c5a96ea2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -36,7 +36,6 @@
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
-#include <asm/cache.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -75,12 +74,6 @@
 
 static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 
-#if defined(CONFIG_PPC_64K_PAGES)
-#define MPP_BUFFER_ORDER	0
-#elif defined(CONFIG_PPC_4K_PAGES)
-#define MPP_BUFFER_ORDER	3
-#endif
-
 static int dynamic_mt_modes = 6;
 module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
@@ -1455,13 +1448,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	vcore->kvm = kvm;
 	INIT_LIST_HEAD(&vcore->preempt_list);
 
-	vcore->mpp_buffer_is_valid = false;
-
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		vcore->mpp_buffer = (void *)__get_free_pages(
-			GFP_KERNEL|__GFP_ZERO,
-			MPP_BUFFER_ORDER);
-
 	return vcore;
 }
 
@@ -1894,33 +1880,6 @@ static int on_primary_thread(void)
 	return 1;
 }
 
-static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc)
-{
-	phys_addr_t phy_addr, mpp_addr;
-
-	phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer);
-	mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
-
-	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT);
-	logmpp(mpp_addr | PPC_LOGMPP_LOG_L2);
-
-	vc->mpp_buffer_is_valid = true;
-}
-
-static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
-{
-	phys_addr_t phy_addr, mpp_addr;
-
-	phy_addr = virt_to_phys(vc->mpp_buffer);
-	mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
-
-	/* We must abort any in-progress save operations to ensure
-	 * the table is valid so that prefetch engine knows when to
-	 * stop prefetching. */
-	logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT);
-	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
-}
-
 /*
  * A list of virtual cores for each physical CPU.
  * These are vcores that could run but their runner VCPU tasks are
@@ -2471,14 +2430,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
-	if (vc->mpp_buffer_is_valid)
-		kvmppc_start_restoring_l2_cache(vc);
-
 	__kvmppc_vcore_entry();
 
-	if (vc->mpp_buffer)
-		kvmppc_start_saving_l2_cache(vc);
-
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
 	spin_lock(&vc->lock);
@@ -3073,14 +3026,8 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 {
 	long int i;
 
-	for (i = 0; i < KVM_MAX_VCORES; ++i) {
-		if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) {
-			struct kvmppc_vcore *vc = kvm->arch.vcores[i];
-			free_pages((unsigned long)vc->mpp_buffer,
-				   MPP_BUFFER_ORDER);
-		}
+	for (i = 0; i < KVM_MAX_VCORES; ++i)
 		kfree(kvm->arch.vcores[i]);
-	}
 	kvm->arch.online_vcores = 0;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 0bce4fffcb2e..91700518bbf3 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -472,6 +472,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
+	if (v & HPTE_V_ABSENT)
+		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 	hpret[0] = v;
 	hpret[1] = r;
 	return H_SUCCESS;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b98889e9851d..b1dab8d1d885 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	beq	11f
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	beq 	15f	/* Invoke the H_DOORBELL handler */
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
 
@@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_HSRR1, r7
 	b	hmi_exception_after_realmode
 
+15:	mtspr SPRN_HSRR0, r8
+	mtspr SPRN_HSRR1, r7
+	ba    0xe80
+
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -2377,7 +2383,6 @@ machine_check_realmode:
 	mr	r3, r9		/* get vcpu pointer */
 	bl	kvmppc_realmode_machine_check
 	nop
-	cmpdi	r3, 0		/* Did we handle MCE ? */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	/*
@@ -2390,13 +2395,18 @@ machine_check_realmode:
 	 * The old code used to return to host for unhandled errors which
 	 * was causing guest to hang with soft lockups inside guest and
 	 * makes it difficult to recover guest instance.
+	 *
+	 * if we receive machine check with MSR(RI=0) then deliver it to
+	 * guest as machine check causing guest to crash.
 	 */
-	ld	r10, VCPU_PC(r9)
 	ld	r11, VCPU_MSR(r9)
+	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
+	beq	1f			/* Deliver a machine check to guest */
+	ld	r10, VCPU_PC(r9)
+	cmpdi	r3, 0		/* Did we handle MCE ? */
 	bne	2f	/* Continue guest execution. */
 	/* If not, deliver a machine check.  SRR0/1 are already set */
-	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-	ld	r11, VCPU_MSR(r9)
+1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 	bl	kvmppc_msr_interrupt
 2:	b	fast_interrupt_c_return
 
@@ -2436,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* hypervisor doorbell */
 3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+
+	/*
+	 * Clear the doorbell as we will invoke the handler
+	 * explicitly in the guest exit path.
+	 */
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
 	/* see if it's a host IPI */
 	li	r3, 1
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	bnelr
-	/* if not, clear it and return -1 */
-	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
-	PPC_MSGCLR(6)
+	/* if not, return -1 */
 	li	r3, -1
 	blr
 
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index b29ce752c7d6..32fdab57d604 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
                            struct kvm_book3e_206_tlb_entry *gtlbe)
 {
 	struct vcpu_id_table *idt = vcpu_e500->idt;
-	unsigned int pr, tid, ts, pid;
+	unsigned int pr, tid, ts;
+	int pid;
 	u32 val, eaddr;
 	unsigned long flags;
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index ce7291c79f6c..990db69a1d0b 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -15,6 +15,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/disassemble.h>
 #include <asm/dbell.h>
+#include <asm/reg_booke.h>
 
 #include "booke.h"
 #include "e500.h"
@@ -22,6 +23,7 @@
 #define XOP_DCBTLS  166
 #define XOP_MSGSND  206
 #define XOP_MSGCLR  238
+#define XOP_MFTMR   366
 #define XOP_TLBIVAX 786
 #define XOP_TLBSX   914
 #define XOP_TLBRE   946
@@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
+				  int rt)
+{
+	/* Expose one thread per vcpu */
+	if (get_tmrn(inst) == TMRN_TMCFG0) {
+		kvmppc_set_gpr(vcpu, rt,
+			       1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT));
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
+
 int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				unsigned int inst, int *advance)
 {
@@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
 			break;
 
+		case XOP_MFTMR:
+			emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt);
+			break;
+
 		case XOP_EHPRIV:
 			emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
 							   advance);
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 805fee9beefa..34c43fff4adb 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 			for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
 				unsigned long gfn_start, gfn_end;
-				tsize_pages = 1 << (tsize - 2);
+				tsize_pages = 1UL << (tsize - 2);
 
 				gfn_start = gfn & ~(tsize_pages - 1);
 				gfn_end = gfn_start + tsize_pages;
@@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	}
 
 	if (likely(!pfnmap)) {
-		tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+		tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
 		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_noslot_pfn(pfn)) {
 			if (printk_ratelimit())
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2e51289610e4..6fd2405c7f4a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		else
 			r = num_online_cpus();
 		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_USER_MEM_SLOTS;
+		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 13befa35d8a8..c8822af10a58 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -582,13 +582,21 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  * be when they isi), and we are the only one left.  We rely on our kernel
  * mapping being 0xC0's and the hardware ignoring those two real bits.
  *
+ * This must be called with interrupts disabled.
+ *
+ * Taking the native_tlbie_lock is unsafe here due to the possibility of
+ * lockdep being on. On pre POWER5 hardware, not taking the lock could
+ * cause deadlock. POWER5 and newer not taking the lock is fine. This only
+ * gets called during boot before secondary CPUs have come up and during
+ * crashdump and all bets are off anyway.
+ *
  * TODO: add batching support when enabled.  remember, no dynamic memory here,
  * athough there is the control page available...
  */
 static void native_hpte_clear(void)
 {
 	unsigned long vpn = 0;
-	unsigned long slot, slots, flags;
+	unsigned long slot, slots;
 	struct hash_pte *hptep = htab_address;
 	unsigned long hpte_v;
 	unsigned long pteg_count;
@@ -596,13 +604,6 @@ static void native_hpte_clear(void)
 
 	pteg_count = htab_hash_mask + 1;
 
-	local_irq_save(flags);
-
-	/* we take the tlbie lock and hold it.  Some hardware will
-	 * deadlock if we try to tlbie from two processors at once.
-	 */
-	raw_spin_lock(&native_tlbie_lock);
-
 	slots = pteg_count * HPTES_PER_GROUP;
 
 	for (slot = 0; slot < slots; slot++, hptep++) {
@@ -614,8 +615,8 @@ static void native_hpte_clear(void)
 		hpte_v = be64_to_cpu(hptep->v);
 
 		/*
-		 * Call __tlbie() here rather than tlbie() since we
-		 * already hold the native_tlbie_lock.
+		 * Call __tlbie() here rather than tlbie() since we can't take the
+		 * native_tlbie_lock.
 		 */
 		if (hpte_v & HPTE_V_VALID) {
 			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
@@ -625,8 +626,6 @@ static void native_hpte_clear(void)
 	}
 
 	asm volatile("eieio; tlbsync; ptesync":::"memory");
-	raw_spin_unlock(&native_tlbie_lock);
-	local_irq_restore(flags);
 }
 
 /*
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b85d44271c3b..669a15e7fa76 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -80,7 +80,7 @@ static void __init setup_node_to_cpumask_map(void)
 		setup_nr_node_ids();
 
 	/* allocate the map */
-	for (node = 0; node < nr_node_ids; node++)
+	for_each_node(node)
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 17cea18a09d3..04782164ee67 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -679,7 +679,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
 		((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 		fp->bpf_func = (void *)image;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0382f3f1095..d1e65ce545b3 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -48,7 +48,7 @@ struct cpu_hw_events {
 	unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 	unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 
-	unsigned int group_flag;
+	unsigned int txn_flags;
 	int n_txn_start;
 
 	/* BHRB bits */
@@ -1441,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 	 * skip the schedulability test here, it will be performed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN)
+	if (cpuhw->txn_flags & PERF_PMU_TXN_ADD)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -1586,13 +1586,22 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags)
  * Start group events scheduling transaction
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void power_pmu_start_txn(struct pmu *pmu)
+static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+
+	cpuhw->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	perf_pmu_disable(pmu);
-	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
 
@@ -1604,8 +1613,15 @@ static void power_pmu_start_txn(struct pmu *pmu)
 static void power_pmu_cancel_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	unsigned int txn_flags;
+
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	perf_pmu_enable(pmu);
 }
 
@@ -1621,7 +1637,15 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 
 	if (!ppmu)
 		return -EAGAIN;
+
 	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuhw->txn_flags = 0;
+		return 0;
+	}
+
 	n = cpuhw->n_events;
 	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
 		return -EAGAIN;
@@ -1632,7 +1656,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN;
+	cpuhw->txn_flags = 0;
 	perf_pmu_enable(pmu);
 	return 0;
 }
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 527c8b98e97e..9f9dfda9ed2c 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = {
 
 static struct kmem_cache *hv_page_cache;
 
+DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+DEFINE_PER_CPU(int, hv_24x7_txn_err);
+
+struct hv_24x7_hw {
+	struct perf_event *events[255];
+};
+
+DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+
 /*
  * request_buffer and result_buffer are not required to be 4k aligned,
  * but are not allowed to cross any 4k boundary. Aligning them to 4k is
@@ -1231,9 +1240,48 @@ static void update_event_count(struct perf_event *event, u64 now)
 static void h_24x7_event_read(struct perf_event *event)
 {
 	u64 now;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_hw *h24x7hw;
+	int txn_flags;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+
+	/*
+	 * If in a READ transaction, add this counter to the list of
+	 * counters to read during the next HCALL (i.e commit_txn()).
+	 * If not in a READ transaction, go ahead and make the HCALL
+	 * to read this counter by itself.
+	 */
+
+	if (txn_flags & PERF_PMU_TXN_READ) {
+		int i;
+		int ret;
 
-	now = h_24x7_get_value(event);
-	update_event_count(event, now);
+		if (__this_cpu_read(hv_24x7_txn_err))
+			return;
+
+		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+
+		ret = add_event_to_24x7_request(event, request_buffer);
+		if (ret) {
+			__this_cpu_write(hv_24x7_txn_err, ret);
+		} else {
+			/*
+			 * Assoicate the event with the HCALL request index,
+			 * so ->commit_txn() can quickly find/update count.
+			 */
+			i = request_buffer->num_requests - 1;
+
+			h24x7hw = &get_cpu_var(hv_24x7_hw);
+			h24x7hw->events[i] = event;
+			put_cpu_var(h24x7hw);
+		}
+
+		put_cpu_var(hv_24x7_reqb);
+	} else {
+		now = h_24x7_get_value(event);
+		update_event_count(event, now);
+	}
 }
 
 static void h_24x7_event_start(struct perf_event *event, int flags)
@@ -1255,6 +1303,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags)
 	return 0;
 }
 
+/*
+ * 24x7 counters only support READ transactions. They are
+ * always counting and dont need/support ADD transactions.
+ * Cache the flags, but otherwise ignore transactions that
+ * are not PERF_PMU_TXN_READ.
+ */
+static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+
+	/* We should not be called if we are already in a txn */
+	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
+
+	__this_cpu_write(hv_24x7_txn_flags, flags);
+	if (flags & ~PERF_PMU_TXN_READ)
+		return;
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	init_24x7_request(request_buffer, result_buffer);
+
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+}
+
+/*
+ * Clean up transaction state.
+ *
+ * NOTE: Ignore state of request and result buffers for now.
+ *	 We will initialize them during the next read/txn.
+ */
+static void reset_txn(void)
+{
+	__this_cpu_write(hv_24x7_txn_flags, 0);
+	__this_cpu_write(hv_24x7_txn_err, 0);
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and dont need/support ADD transactions. Clear ->txn_flags but otherwise
+ * ignore transactions that are not of type PERF_PMU_TXN_READ.
+ *
+ * For READ transactions, submit all pending 24x7 requests (i.e requests
+ * that were queued by h_24x7_event_read()), to the hypervisor and update
+ * the event counts.
+ */
+static int h_24x7_event_commit_txn(struct pmu *pmu)
+{
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	struct hv_24x7_result *resb;
+	struct perf_event *event;
+	u64 count;
+	int i, ret, txn_flags;
+	struct hv_24x7_hw *h24x7hw;
+
+	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+	WARN_ON_ONCE(!txn_flags);
+
+	ret = 0;
+	if (txn_flags & ~PERF_PMU_TXN_READ)
+		goto out;
+
+	ret = __this_cpu_read(hv_24x7_txn_err);
+	if (ret)
+		goto out;
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	ret = make_24x7_request(request_buffer, result_buffer);
+	if (ret) {
+		log_24x7_hcall(request_buffer, result_buffer, ret);
+		goto put_reqb;
+	}
+
+	h24x7hw = &get_cpu_var(hv_24x7_hw);
+
+	/* Update event counts from hcall */
+	for (i = 0; i < request_buffer->num_requests; i++) {
+		resb = &result_buffer->results[i];
+		count = be64_to_cpu(resb->elements[0].element_data[0]);
+		event = h24x7hw->events[i];
+		h24x7hw->events[i] = NULL;
+		update_event_count(event, count);
+	}
+
+	put_cpu_var(hv_24x7_hw);
+
+put_reqb:
+	put_cpu_var(hv_24x7_resb);
+	put_cpu_var(hv_24x7_reqb);
+out:
+	reset_txn();
+	return ret;
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and dont need/support ADD transactions. However, regardless of type
+ * of transaction, all we need to do is cleanup, so we don't have to check
+ * the type of transaction.
+ */
+static void h_24x7_event_cancel_txn(struct pmu *pmu)
+{
+	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
+	reset_txn();
+}
+
 static struct pmu h_24x7_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 
@@ -1266,6 +1425,9 @@ static struct pmu h_24x7_pmu = {
 	.start       = h_24x7_event_start,
 	.stop        = h_24x7_event_stop,
 	.read        = h_24x7_event_read,
+	.start_txn   = h_24x7_event_start_txn,
+	.commit_txn  = h_24x7_event_commit_txn,
+	.cancel_txn  = h_24x7_event_cancel_txn,
 };
 
 static int hv_24x7_init(void)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351db601b..7d5e295255b7 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -676,6 +676,9 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
 		return -1;
 
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+		return -1;
+
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
 		pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
 		return pmu_bhrb_filter;
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index e0e68a1c0d3c..aed7714495c1 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -327,7 +327,7 @@ static void axon_msi_shutdown(struct platform_device *device)
 	u32 tmp;
 
 	pr_devel("axon_msi: disabling %s\n",
-		  msic->irq_domain->of_node->full_name);
+		 irq_domain_get_of_node(msic->irq_domain)->full_name);
 	tmp  = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
 	tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
 	msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 9d27de62dc62..54ee5743cb72 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -231,20 +231,23 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
 	const u32 *imap, *tmp;
 	int imaplen, intsize, unit;
 	struct device_node *iic;
+	struct device_node *of_node;
+
+	of_node = irq_domain_get_of_node(pic->host);
 
 	/* First, we check whether we have a real "interrupts" in the device
 	 * tree in case the device-tree is ever fixed
 	 */
-	virq = irq_of_parse_and_map(pic->host->of_node, 0);
+	virq = irq_of_parse_and_map(of_node, 0);
 	if (virq)
 		return virq;
 
 	/* Now do the horrible hacks */
-	tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL);
+	tmp = of_get_property(of_node, "#interrupt-cells", NULL);
 	if (tmp == NULL)
 		return NO_IRQ;
 	intsize = *tmp;
-	imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen);
+	imap = of_get_property(of_node, "interrupt-map", &imaplen);
 	if (imap == NULL || imaplen < (intsize + 1))
 		return NO_IRQ;
 	iic = of_find_node_by_phandle(imap[intsize]);
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
index b304a9fe55cc..d9af76342d99 100644
--- a/arch/powerpc/platforms/pasemi/msi.c
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -144,9 +144,11 @@ int mpic_pasemi_msi_init(struct mpic *mpic)
 {
 	int rc;
 	struct pci_controller *phb;
+	struct device_node *of_node;
 
-	if (!mpic->irqhost->of_node ||
-	    !of_device_is_compatible(mpic->irqhost->of_node,
+	of_node = irq_domain_get_of_node(mpic->irqhost);
+	if (!of_node ||
+	    !of_device_is_compatible(of_node,
 				     "pasemi,pwrficient-openpic"))
 		return -ENODEV;
 
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 2c91ee7800b9..6ccfb6c1c707 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -137,7 +137,7 @@ static void opal_handle_irq_work(struct irq_work *work)
 static int opal_event_match(struct irq_domain *h, struct device_node *node,
 			    enum irq_domain_bus_token bus_token)
 {
-	return h->of_node == node;
+	return irq_domain_get_of_node(h) == node;
 }
 
 static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 230f3a7cdea4..4296d55e88f3 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -487,9 +487,12 @@ int opal_machine_check(struct pt_regs *regs)
 	 *    PRD component would have already got notified about this
 	 *    error through other channels.
 	 *
-	 * In any case, let us just fall through. We anyway heading
-	 * down to panic path.
+	 * If hardware marked this as an unrecoverable MCE, we are
+	 * going to panic anyway. Even if it didn't, it's not safe to
+	 * continue at this point, so we should explicitly panic.
 	 */
+
+	panic("PowerNV Unrecovered Machine Check");
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 8f70ba681a78..ca264833ee64 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void)
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
+
+	/*
+	 * Hard-disable interrupts, and then clear irq_happened flags
+	 * that we can safely ignore while off-line, since they
+	 * are for things for which we do no processing when off-line
+	 * (or in the case of HMI, all the processing we need to do
+	 * is done in lower-level real-mode code).
+	 */
+	hard_irq_disable();
+	local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
+
 	while (!generic_check_cpu_restart(cpu)) {
+		/*
+		 * Clear IPI flag, since we don't handle IPIs while
+		 * offline, except for those when changing micro-threading
+		 * mode, which are handled explicitly below, and those
+		 * for coming online, which are handled via
+		 * generic_check_cpu_restart() calls.
+		 */
+		kvmppc_set_host_ipi(cpu, 0);
 
 		ppc64_runlatch_off();
 
@@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void)
 		 * having finished executing in a KVM guest, then srr1
 		 * contains 0.
 		 */
-		if ((srr1 & wmask) == SRR1_WAKEEE) {
+		if (((srr1 & wmask) == SRR1_WAKEEE) ||
+		    (local_paca->irq_happened & PACA_IRQ_EE)) {
 			icp_native_flush_interrupt();
-			local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
-			smp_mb();
 		} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
 			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 			asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
-			kvmppc_set_host_ipi(cpu, 0);
 		}
+		local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
+		smp_mb();
 
 		if (cpu_core_split_required())
 			continue;
 
-		if (!generic_check_cpu_restart(cpu))
+		if (srr1 && !generic_check_cpu_restart(cpu))
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
 	}
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 09787139834d..3db53e8aff92 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -194,11 +194,6 @@ static const struct os_area_db_id os_area_db_id_rtc_diff = {
 	.key = OS_AREA_DB_KEY_RTC_DIFF
 };
 
-static const struct os_area_db_id os_area_db_id_video_mode = {
-	.owner = OS_AREA_DB_OWNER_LINUX,
-	.key = OS_AREA_DB_KEY_VIDEO_MODE
-};
-
 #define SECONDS_FROM_1970_TO_2000 946684800LL
 
 /**
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index eca0b00794fa..bffcc7a486a1 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -181,7 +181,8 @@ static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node,
 			      enum irq_domain_bus_token bus_token)
 {
 	/* Exact match, unless ehv_pic node is NULL */
-	return h->of_node == NULL || h->of_node == node;
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
 }
 
 static int ehv_pic_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 48a576aa47b9..3a2be3676f43 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -110,7 +110,7 @@ static int fsl_msi_init_allocator(struct fsl_msi *msi_data)
 	int rc, hwirq;
 
 	rc = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS_MAX,
-			      msi_data->irqhost->of_node);
+			      irq_domain_get_of_node(msi_data->irqhost));
 	if (rc)
 		return rc;
 
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 1c65ef92768d..610f472f91d1 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1037,7 +1037,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs)
 			ret = get_user(regs->nip, &inst);
 			pagefault_enable();
 		} else {
-			ret = probe_kernel_address(regs->nip, inst);
+			ret = probe_kernel_address((void *)regs->nip, inst);
 		}
 
 		if (!ret && mcheck_handle_load(regs, inst)) {
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index e1a9c2c2d5d3..6f99ed3967fd 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -165,7 +165,8 @@ static struct resource pic_edgectrl_iores = {
 static int i8259_host_match(struct irq_domain *h, struct device_node *node,
 			    enum irq_domain_bus_token bus_token)
 {
-	return h->of_node == NULL || h->of_node == node;
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
 }
 
 static int i8259_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c
index b1297ab1599b..f76ee39cb337 100644
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -675,7 +675,8 @@ static int ipic_host_match(struct irq_domain *h, struct device_node *node,
 			   enum irq_domain_bus_token bus_token)
 {
 	/* Exact match, unless ipic node is NULL */
-	return h->of_node == NULL || h->of_node == node;
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
 }
 
 static int ipic_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 123e43612f0a..2a0452e364ba 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -993,7 +993,8 @@ static int mpic_host_match(struct irq_domain *h, struct device_node *node,
 			   enum irq_domain_bus_token bus_token)
 {
 	/* Exact match, unless mpic node is NULL */
-	return h->of_node == NULL || h->of_node == node;
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
 }
 
 static int mpic_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c
index 7dc39f35a4cc..1d48a5385905 100644
--- a/arch/powerpc/sysdev/mpic_msi.c
+++ b/arch/powerpc/sysdev/mpic_msi.c
@@ -84,7 +84,7 @@ int mpic_msi_init_allocator(struct mpic *mpic)
 	int rc;
 
 	rc = msi_bitmap_alloc(&mpic->msi_bitmap, mpic->num_sources,
-			      mpic->irqhost->of_node);
+			      irq_domain_get_of_node(mpic->irqhost));
 	if (rc)
 		return rc;
 
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
index fbcc1f855a7f..ef36f16f9f6f 100644
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c
@@ -248,7 +248,8 @@ static int qe_ic_host_match(struct irq_domain *h, struct device_node *node,
 			    enum irq_domain_bus_token bus_token)
 {
 	/* Exact match, unless qe_ic node is NULL */
-	return h->of_node == NULL || h->of_node == node;
+	struct device_node *of_node = irq_domain_get_of_node(h);
+	return of_node == NULL || of_node == node;
 }
 
 static int qe_ic_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1d57000b1b24..3a55f493c7da 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -101,6 +101,7 @@ config S390
 	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select ARCH_WANT_IPC_PARSE_VERSION
@@ -118,6 +119,7 @@ config S390
 	select HAVE_ARCH_EARLY_PFN_TO_NID
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
@@ -582,6 +584,7 @@ menuconfig PCI
 	bool "PCI support"
 	select HAVE_DMA_ATTRS
 	select PCI_MSI
+	select IOMMU_SUPPORT
 	help
 	  Enable PCI support.
 
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index d4788111c161..fac6ac9790fa 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o
 
 KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float
 KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 0c98f1508542..ed7da281df66 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -381,7 +381,7 @@ CONFIG_ISCSI_TCP=m
 CONFIG_SCSI_DEBUG=m
 CONFIG_ZFCP=y
 CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_HP_SW=m
 CONFIG_SCSI_DH_EMC=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 82083e1fbdc4..9858b14cde1e 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -377,7 +377,7 @@ CONFIG_ISCSI_TCP=m
 CONFIG_SCSI_DEBUG=m
 CONFIG_ZFCP=y
 CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_HP_SW=m
 CONFIG_SCSI_DH_EMC=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index c05c9e0821e3..7f14f80717d4 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -377,7 +377,7 @@ CONFIG_ISCSI_TCP=m
 CONFIG_SCSI_DEBUG=m
 CONFIG_ZFCP=y
 CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
 CONFIG_SCSI_DH_RDAC=m
 CONFIG_SCSI_DH_HP_SW=m
 CONFIG_SCSI_DH_EMC=m
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index f4e9dc71675f..10f200790079 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -19,7 +19,7 @@
 #include <crypto/sha.h>
 
 /* must be big enough for the largest SHA variant */
-#define SHA_MAX_STATE_SIZE	16
+#define SHA_MAX_STATE_SIZE	(SHA512_DIGEST_SIZE / 4)
 #define SHA_MAX_BLOCK_SIZE      SHA512_BLOCK_SIZE
 
 struct s390_sha_ctx {
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 5eeffeefae06..045035796ca7 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
@@ -336,7 +337,7 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 
 /* Diagnose 204 functions */
 
-static int diag204(unsigned long subcode, unsigned long size, void *addr)
+static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
 {
 	register unsigned long _subcode asm("0") = subcode;
 	register unsigned long _size asm("1") = size;
@@ -351,6 +352,12 @@ static int diag204(unsigned long subcode, unsigned long size, void *addr)
 	return _size;
 }
 
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X204);
+	return __diag204(subcode, size, addr);
+}
+
 /*
  * For the old diag subcode 4 with simple data format we have to use real
  * memory. If we use subcode 6 or 7 with extended data format, we can (and
@@ -505,6 +512,7 @@ static int diag224(void *ptr)
 {
 	int rc = -EOPNOTSUPP;
 
+	diag_stat_inc(DIAG_STAT_X224);
 	asm volatile(
 		"	diag	%1,%2,0x224\n"
 		"0:	lhi	%0,0x0\n"
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 24c747a0fcc3..0f1927cbba31 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -8,6 +8,7 @@
 
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <asm/diag.h>
 #include <asm/hypfs.h>
 #include "hypfs.h"
 
@@ -18,6 +19,7 @@
  */
 static void diag0c(struct hypfs_diag0c_entry *entry)
 {
+	diag_stat_inc(DIAG_STAT_X00C);
 	asm volatile (
 		"	sam31\n"
 		"	diag	%0,%0,0x0c\n"
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index dd42a26d049d..c9e5c72f78bd 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 #include <asm/compat.h>
+#include <asm/diag.h>
 #include <asm/sclp.h>
 #include "hypfs.h"
 
@@ -22,7 +23,7 @@
 
 #define DIAG304_CMD_MAX		2
 
-static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd)
 {
 	register unsigned long _data asm("2") = (unsigned long) data;
 	register unsigned long _rc asm("3");
@@ -34,6 +35,12 @@ static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
 	return _rc;
 }
 
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	diag_stat_inc(DIAG_STAT_X304);
+	return __hypfs_sprp_diag304(data, cmd);
+}
+
 static void hypfs_sprp_free(const void *data)
 {
 	free_page((unsigned long) data);
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index afbe07907c10..44feac38ccfc 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/timex.h>
 #include "hypfs.h"
@@ -66,6 +67,7 @@ static int diag2fc(int size, char* query, void *addr)
 	memset(parm_list.aci_grp, 0x40, NAME_LEN);
 	rc = -1;
 
+	diag_stat_inc(DIAG_STAT_X2FC);
 	asm volatile(
 		"	diag    %0,%1,0x2fc\n"
 		"0:\n"
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 5ad26dd94d77..9043d2e1e2ae 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
index 16887c5fd989..a6263d4e8e56 100644
--- a/arch/s390/include/asm/appldata.h
+++ b/arch/s390/include/asm/appldata.h
@@ -7,6 +7,7 @@
 #ifndef _ASM_S390_APPLDATA_H
 #define _ASM_S390_APPLDATA_H
 
+#include <asm/diag.h>
 #include <asm/io.h>
 
 #define APPLDATA_START_INTERVAL_REC	0x80
@@ -53,6 +54,7 @@ static inline int appldata_asm(struct appldata_product_id *id,
 	parm_list.buffer_length = length;
 	parm_list.product_id_addr = (unsigned long) id;
 	parm_list.buffer_addr = virt_to_phys(buffer);
+	diag_stat_inc(DIAG_STAT_X0DC);
 	asm volatile(
 		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 117fa5c921c1..911064aa59b2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -36,7 +36,6 @@
 									\
 	typecheck(atomic_t *, ptr);					\
 	asm volatile(							\
-		__barrier						\
 		op_string "	%0,%2,%1\n"				\
 		__barrier						\
 		: "=d" (old_val), "+Q" ((ptr)->counter)			\
@@ -180,7 +179,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 									\
 	typecheck(atomic64_t *, ptr);					\
 	asm volatile(							\
-		__barrier						\
 		op_string "	%0,%2,%1\n"				\
 		__barrier						\
 		: "=d" (old_val), "+Q" ((ptr)->counter)			\
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index d48fe0162331..d68e11e0df5e 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -22,10 +22,10 @@
 
 #define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
 
-#define rmb()				mb()
-#define wmb()				mb()
-#define dma_rmb()			rmb()
-#define dma_wmb()			wmb()
+#define rmb()				barrier()
+#define wmb()				barrier()
+#define dma_rmb()			mb()
+#define dma_wmb()			mb()
 #define smp_mb()			mb()
 #define smp_rmb()			rmb()
 #define smp_wmb()			wmb()
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 9b68e98a724f..8043f10da6b5 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -11,30 +11,25 @@
  * big-endian system because, unlike little endian, the number of each
  * bit depends on the word size.
  *
- * The bitop functions are defined to work on unsigned longs, so for an
- * s390x system the bits end up numbered:
+ * The bitop functions are defined to work on unsigned longs, so the bits
+ * end up numbered:
  *   |63..............0|127............64|191...........128|255...........192|
- * and on s390:
- *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
  *
  * There are a few little-endian macros used mostly for filesystem
- * bitmaps, these work on similar bit arrays layouts, but
- * byte-oriented:
+ * bitmaps, these work on similar bit array layouts, but byte-oriented:
  *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
  *
- * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
- * number field needs to be reversed compared to the big-endian bit
- * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ * The main difference is that bit 3-5 in the bit number field needs to be
+ * reversed compared to the big-endian bit fields. This can be achieved by
+ * XOR with 0x38.
  *
- * We also have special functions which work with an MSB0 encoding:
- * on an s390x system the bits are numbered:
+ * We also have special functions which work with an MSB0 encoding.
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  *
- * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
- * number field needs to be reversed compared to the LSB0 encoded bit
- * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
+ * The main difference is that bit 0-63 in the bit number field needs to be
+ * reversed compared to the LSB0 encoded bit fields. This can be achieved by
+ * XOR with 0x3f.
  *
  */
 
@@ -64,7 +59,6 @@
 								\
 	typecheck(unsigned long *, (__addr));			\
 	asm volatile(						\
-		__barrier					\
 		__op_string "	%0,%2,%1\n"			\
 		__barrier					\
 		: "=d" (__old),	"+Q" (*(__addr))		\
@@ -276,12 +270,32 @@ static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
 	return (*addr >> (nr & 7)) & 1;
 }
 
+static inline int test_and_set_bit_lock(unsigned long nr,
+					volatile unsigned long *ptr)
+{
+	if (test_bit(nr, ptr))
+		return 1;
+	return test_and_set_bit(nr, ptr);
+}
+
+static inline void clear_bit_unlock(unsigned long nr,
+				    volatile unsigned long *ptr)
+{
+	smp_mb__before_atomic();
+	clear_bit(nr, ptr);
+}
+
+static inline void __clear_bit_unlock(unsigned long nr,
+				      volatile unsigned long *ptr)
+{
+	smp_mb();
+	__clear_bit(nr, ptr);
+}
+
 /*
  * Functions which use MSB0 bit numbering.
- * On an s390x system the bits are numbered:
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  */
 unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
 unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
@@ -446,7 +460,6 @@ static inline int fls(int word)
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/hweight.h>
-#include <asm-generic/bitops/lock.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 096339207764..0c5d8ee657f0 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -5,6 +5,7 @@
 #define _ASM_S390_CIO_H_
 
 #include <linux/spinlock.h>
+#include <linux/bitops.h>
 #include <asm/types.h>
 
 #define LPM_ANYPATH 0xff
@@ -296,6 +297,15 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
 	return 0;
 }
 
+/**
+ * pathmask_to_pos() - find the position of the left-most bit in a pathmask
+ * @mask: pathmask with at least one bit set
+ */
+static inline u8 pathmask_to_pos(u8 mask)
+{
+	return 8 - ffs(mask);
+}
+
 void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
index 806eac12e3bd..ed2630c23f90 100644
--- a/arch/s390/include/asm/cmb.h
+++ b/arch/s390/include/asm/cmb.h
@@ -6,6 +6,7 @@
 struct ccw_device;
 extern int enable_cmf(struct ccw_device *cdev);
 extern int disable_cmf(struct ccw_device *cdev);
+extern int __disable_cmf(struct ccw_device *cdev);
 extern u64 cmf_read(struct ccw_device *cdev, int index);
 extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
 
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 411464f4c97a..24ea6948e32b 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -32,7 +32,7 @@
 	__old;								\
 })
 
-#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)		\
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
 ({									\
 	register __typeof__(*(p1)) __old1 asm("2") = (o1);		\
 	register __typeof__(*(p2)) __old2 asm("3") = (o2);		\
@@ -40,7 +40,7 @@
 	register __typeof__(*(p2)) __new2 asm("5") = (n2);		\
 	int cc;								\
 	asm volatile(							\
-			insn   " %[old],%[new],%[ptr]\n"		\
+		"	cdsg	%[old],%[new],%[ptr]\n"			\
 		"	ipm	%[cc]\n"				\
 		"	srl	%[cc],28"				\
 		: [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2)	\
@@ -50,30 +50,6 @@
 	!cc;								\
 })
 
-#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
-	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
-
-#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
-	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
-
-extern void __cmpxchg_double_called_with_bad_pointer(void);
-
-#define __cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
-({									\
-	int __ret;							\
-	switch (sizeof(*(p1))) {					\
-	case 4:								\
-		__ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2);	\
-		break;							\
-	case 8:								\
-		__ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2);	\
-		break;							\
-	default:							\
-		__cmpxchg_double_called_with_bad_pointer();		\
-	}								\
-	__ret;								\
-})
-
 #define cmpxchg_double(p1, p2, o1, o2, n1, n2)				\
 ({									\
 	__typeof__(p1) __p1 = (p1);					\
@@ -81,7 +57,7 @@ extern void __cmpxchg_double_called_with_bad_pointer(void);
 	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
 	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
 	VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
-	__cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2);			\
+	__cmpxchg_double(__p1, __p2, o1, o2, n1, n2);			\
 })
 
 #define system_has_cmpxchg_double()	1
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 5243a8679a1d..9dd04b9e9782 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -22,15 +22,10 @@
 #define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
 #define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
 #define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
-#define CPU_MF_INT_RI_HALTED	(1 <<  5)	/* run-time instr. halted */
-#define CPU_MF_INT_RI_BUF_FULL	(1 <<  4)	/* run-time instr. program
-						   buffer full */
-
 #define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
 #define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE|	\
 				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|	\
 				 CPU_MF_INT_SF_LSDA)
-#define CPU_MF_INT_RI_MASK	(CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
 
 /* CPU measurement facility support */
 static inline int cpum_cf_avail(void)
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 17a373576868..d7697ab802f6 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,8 +46,6 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
 	__ctl_load(reg, cr, cr);
 }
 
-void __ctl_set_vx(void);
-
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 7e91c58072e2..5fac921c1c42 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -8,6 +8,34 @@
 #ifndef _ASM_S390_DIAG_H
 #define _ASM_S390_DIAG_H
 
+#include <linux/percpu.h>
+
+enum diag_stat_enum {
+	DIAG_STAT_X008,
+	DIAG_STAT_X00C,
+	DIAG_STAT_X010,
+	DIAG_STAT_X014,
+	DIAG_STAT_X044,
+	DIAG_STAT_X064,
+	DIAG_STAT_X09C,
+	DIAG_STAT_X0DC,
+	DIAG_STAT_X204,
+	DIAG_STAT_X210,
+	DIAG_STAT_X224,
+	DIAG_STAT_X250,
+	DIAG_STAT_X258,
+	DIAG_STAT_X288,
+	DIAG_STAT_X2C4,
+	DIAG_STAT_X2FC,
+	DIAG_STAT_X304,
+	DIAG_STAT_X308,
+	DIAG_STAT_X500,
+	NR_DIAG_STAT
+};
+
+void diag_stat_inc(enum diag_stat_enum nr);
+void diag_stat_inc_norecursion(enum diag_stat_enum nr);
+
 /*
  * Diagnose 10: Release page range
  */
@@ -18,6 +46,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
 	start_addr = start_pfn << PAGE_SHIFT;
 	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
 
+	diag_stat_inc(DIAG_STAT_X010);
 	asm volatile(
 		"0:	diag	%0,%1,0x10\n"
 		"1:\n"
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
index f7e5c36688c3..105f90e63a0e 100644
--- a/arch/s390/include/asm/etr.h
+++ b/arch/s390/include/asm/etr.h
@@ -211,8 +211,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func)
 #define ETR_PTFF_SGS	0x43	/* set gross steering rate */
 
 /* Functions needed by the machine check handler */
-void etr_switch_to_local(void);
-void etr_sync_check(void);
+int etr_switch_to_local(void);
+int etr_sync_check(void);
+void etr_queue_work(void);
 
 /* notifier for syncs */
 extern struct atomic_notifier_head s390_epoch_delta_notifier;
@@ -253,7 +254,8 @@ struct stp_sstpi {
 } __attribute__ ((packed));
 
 /* Functions needed by the machine check handler */
-void stp_sync_check(void);
-void stp_island_check(void);
+int stp_sync_check(void);
+int stp_island_check(void);
+void stp_queue_work(void);
 
 #endif /* __S390_ETR_H */
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
new file mode 100644
index 000000000000..5e04f3cbd320
--- /dev/null
+++ b/arch/s390/include/asm/fpu/api.h
@@ -0,0 +1,30 @@
+/*
+ * In-kernel FPU support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_API_H
+#define _ASM_S390_FPU_API_H
+
+void save_fpu_regs(void);
+
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+#endif /* _ASM_S390_FPU_API_H */
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu/internal.h
index 55dc2c0fb40a..2559b16da525 100644
--- a/arch/s390/include/asm/fpu-internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -1,5 +1,5 @@
 /*
- * General floating pointer and vector register helpers
+ * FPU state and register content conversion primitives
  *
  * Copyright IBM Corp. 2015
  * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
@@ -8,50 +8,9 @@
 #ifndef _ASM_S390_FPU_INTERNAL_H
 #define _ASM_S390_FPU_INTERNAL_H
 
-#define FPU_USE_VX		1	/* Vector extension is active */
-
-#ifndef __ASSEMBLY__
-
-#include <linux/errno.h>
 #include <linux/string.h>
-#include <asm/linkage.h>
 #include <asm/ctl_reg.h>
-#include <asm/sigcontext.h>
-
-struct fpu {
-	__u32 fpc;			/* Floating-point control */
-	__u32 flags;
-	union {
-		void *regs;
-		freg_t *fprs;		/* Floating-point register save area */
-		__vector128 *vxrs;	/* Vector register save area */
-	};
-};
-
-void save_fpu_regs(void);
-
-#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
-#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
-
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array { __vector128 _[__NUM_VXRS]; };
-
-static inline int test_fp_ctl(u32 fpc)
-{
-	u32 orig_fpc;
-	int rc;
-
-	asm volatile(
-		"	efpc    %1\n"
-		"	sfpc	%2\n"
-		"0:	sfpc	%1\n"
-		"	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
-		: "d" (fpc), "0" (-EINVAL));
-	return rc;
-}
+#include <asm/fpu/types.h>
 
 static inline void save_vx_regs_safe(__vector128 *vxrs)
 {
@@ -89,7 +48,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
 	fpregs->pad = 0;
-	if (is_vx_fpu(fpu))
+	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
 	else
 		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
@@ -98,13 +57,11 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
-	if (is_vx_fpu(fpu))
+	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
 	else
 		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
 		       sizeof(fpregs->fprs));
 }
 
-#endif
-
 #endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
new file mode 100644
index 000000000000..14a8b0c14f87
--- /dev/null
+++ b/arch/s390/include/asm/fpu/types.h
@@ -0,0 +1,25 @@
+/*
+ * FPU data structures
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_FPU_TYPES_H
+#define _ASM_S390_FPU_TYPES_H
+
+#include <asm/sigcontext.h>
+
+struct fpu {
+	__u32 fpc;			/* Floating-point control */
+	union {
+		void *regs;
+		freg_t *fprs;		/* Floating-point register save area */
+		__vector128 *vxrs;	/* Vector register save area */
+	};
+};
+
+/* VX array structure for address operand constraints in inline assemblies */
+struct vx_array { __vector128 _[__NUM_VXRS]; };
+
+#endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
index 113cd963dbbe..51ff96d9f287 100644
--- a/arch/s390/include/asm/idle.h
+++ b/arch/s390/include/asm/idle.h
@@ -24,4 +24,6 @@ struct s390_idle_data {
 extern struct device_attribute dev_attr_idle_count;
 extern struct device_attribute dev_attr_idle_time_us;
 
+void psw_idle(struct s390_idle_data *, unsigned long);
+
 #endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index ff95d15a2384..f97b055de76a 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -47,7 +47,6 @@ enum interruption_class {
 	IRQEXT_IUC,
 	IRQEXT_CMS,
 	IRQEXT_CMC,
-	IRQEXT_CMR,
 	IRQEXT_FTP,
 	IRQIO_CIO,
 	IRQIO_QAI,
@@ -96,6 +95,19 @@ enum irq_subclass {
 	IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
 };
 
+#define CR0_IRQ_SUBCLASS_MASK					  \
+	((1UL << (63 - 30))  /* Warning Track */		| \
+	 (1UL << (63 - 48))  /* Malfunction Alert */		| \
+	 (1UL << (63 - 49))  /* Emergency Signal */		| \
+	 (1UL << (63 - 50))  /* External Call */		| \
+	 (1UL << (63 - 52))  /* Clock Comparator */		| \
+	 (1UL << (63 - 53))  /* CPU Timer */			| \
+	 (1UL << (63 - 54))  /* Service Signal */		| \
+	 (1UL << (63 - 57))  /* Interrupt Key */		| \
+	 (1UL << (63 - 58))  /* Measurement Alert */		| \
+	 (1UL << (63 - 59))  /* Timing Alert */			| \
+	 (1UL << (63 - 62))) /* IUCV */
+
 void irq_subclass_register(enum irq_subclass subclass);
 void irq_subclass_unregister(enum irq_subclass subclass);
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8ced426091e1..efaac2c3bb77 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -22,7 +22,7 @@
 #include <linux/kvm.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/isc.h>
 
 #define KVM_MAX_VCPUS 64
@@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index e0f842308a68..41393052ac57 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -27,10 +27,9 @@
 #define __S390_KVM_PARA_H
 
 #include <uapi/asm/kvm_para.h>
+#include <asm/diag.h>
 
-
-
-static inline long kvm_hypercall0(unsigned long nr)
+static inline long __kvm_hypercall0(unsigned long nr)
 {
 	register unsigned long __nr asm("1") = nr;
 	register long __rc asm("2");
@@ -40,7 +39,13 @@ static inline long kvm_hypercall0(unsigned long nr)
 	return __rc;
 }
 
-static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+static inline long kvm_hypercall0(unsigned long nr)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall0(nr);
+}
+
+static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1)
 {
 	register unsigned long __nr asm("1") = nr;
 	register unsigned long __p1 asm("2") = p1;
@@ -51,7 +56,13 @@ static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
 	return __rc;
 }
 
-static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall1(nr, p1);
+}
+
+static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1,
 			       unsigned long p2)
 {
 	register unsigned long __nr asm("1") = nr;
@@ -65,7 +76,14 @@ static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
 	return __rc;
 }
 
-static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+			       unsigned long p2)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall2(nr, p1, p2);
+}
+
+static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3)
 {
 	register unsigned long __nr asm("1") = nr;
@@ -80,8 +98,14 @@ static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
 	return __rc;
 }
 
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall3(nr, p1, p2, p3);
+}
 
-static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4)
 {
@@ -98,7 +122,15 @@ static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
 	return __rc;
 }
 
-static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall4(nr, p1, p2, p3, p4);
+}
+
+static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4, unsigned long p5)
 {
@@ -116,7 +148,15 @@ static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
 	return __rc;
 }
 
-static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall5(nr, p1, p2, p3, p4, p5);
+}
+
+static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1,
 			       unsigned long p2, unsigned long p3,
 			       unsigned long p4, unsigned long p5,
 			       unsigned long p6)
@@ -137,6 +177,15 @@ static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
 	return __rc;
 }
 
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5,
+			       unsigned long p6)
+{
+	diag_stat_inc(DIAG_STAT_X500);
+	return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6);
+}
+
 /* kvm on s390 is always paravirtualization enabled */
 static inline int kvm_para_available(void)
 {
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 663f23e37460..afe1cfebf1a4 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,7 +67,7 @@ struct _lowcore {
 	__u8	pad_0x00c4[0x00c8-0x00c4];	/* 0x00c4 */
 	__u32	stfl_fac_list;			/* 0x00c8 */
 	__u8	pad_0x00cc[0x00e8-0x00cc];	/* 0x00cc */
-	__u32	mcck_interruption_code[2];	/* 0x00e8 */
+	__u64	mcck_interruption_code;		/* 0x00e8 */
 	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
 	__u32	external_damage_code;		/* 0x00f4 */
 	__u64	failing_storage_address;	/* 0x00f8 */
@@ -132,7 +132,14 @@ struct _lowcore {
 	/* Address space pointer. */
 	__u64	kernel_asce;			/* 0x0358 */
 	__u64	user_asce;			/* 0x0360 */
-	__u64	current_pid;			/* 0x0368 */
+
+	/*
+	 * The lpp and current_pid fields form a
+	 * 64-bit value that is set as program
+	 * parameter with the LPP instruction.
+	 */
+	__u32	lpp;				/* 0x0368 */
+	__u32	current_pid;			/* 0x036c */
 
 	/* SMP info area */
 	__u32	cpu_nr;				/* 0x0370 */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 3027a5a72b74..b75fd910386a 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -11,51 +11,62 @@
 #ifndef _ASM_S390_NMI_H
 #define _ASM_S390_NMI_H
 
+#include <linux/const.h>
 #include <linux/types.h>
 
-struct mci {
-	__u32 sd :  1; /* 00 system damage */
-	__u32 pd :  1; /* 01 instruction-processing damage */
-	__u32 sr :  1; /* 02 system recovery */
-	__u32	 :  1; /* 03 */
-	__u32 cd :  1; /* 04 timing-facility damage */
-	__u32 ed :  1; /* 05 external damage */
-	__u32	 :  1; /* 06 */
-	__u32 dg :  1; /* 07 degradation */
-	__u32 w  :  1; /* 08 warning pending */
-	__u32 cp :  1; /* 09 channel-report pending */
-	__u32 sp :  1; /* 10 service-processor damage */
-	__u32 ck :  1; /* 11 channel-subsystem damage */
-	__u32	 :  2; /* 12-13 */
-	__u32 b  :  1; /* 14 backed up */
-	__u32	 :  1; /* 15 */
-	__u32 se :  1; /* 16 storage error uncorrected */
-	__u32 sc :  1; /* 17 storage error corrected */
-	__u32 ke :  1; /* 18 storage-key error uncorrected */
-	__u32 ds :  1; /* 19 storage degradation */
-	__u32 wp :  1; /* 20 psw mwp validity */
-	__u32 ms :  1; /* 21 psw mask and key validity */
-	__u32 pm :  1; /* 22 psw program mask and cc validity */
-	__u32 ia :  1; /* 23 psw instruction address validity */
-	__u32 fa :  1; /* 24 failing storage address validity */
-	__u32 vr :  1; /* 25 vector register validity */
-	__u32 ec :  1; /* 26 external damage code validity */
-	__u32 fp :  1; /* 27 floating point register validity */
-	__u32 gr :  1; /* 28 general register validity */
-	__u32 cr :  1; /* 29 control register validity */
-	__u32	 :  1; /* 30 */
-	__u32 st :  1; /* 31 storage logical validity */
-	__u32 ie :  1; /* 32 indirect storage error */
-	__u32 ar :  1; /* 33 access register validity */
-	__u32 da :  1; /* 34 delayed access exception */
-	__u32	 :  7; /* 35-41 */
-	__u32 pr :  1; /* 42 tod programmable register validity */
-	__u32 fc :  1; /* 43 fp control register validity */
-	__u32 ap :  1; /* 44 ancillary report */
-	__u32	 :  1; /* 45 */
-	__u32 ct :  1; /* 46 cpu timer validity */
-	__u32 cc :  1; /* 47 clock comparator validity */
-	__u32	 : 16; /* 47-63 */
+#define MCCK_CODE_SYSTEM_DAMAGE		_BITUL(63)
+#define MCCK_CODE_CPU_TIMER_VALID	_BITUL(63 - 46)
+#define MCCK_CODE_PSW_MWP_VALID		_BITUL(63 - 20)
+#define MCCK_CODE_PSW_IA_VALID		_BITUL(63 - 23)
+
+#ifndef __ASSEMBLY__
+
+union mci {
+	unsigned long val;
+	struct {
+		u64 sd :  1; /* 00 system damage */
+		u64 pd :  1; /* 01 instruction-processing damage */
+		u64 sr :  1; /* 02 system recovery */
+		u64    :  1; /* 03 */
+		u64 cd :  1; /* 04 timing-facility damage */
+		u64 ed :  1; /* 05 external damage */
+		u64    :  1; /* 06 */
+		u64 dg :  1; /* 07 degradation */
+		u64 w  :  1; /* 08 warning pending */
+		u64 cp :  1; /* 09 channel-report pending */
+		u64 sp :  1; /* 10 service-processor damage */
+		u64 ck :  1; /* 11 channel-subsystem damage */
+		u64    :  2; /* 12-13 */
+		u64 b  :  1; /* 14 backed up */
+		u64    :  1; /* 15 */
+		u64 se :  1; /* 16 storage error uncorrected */
+		u64 sc :  1; /* 17 storage error corrected */
+		u64 ke :  1; /* 18 storage-key error uncorrected */
+		u64 ds :  1; /* 19 storage degradation */
+		u64 wp :  1; /* 20 psw mwp validity */
+		u64 ms :  1; /* 21 psw mask and key validity */
+		u64 pm :  1; /* 22 psw program mask and cc validity */
+		u64 ia :  1; /* 23 psw instruction address validity */
+		u64 fa :  1; /* 24 failing storage address validity */
+		u64 vr :  1; /* 25 vector register validity */
+		u64 ec :  1; /* 26 external damage code validity */
+		u64 fp :  1; /* 27 floating point register validity */
+		u64 gr :  1; /* 28 general register validity */
+		u64 cr :  1; /* 29 control register validity */
+		u64    :  1; /* 30 */
+		u64 st :  1; /* 31 storage logical validity */
+		u64 ie :  1; /* 32 indirect storage error */
+		u64 ar :  1; /* 33 access register validity */
+		u64 da :  1; /* 34 delayed access exception */
+		u64    :  7; /* 35-41 */
+		u64 pr :  1; /* 42 tod programmable register validity */
+		u64 fc :  1; /* 43 fp control register validity */
+		u64 ap :  1; /* 44 ancillary report */
+		u64    :  1; /* 45 */
+		u64 ct :  1; /* 46 cpu timer validity */
+		u64 cc :  1; /* 47 clock comparator validity */
+		u64    : 16; /* 47-63 */
+	};
 };
 
 struct pt_regs;
@@ -63,4 +74,5 @@ struct pt_regs;
 extern void s390_handle_mcck(void);
 extern void s390_do_machine_check(struct pt_regs *regs);
 
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index 2a0efc63b9e5..dc19ee0c92aa 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -19,7 +19,7 @@ int numa_pfn_to_nid(unsigned long pfn);
 int __node_distance(int a, int b);
 void numa_update_cpu_topology(void);
 
-extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
 extern int numa_debug_enabled;
 
 #else
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 34d960353a08..c873e682b67f 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -62,6 +62,8 @@ struct zpci_bar_struct {
 	u8		size;		/* order 2 exponent */
 };
 
+struct s390_domain;
+
 /* Private data per function */
 struct zpci_dev {
 	struct pci_dev	*pdev;
@@ -118,6 +120,8 @@ struct zpci_dev {
 
 	struct dentry	*debugfs_dev;
 	struct dentry	*debugfs_perf;
+
+	struct s390_domain *s390_domain; /* s390 IOMMU domain data */
 };
 
 static inline bool zdev_enabled(struct zpci_dev *zdev)
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 30b4c179c38c..7a7abf1a5537 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -192,5 +192,8 @@ static inline unsigned long *get_st_pto(unsigned long entry)
 /* Prototypes */
 int zpci_dma_init_device(struct zpci_dev *);
 void zpci_dma_exit_device(struct zpci_dev *);
-
+void dma_free_seg_table(unsigned long);
+unsigned long *dma_alloc_cpu_table(void);
+void dma_cleanup_tables(unsigned long *);
+void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int);
 #endif
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index bdb2f51124ed..024f85f947ae 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -193,9 +193,15 @@ static inline int is_module_addr(void *addr)
 #define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY 0x002		/* SW pte soft dirty bit */
+#else
+#define _PAGE_SOFT_DIRTY 0x000
+#endif
+
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
-				 _PAGE_YOUNG)
+				 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)
 
 /*
  * handle_pte_fault uses pte_present and pte_none to find out the pte type
@@ -285,6 +291,12 @@ static inline int is_module_addr(void *addr)
 #define _SEGMENT_ENTRY_READ	0x0002	/* SW segment read bit */
 #define _SEGMENT_ENTRY_WRITE	0x0001	/* SW segment write bit */
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */
+#else
+#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */
+#endif
+
 /*
  * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
  *				dy..R...I...wr
@@ -589,6 +601,43 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif
 
+static inline int pte_soft_dirty(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SOFT_DIRTY;
+}
+#define pte_swp_soft_dirty pte_soft_dirty
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_SOFT_DIRTY;
+	return pte;
+}
+#define pte_swp_mksoft_dirty pte_mksoft_dirty
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_SOFT_DIRTY;
+	return pte;
+}
+#define pte_swp_clear_soft_dirty pte_clear_soft_dirty
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY;
+	return pmd;
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY;
+	return pmd;
+}
+
 static inline pgste_t pgste_get_lock(pte_t *ptep)
 {
 	unsigned long new = 0;
@@ -889,7 +938,7 @@ static inline pte_t pte_mkclean(pte_t pte)
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	pte_val(pte) |= _PAGE_DIRTY;
+	pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY;
 	if (pte_val(pte) & _PAGE_WRITE)
 		pte_val(pte) &= ~_PAGE_PROTECT;
 	return pte;
@@ -1218,8 +1267,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 					pte_t entry, int dirty)
 {
 	pgste_t pgste;
+	pte_t oldpte;
 
-	if (pte_same(*ptep, entry))
+	oldpte = *ptep;
+	if (pte_same(oldpte, entry))
 		return 0;
 	if (mm_has_pgste(vma->vm_mm)) {
 		pgste = pgste_get_lock(ptep);
@@ -1229,7 +1280,8 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 	ptep_flush_direct(vma->vm_mm, address, ptep);
 
 	if (mm_has_pgste(vma->vm_mm)) {
-		pgste_set_key(ptep, pgste, entry, vma->vm_mm);
+		if (pte_val(oldpte) & _PAGE_INVALID)
+			pgste_set_key(ptep, pgste, entry, vma->vm_mm);
 		pgste = pgste_set_pte(ptep, pgste, entry);
 		pgste_set_unlock(ptep, pgste);
 	} else
@@ -1340,7 +1392,8 @@ static inline pmd_t pmd_mkclean(pmd_t pmd)
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
 	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
+				_SEGMENT_ENTRY_SOFT_DIRTY;
 		if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
 			pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	}
@@ -1371,7 +1424,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 	if (pmd_large(pmd)) {
 		pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
 			_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
-			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT |
+			_SEGMENT_ENTRY_SOFT_DIRTY;
 		pmd_val(pmd) |= massage_pgprot_pmd(newprot);
 		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
 			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 085fb0d3c54e..b16c3d0a1b9f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -11,15 +11,19 @@
 #ifndef __ASM_S390_PROCESSOR_H
 #define __ASM_S390_PROCESSOR_H
 
+#include <linux/const.h>
+
 #define CIF_MCCK_PENDING	0	/* machine check handling is pending */
 #define CIF_ASCE		1	/* user asce needs fixup / uaccess */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
-#define CIF_FPU			3	/* restore vector registers */
+#define CIF_FPU			3	/* restore FPU registers */
+#define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
 
-#define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
-#define _CIF_ASCE		(1<<CIF_ASCE)
-#define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
-#define _CIF_FPU		(1<<CIF_FPU)
+#define _CIF_MCCK_PENDING	_BITUL(CIF_MCCK_PENDING)
+#define _CIF_ASCE		_BITUL(CIF_ASCE)
+#define _CIF_NOHZ_DELAY		_BITUL(CIF_NOHZ_DELAY)
+#define _CIF_FPU		_BITUL(CIF_FPU)
+#define _CIF_IGNORE_IRQ		_BITUL(CIF_IGNORE_IRQ)
 
 #ifndef __ASSEMBLY__
 
@@ -30,21 +34,22 @@
 #include <asm/ptrace.h>
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/internal.h>
 
 static inline void set_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags |= (1U << flag);
+	S390_lowcore.cpu_flags |= (1UL << flag);
 }
 
 static inline void clear_cpu_flag(int flag)
 {
-	S390_lowcore.cpu_flags &= ~(1U << flag);
+	S390_lowcore.cpu_flags &= ~(1UL << flag);
 }
 
 static inline int test_cpu_flag(int flag)
 {
-	return !!(S390_lowcore.cpu_flags & (1U << flag));
+	return !!(S390_lowcore.cpu_flags & (1UL << flag));
 }
 
 #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -102,7 +107,6 @@ struct thread_struct {
 	struct list_head list;
 	/* cpu runtime instrumentation */
 	struct runtime_instr_cb *ri_cb;
-	int ri_signum;
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
 };
 
@@ -139,8 +143,10 @@ struct stack_frame {
 
 #define ARCH_MIN_TASKALIGN	8
 
+extern __vector128 init_task_fpu_regs[__NUM_VXRS];
 #define INIT_THREAD {							\
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
+	.fpu.regs = (void *)&init_task_fpu_regs,			\
 }
 
 /*
@@ -217,7 +223,7 @@ static inline void __load_psw(psw_t psw)
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-static inline void __load_psw_mask (unsigned long mask)
+static inline void __load_psw_mask(unsigned long mask)
 {
 	unsigned long addr;
 	psw_t psw;
@@ -243,6 +249,16 @@ static inline unsigned long __extract_psw(void)
 	return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
 }
 
+static inline void local_mcck_enable(void)
+{
+	__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
+}
+
+static inline void local_mcck_disable(void)
+{
+	__load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
+}
+
 /*
  * Rewind PSW instruction address by specified number of bytes.
  */
@@ -266,65 +282,14 @@ void enabled_wait(void);
  */
 static inline void __noreturn disabled_wait(unsigned long code)
 {
-        unsigned long ctl_buf;
-        psw_t dw_psw;
-
-	dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
-        dw_psw.addr = code;
-        /* 
-         * Store status and then load disabled wait psw,
-         * the processor is dead afterwards
-         */
-	asm volatile(
-		"	stctg	0,0,0(%2)\n"
-		"	ni	4(%2),0xef\n"	/* switch off protection */
-		"	lctlg	0,0,0(%2)\n"
-		"	lghi	1,0x1000\n"
-		"	stpt	0x328(1)\n"	/* store timer */
-		"	stckc	0x330(1)\n"	/* store clock comparator */
-		"	stpx	0x318(1)\n"	/* store prefix register */
-		"	stam	0,15,0x340(1)\n"/* store access registers */
-		"	stfpc	0x31c(1)\n"	/* store fpu control */
-		"	std	0,0x200(1)\n"	/* store f0 */
-		"	std	1,0x208(1)\n"	/* store f1 */
-		"	std	2,0x210(1)\n"	/* store f2 */
-		"	std	3,0x218(1)\n"	/* store f3 */
-		"	std	4,0x220(1)\n"	/* store f4 */
-		"	std	5,0x228(1)\n"	/* store f5 */
-		"	std	6,0x230(1)\n"	/* store f6 */
-		"	std	7,0x238(1)\n"	/* store f7 */
-		"	std	8,0x240(1)\n"	/* store f8 */
-		"	std	9,0x248(1)\n"	/* store f9 */
-		"	std	10,0x250(1)\n"	/* store f10 */
-		"	std	11,0x258(1)\n"	/* store f11 */
-		"	std	12,0x260(1)\n"	/* store f12 */
-		"	std	13,0x268(1)\n"	/* store f13 */
-		"	std	14,0x270(1)\n"	/* store f14 */
-		"	std	15,0x278(1)\n"	/* store f15 */
-		"	stmg	0,15,0x280(1)\n"/* store general registers */
-		"	stctg	0,15,0x380(1)\n"/* store control registers */
-		"	oi	0x384(1),0x10\n"/* fake protection bit */
-		"	lpswe	0(%1)"
-		: "=m" (ctl_buf)
-		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
-	while (1);
-}
+	psw_t psw;
 
-/*
- * Use to set psw mask except for the first byte which
- * won't be changed by this function.
- */
-static inline void
-__set_psw_mask(unsigned long mask)
-{
-	__load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8)));
+	psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+	psw.addr = code;
+	__load_psw(psw);
+	while (1);
 }
 
-#define local_mcck_enable() \
-	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
-#define local_mcck_disable() \
-	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
-
 /*
  * Basic Machine Check/Program Check Handler.
  */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 6feda2599282..37cbc50947f2 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -6,13 +6,14 @@
 #ifndef _S390_PTRACE_H
 #define _S390_PTRACE_H
 
+#include <linux/const.h>
 #include <uapi/asm/ptrace.h>
 
 #define PIF_SYSCALL		0	/* inside a system call */
 #define PIF_PER_TRAP		1	/* deliver sigtrap on return to user */
 
-#define _PIF_SYSCALL		(1<<PIF_SYSCALL)
-#define _PIF_PER_TRAP		(1<<PIF_PER_TRAP)
+#define _PIF_SYSCALL		_BITUL(PIF_SYSCALL)
+#define _PIF_PER_TRAP		_BITUL(PIF_PER_TRAP)
 
 #ifndef __ASSEMBLY__
 
@@ -128,17 +129,17 @@ struct per_struct_kernel {
 
 static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	regs->flags |= (1U << flag);
+	regs->flags |= (1UL << flag);
 }
 
 static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	regs->flags &= ~(1U << flag);
+	regs->flags &= ~(1UL << flag);
 }
 
 static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
 {
-	return !!(regs->flags & (1U << flag));
+	return !!(regs->flags & (1UL << flag));
 }
 
 /*
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b8ffc1bd0a9f..23537661da0e 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -5,11 +5,38 @@
 #ifndef _ASM_S390_SETUP_H
 #define _ASM_S390_SETUP_H
 
+#include <linux/const.h>
 #include <uapi/asm/setup.h>
 
 
 #define PARMAREA		0x10400
 
+/*
+ * Machine features detected in head.S
+ */
+
+#define MACHINE_FLAG_VM		_BITUL(0)
+#define MACHINE_FLAG_IEEE	_BITUL(1)
+#define MACHINE_FLAG_CSP	_BITUL(2)
+#define MACHINE_FLAG_MVPG	_BITUL(3)
+#define MACHINE_FLAG_DIAG44	_BITUL(4)
+#define MACHINE_FLAG_IDTE	_BITUL(5)
+#define MACHINE_FLAG_DIAG9C	_BITUL(6)
+#define MACHINE_FLAG_KVM	_BITUL(8)
+#define MACHINE_FLAG_ESOP	_BITUL(9)
+#define MACHINE_FLAG_EDAT1	_BITUL(10)
+#define MACHINE_FLAG_EDAT2	_BITUL(11)
+#define MACHINE_FLAG_LPAR	_BITUL(12)
+#define MACHINE_FLAG_LPP	_BITUL(13)
+#define MACHINE_FLAG_TOPOLOGY	_BITUL(14)
+#define MACHINE_FLAG_TE		_BITUL(15)
+#define MACHINE_FLAG_TLB_LC	_BITUL(17)
+#define MACHINE_FLAG_VX		_BITUL(18)
+#define MACHINE_FLAG_CAD	_BITUL(19)
+
+#define LPP_MAGIC		_BITUL(31)
+#define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/lowcore.h>
@@ -28,29 +55,6 @@ extern unsigned long max_physmem_end;
 
 extern void detect_memory_memblock(void);
 
-/*
- * Machine features detected in head.S
- */
-
-#define MACHINE_FLAG_VM		(1UL << 0)
-#define MACHINE_FLAG_IEEE	(1UL << 1)
-#define MACHINE_FLAG_CSP	(1UL << 2)
-#define MACHINE_FLAG_MVPG	(1UL << 3)
-#define MACHINE_FLAG_DIAG44	(1UL << 4)
-#define MACHINE_FLAG_IDTE	(1UL << 5)
-#define MACHINE_FLAG_DIAG9C	(1UL << 6)
-#define MACHINE_FLAG_KVM	(1UL << 8)
-#define MACHINE_FLAG_ESOP	(1UL << 9)
-#define MACHINE_FLAG_EDAT1	(1UL << 10)
-#define MACHINE_FLAG_EDAT2	(1UL << 11)
-#define MACHINE_FLAG_LPAR	(1UL << 12)
-#define MACHINE_FLAG_LPP	(1UL << 13)
-#define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
-#define MACHINE_FLAG_TE		(1UL << 15)
-#define MACHINE_FLAG_TLB_LC	(1UL << 17)
-#define MACHINE_FLAG_VX		(1UL << 18)
-#define MACHINE_FLAG_CAD	(1UL << 19)
-
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
 #define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 0e37cd041241..63ebf37d3143 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -87,7 +87,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	typecheck(unsigned int, lp->lock);
 	asm volatile(
-		__ASM_BARRIER
 		"st	%1,%0\n"
 		: "+Q" (lp->lock)
 		: "d" (0)
@@ -169,7 +168,6 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
 							\
 	typecheck(unsigned int *, ptr);			\
 	asm volatile(					\
-		"bcr	14,0\n"				\
 		op_string "	%0,%2,%1\n"		\
 		: "=d" (old_val), "+Q" (*ptr)		\
 		: "d" (op_val)				\
@@ -243,7 +241,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 
 	rw->owner = 0;
 	asm volatile(
-		__ASM_BARRIER
 		"st	%1,%0\n"
 		: "+Q" (rw->lock)
 		: "d" (0)
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index dcadfde32265..12d45f0cfdd9 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,7 +8,7 @@
 #define __ASM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/ptrace.h>
 
 extern struct task_struct *__switch_to(void *, void *);
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 4c27ec764c36..692b9247c019 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -7,6 +7,8 @@
 #ifndef _ASM_THREAD_INFO_H
 #define _ASM_THREAD_INFO_H
 
+#include <linux/const.h>
+
 /*
  * Size of kernel stack for each process
  */
@@ -83,16 +85,16 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_BLOCK_STEP		20	/* This task is block stepped */
 #define TIF_UPROBE_SINGLESTEP	21	/* This task is uprobe single stepped */
 
-#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
-#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
-#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
-#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
-#define _TIF_UPROBE		(1<<TIF_UPROBE)
-#define _TIF_31BIT		(1<<TIF_31BIT)
-#define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
+#define _TIF_NOTIFY_RESUME	_BITUL(TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		_BITUL(TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	_BITUL(TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE	_BITUL(TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	_BITUL(TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		_BITUL(TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT	_BITUL(TIF_SYSCALL_TRACEPOINT)
+#define _TIF_UPROBE		_BITUL(TIF_UPROBE)
+#define _TIF_31BIT		_BITUL(TIF_31BIT)
+#define _TIF_SINGLE_STEP	_BITUL(TIF_SINGLE_STEP)
 
 #define is_32bit_task()		(test_thread_flag(TIF_31BIT))
 
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 27ebde643933..94fc55fc72ce 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -68,7 +68,7 @@ static inline int cpu_to_node(int cpu)
 #define cpumask_of_node cpumask_of_node
 static inline const struct cpumask *cpumask_of_node(int node)
 {
-	return node_to_cpumask_map[node];
+	return &node_to_cpumask_map[node];
 }
 
 /*
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
new file mode 100644
index 000000000000..776f307960cc
--- /dev/null
+++ b/arch/s390/include/asm/trace/diag.h
@@ -0,0 +1,43 @@
+/*
+ * Tracepoint header for s390 diagnose calls
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM s390
+
+#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_S390_DIAG_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm/trace
+#define TRACE_INCLUDE_FILE diag
+
+TRACE_EVENT(diagnose,
+	TP_PROTO(unsigned short nr),
+	TP_ARGS(nr),
+	TP_STRUCT__entry(
+		__field(unsigned short, nr)
+	),
+	TP_fast_assign(
+		__entry->nr = nr;
+	),
+	TP_printk("nr=0x%x", __entry->nr)
+);
+
+#ifdef CONFIG_TRACEPOINTS
+void trace_diagnose_norecursion(int diag_nr);
+#else
+static inline void trace_diagnose_norecursion(int diag_nr) { }
+#endif
+
+#endif /* _TRACE_S390_DIAG_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index b756c6348ac6..dc167a23b920 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -66,6 +66,8 @@ obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o
 
+obj-$(CONFIG_TRACEPOINTS)	+= trace.o
+
 # vdso
 obj-y				+= vdso64/
 obj-$(CONFIG_COMPAT)		+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 48c9af7a7683..9cd248f637c7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -23,59 +23,64 @@
 
 int main(void)
 {
-	DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack));
-	DEFINE(__TASK_thread, offsetof(struct task_struct, thread));
-	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+	/* task struct offsets */
+	OFFSET(__TASK_thread_info, task_struct, stack);
+	OFFSET(__TASK_thread, task_struct, thread);
+	OFFSET(__TASK_pid, task_struct, pid);
 	BLANK();
-	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
-	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
-	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
-	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
-	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
-	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
-	DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid));
-	DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb));
+	/* thread struct offsets */
+	OFFSET(__THREAD_ksp, thread_struct, ksp);
+	OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
+	OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
+	OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
+	OFFSET(__THREAD_per_address, thread_struct, per_event.address);
+	OFFSET(__THREAD_per_paid, thread_struct, per_event.paid);
+	OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
 	BLANK();
-	DEFINE(__TI_task, offsetof(struct thread_info, task));
-	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
-	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
-	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
-	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
-	DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
-	DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
-	DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
+	/* thread info offsets */
+	OFFSET(__TI_task, thread_info, task);
+	OFFSET(__TI_flags, thread_info, flags);
+	OFFSET(__TI_sysc_table, thread_info, sys_call_table);
+	OFFSET(__TI_cpu, thread_info, cpu);
+	OFFSET(__TI_precount, thread_info, preempt_count);
+	OFFSET(__TI_user_timer, thread_info, user_timer);
+	OFFSET(__TI_system_timer, thread_info, system_timer);
+	OFFSET(__TI_last_break, thread_info, last_break);
 	BLANK();
-	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
-	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
-	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
-	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
-	DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
-	DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
-	DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
-	DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
+	/* pt_regs offsets */
+	OFFSET(__PT_ARGS, pt_regs, args);
+	OFFSET(__PT_PSW, pt_regs, psw);
+	OFFSET(__PT_GPRS, pt_regs, gprs);
+	OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
+	OFFSET(__PT_INT_CODE, pt_regs, int_code);
+	OFFSET(__PT_INT_PARM, pt_regs, int_parm);
+	OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long);
+	OFFSET(__PT_FLAGS, pt_regs, flags);
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
-	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
-	DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
-	DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+	/* stack_frame offsets */
+	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
+	OFFSET(__SF_GPRS, stack_frame, gprs);
+	OFFSET(__SF_EMPTY, stack_frame, empty1);
 	BLANK();
 	/* timeval/timezone offsets for use by vdso */
-	DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
-	DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
-	DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
-	DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
-	DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
-	DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
-	DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
-	DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
-	DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
-	DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
-	DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
-	DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
-	DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
-	DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
-	DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
-	DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+	OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
+	OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
+	OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
+	OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
+	OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
+	OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
+	OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
+	OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
+	OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
+	OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
+	OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
+	OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
+	OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
+	OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
+	OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
+	OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
+	BLANK();
 	/* constants used by the vdso */
 	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
@@ -86,101 +91,105 @@ int main(void)
 	DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
 	BLANK();
 	/* idle data offsets */
-	DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
-	DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
-	DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
-	DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
-	/* lowcore offsets */
-	DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
-	DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
-	DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
-	DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
-	DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
-	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
-	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
-	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-	DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
-	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
-	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
-	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-	DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
-	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
-	DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
-	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
-	DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
-	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
-	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
-	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
-	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
-	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
-	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
-	DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
-	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
-	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
-	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
-	DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
-	DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
-	DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
-	DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
-	DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
-	DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
-	DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
-	DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
-	DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
+	OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
+	OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter);
+	OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit);
 	BLANK();
-	DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
-	DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
-	DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
-	DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
-	DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
-	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
-	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
-	DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
-	DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
-	DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
-	DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
-	DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
-	DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
-	DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
-	DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
-	DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
-	DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
-	DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
-	DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
-	DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
-	DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
-	DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
-	DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
-	DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
-	DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
-	DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
-	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
-	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
-	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
-	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
-	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+	/* hardware defined lowcore locations 0x000 - 0x1ff */
+	OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params);
+	OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr);
+	OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code);
+	OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc);
+	OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code);
+	OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc);
+	OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code);
+	OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code);
+	OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num);
+	OFFSET(__LC_PER_CODE, _lowcore, per_code);
+	OFFSET(__LC_PER_ATMID, _lowcore, per_atmid);
+	OFFSET(__LC_PER_ADDRESS, _lowcore, per_address);
+	OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id);
+	OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id);
+	OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id);
+	OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id);
+	OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code);
+	OFFSET(__LC_MON_CODE, _lowcore, monitor_code);
+	OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id);
+	OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr);
+	OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm);
+	OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word);
+	OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list);
+	OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code);
+	OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address);
+	OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr);
+	OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw);
+	OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw);
+	OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw);
+	OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw);
+	OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw);
+	OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw);
+	OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw);
+	OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw);
+	OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw);
+	OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw);
+	OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw);
+	OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw);
+	/* software defined lowcore locations 0x200 - 0xdff*/
+	OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync);
+	OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async);
+	OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart);
+	OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags);
+	OFFSET(__LC_RETURN_PSW, _lowcore, return_psw);
+	OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw);
+	OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer);
+	OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer);
+	OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer);
+	OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer);
+	OFFSET(__LC_USER_TIMER, _lowcore, user_timer);
+	OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer);
+	OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer);
+	OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer);
+	OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock);
+	OFFSET(__LC_INT_CLOCK, _lowcore, int_clock);
+	OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock);
+	OFFSET(__LC_CURRENT, _lowcore, current_task);
+	OFFSET(__LC_THREAD_INFO, _lowcore, thread_info);
+	OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack);
+	OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack);
+	OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack);
+	OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack);
+	OFFSET(__LC_RESTART_FN, _lowcore, restart_fn);
+	OFFSET(__LC_RESTART_DATA, _lowcore, restart_data);
+	OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source);
+	OFFSET(__LC_USER_ASCE, _lowcore, user_asce);
+	OFFSET(__LC_LPP, _lowcore, lpp);
+	OFFSET(__LC_CURRENT_PID, _lowcore, current_pid);
+	OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset);
+	OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data);
+	OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags);
+	OFFSET(__LC_GMAP, _lowcore, gmap);
+	OFFSET(__LC_PASTE, _lowcore, paste);
+	/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
+	OFFSET(__LC_DUMP_REIPL, _lowcore, ipib);
+	/* hardware defined lowcore locations 0x1000 - 0x18ff */
+	OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr);
+	OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2);
+	OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area);
+	OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area);
+	OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area);
+	OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area);
+	OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area);
+	OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area);
+	OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area);
+	OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area);
+	OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area);
+	OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area);
+	OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb);
 	BLANK();
-	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
-	DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
-	DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
-	DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
-	DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
-	DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
-	DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
-	DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
-	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
-	DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
-	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
-	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
-	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
-	DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
-	DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
-	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
-	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
-	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
-	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
-	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
-	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+	/* gmap/sie offsets */
+	OFFSET(__GMAP_ASCE, gmap, asce);
+	OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c);
+	OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20);
 	return 0;
 }
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index e0f9d270b30f..66c94417c0ba 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -249,7 +249,7 @@ static int save_sigregs_ext32(struct pt_regs *regs,
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -277,7 +277,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -470,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
 			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 199ec92ef4fe..7f768914fb4f 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/cpcmd.h>
 #include <asm/io.h>
@@ -70,6 +71,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 	memcpy(cpcmd_buf, cmd, cmdlen);
 	ASCEBC(cpcmd_buf, cmdlen);
 
+	diag_stat_inc(DIAG_STAT_X008);
 	if (response) {
 		memset(response, 0, rlen);
 		response_len = rlen;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 0c6c01eb3613..171e09bb8ea2 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -32,16 +32,6 @@ static struct memblock_type oldmem_type = {
 	.regions = &oldmem_region,
 };
 
-#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid)		\
-	for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE,		\
-				     &memblock.physmem,			\
-				     &oldmem_type, p_start,		\
-				     p_end, p_nid);			\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\
-			      &oldmem_type,				\
-			      p_start, p_end, p_nid))
-
 struct dump_save_areas dump_save_areas;
 
 /*
@@ -515,7 +505,8 @@ static int get_mem_chunk_cnt(void)
 	int cnt = 0;
 	u64 idx;
 
-	for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
+	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+			   MEMBLOCK_NONE, NULL, NULL, NULL)
 		cnt++;
 	return cnt;
 }
@@ -528,7 +519,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
 	phys_addr_t start, end;
 	u64 idx;
 
-	for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
+			   MEMBLOCK_NONE, &start, &end, NULL) {
 		phdr->p_filesz = end - start;
 		phdr->p_type = PT_LOAD;
 		phdr->p_offset = start;
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 2f69243bf700..f98766ede4e1 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -6,12 +6,137 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <asm/diag.h>
+#include <asm/trace/diag.h>
+
+struct diag_stat {
+	unsigned int counter[NR_DIAG_STAT];
+};
+
+static DEFINE_PER_CPU(struct diag_stat, diag_stat);
+
+struct diag_desc {
+	int code;
+	char *name;
+};
+
+static const struct diag_desc diag_map[NR_DIAG_STAT] = {
+	[DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" },
+	[DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" },
+	[DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" },
+	[DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" },
+	[DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" },
+	[DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" },
+	[DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" },
+	[DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" },
+	[DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" },
+	[DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" },
+	[DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" },
+	[DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" },
+	[DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" },
+	[DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" },
+	[DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" },
+	[DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" },
+	[DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
+	[DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
+	[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
+};
+
+static int show_diag_stat(struct seq_file *m, void *v)
+{
+	struct diag_stat *stat;
+	unsigned long n = (unsigned long) v - 1;
+	int cpu, prec, tmp;
+
+	get_online_cpus();
+	if (n == 0) {
+		seq_puts(m, "         ");
+
+		for_each_online_cpu(cpu) {
+			prec = 10;
+			for (tmp = 10; cpu >= tmp; tmp *= 10)
+				prec--;
+			seq_printf(m, "%*s%d", prec, "CPU", cpu);
+		}
+		seq_putc(m, '\n');
+	} else if (n <= NR_DIAG_STAT) {
+		seq_printf(m, "diag %03x:", diag_map[n-1].code);
+		for_each_online_cpu(cpu) {
+			stat = &per_cpu(diag_stat, cpu);
+			seq_printf(m, " %10u", stat->counter[n-1]);
+		}
+		seq_printf(m, "    %s\n", diag_map[n-1].name);
+	}
+	put_online_cpus();
+	return 0;
+}
+
+static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return show_diag_stat_start(m, pos);
+}
+
+static void show_diag_stat_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations show_diag_stat_sops = {
+	.start	= show_diag_stat_start,
+	.next	= show_diag_stat_next,
+	.stop	= show_diag_stat_stop,
+	.show	= show_diag_stat,
+};
+
+static int show_diag_stat_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &show_diag_stat_sops);
+}
+
+static const struct file_operations show_diag_stat_fops = {
+	.open		= show_diag_stat_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+
+static int __init show_diag_stat_init(void)
+{
+	debugfs_create_file("diag_stat", 0400, NULL, NULL,
+			    &show_diag_stat_fops);
+	return 0;
+}
+
+device_initcall(show_diag_stat_init);
+
+void diag_stat_inc(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_diagnose(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc);
+
+void diag_stat_inc_norecursion(enum diag_stat_enum nr)
+{
+	this_cpu_inc(diag_stat.counter[nr]);
+	trace_diagnose_norecursion(diag_map[nr].code);
+}
+EXPORT_SYMBOL(diag_stat_inc_norecursion);
 
 /*
  * Diagnose 14: Input spool file manipulation
  */
-int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+static inline int __diag14(unsigned long rx, unsigned long ry1,
+			   unsigned long subcode)
 {
 	register unsigned long _ry1 asm("2") = ry1;
 	register unsigned long _ry2 asm("3") = subcode;
@@ -29,6 +154,12 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
 
 	return rc;
 }
+
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+	diag_stat_inc(DIAG_STAT_X014);
+	return __diag14(rx, ry1, subcode);
+}
 EXPORT_SYMBOL(diag14);
 
 /*
@@ -48,6 +179,7 @@ int diag210(struct diag210 *addr)
 	spin_lock_irqsave(&diag210_lock, flags);
 	diag210_tmp = *addr;
 
+	diag_stat_inc(DIAG_STAT_X210);
 	asm volatile(
 		"	lhi	%0,-1\n"
 		"	sam31\n"
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 549a73a4b543..3c31609df959 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -17,6 +17,7 @@
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
+#include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
@@ -286,6 +287,7 @@ static __init void detect_diag9c(void)
 	int rc;
 
 	cpu_address = stap();
+	diag_stat_inc(DIAG_STAT_X09C);
 	asm volatile(
 		"	diag	%2,0,0x9c\n"
 		"0:	la	%0,0\n"
@@ -300,6 +302,7 @@ static __init void detect_diag44(void)
 {
 	int rc;
 
+	diag_stat_inc(DIAG_STAT_X044);
 	asm volatile(
 		"	diag	0,0,0x44\n"
 		"0:	la	%0,0\n"
@@ -326,9 +329,19 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
 	if (test_facility(51))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
-	if (test_facility(129))
+	if (test_facility(129)) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+		__ctl_set_bit(0, 17);
+	}
+}
+
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+	__ctl_clear_bit(0, 17);
+	return 1;
 }
+early_param("novx", disable_vector_extension);
 
 static int __init cad_setup(char *str)
 {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 09b039d7983d..857b6526d298 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,8 +20,9 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
-#include <asm/fpu-internal.h>
 #include <asm/vx-insn.h>
+#include <asm/setup.h>
+#include <asm/nmi.h>
 
 __PT_R0      =	__PT_GPRS
 __PT_R1      =	__PT_GPRS + 8
@@ -139,6 +140,28 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 #endif
 	.endm
 
+	/*
+	 * The TSTMSK macro generates a test-under-mask instruction by
+	 * calculating the memory offset for the specified mask value.
+	 * Mask value can be any constant.  The macro shifts the mask
+	 * value to calculate the memory offset for the test-under-mask
+	 * instruction.
+	 */
+	.macro TSTMSK addr, mask, size=8, bytepos=0
+		.if (\bytepos < \size) && (\mask >> 8)
+			.if (\mask & 0xff)
+				.error "Mask exceeds byte boundary"
+			.endif
+			TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)"
+			.exitm
+		.endif
+		.ifeq \mask
+			.error "Mask must not be zero"
+		.endif
+		off = \size - \bytepos - 1
+		tm	off+\addr, \mask
+	.endm
+
 	.section .kprobes.text, "ax"
 
 /*
@@ -164,8 +187,11 @@ ENTRY(__switch_to)
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1)		# load kernel stack of next
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
-	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+	bzr	%r14
+	.insn	s,0xb2800000,__LC_LPP		# set program parameter
 	br	%r14
 
 .L__critical_start:
@@ -180,8 +206,8 @@ ENTRY(sie64a)
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
 	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
 	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU	# load guest fp/vx registers ?
+	xc	__SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU		# load guest fp/vx registers ?
 	jno	.Lsie_load_guest_gprs
 	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
 .Lsie_load_guest_gprs:
@@ -195,16 +221,9 @@ ENTRY(sie64a)
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
 	jnz	.Lsie_skip
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsie_skip			# exit if fp/vx regs changed
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.Lsie_enter
-	.insn	s,0xb2800000,__LC_CURRENT_PID	# set guest id to pid
-.Lsie_enter:
 	sie	0(%r14)
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	.Lsie_skip
-	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
 .Lsie_skip:
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
@@ -221,11 +240,11 @@ sie_exit:
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	lg	%r2,__SF_EMPTY+16(%r15)		# return exit reason code
 	br	%r14
 .Lsie_fault:
 	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
 	j	sie_exit
 
 	EX_TABLE(.Lrewind_pad,.Lsie_fault)
@@ -271,7 +290,7 @@ ENTRY(system_call)
 	stg	%r2,__PT_ORIG_GPR2(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lgf	%r9,0(%r8,%r10)			# get system call add.
-	tm	__TI_flags+7(%r12),_TIF_TRACE
+	TSTMSK	__TI_flags(%r12),_TIF_TRACE
 	jnz	.Lsysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	stg	%r2,__PT_R2(%r11)		# store return value
@@ -279,11 +298,11 @@ ENTRY(system_call)
 .Lsysc_return:
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
-	tm	__PT_FLAGS+7(%r11),_PIF_WORK
+	TSTMSK	__PT_FLAGS(%r11),_PIF_WORK
 	jnz	.Lsysc_work
-	tm	__TI_flags+7(%r12),_TIF_WORK
+	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lsysc_work			# check for work
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lsysc_work
 .Lsysc_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
@@ -299,23 +318,23 @@ ENTRY(system_call)
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jo	.Lsysc_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lsysc_reschedule
 #ifdef CONFIG_UPROBES
-	tm	__TI_flags+7(%r12),_TIF_UPROBE
+	TSTMSK	__TI_flags(%r12),_TIF_UPROBE
 	jo	.Lsysc_uprobe_notify
 #endif
-	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
+	TSTMSK	__PT_FLAGS(%r11),_PIF_PER_TRAP
 	jo	.Lsysc_singlestep
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
 	jo	.Lsysc_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsysc_vxrs
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
+	TSTMSK	__LC_CPU_FLAGS,_CIF_ASCE
 	jo	.Lsysc_uaccess
 	j	.Lsysc_return		# beware of critical section cleanup
 
@@ -354,7 +373,7 @@ ENTRY(system_call)
 .Lsysc_sigpending:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
-	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
+	TSTMSK	__PT_FLAGS(%r11),_PIF_SYSCALL
 	jno	.Lsysc_return
 	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
@@ -414,7 +433,7 @@ ENTRY(system_call)
 	basr	%r14,%r9		# call sys_xxx
 	stg	%r2,__PT_R2(%r11)	# store return value
 .Lsysc_tracenogo:
-	tm	__TI_flags+7(%r12),_TIF_TRACE
+	TSTMSK	__TI_flags(%r12),_TIF_TRACE
 	jz	.Lsysc_return
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	larl	%r14,.Lsysc_return
@@ -544,6 +563,8 @@ ENTRY(io_int_handler)
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+	jo	.Lio_restore
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
@@ -554,7 +575,7 @@ ENTRY(io_int_handler)
 	lghi	%r3,THIN_INTERRUPT
 .Lio_call:
 	brasl	%r14,do_IRQ
-	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR
 	jz	.Lio_return
 	tpi	0
 	jz	.Lio_return
@@ -564,9 +585,9 @@ ENTRY(io_int_handler)
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 .Lio_tif:
-	tm	__TI_flags+7(%r12),_TIF_WORK
+	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lio_work		# there is work to do (signals etc.)
-	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lio_work
 .Lio_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
@@ -594,7 +615,7 @@ ENTRY(io_int_handler)
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r12)
 	jnz	.Lio_restore		# preemption is disabled
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jno	.Lio_restore
 	# switch to kernel stack
 	lg	%r1,__PT_R15(%r11)
@@ -626,17 +647,17 @@ ENTRY(io_int_handler)
 # One of the work bits is on. Find out which one.
 #
 .Lio_work_tif:
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jo	.Lio_mcck_pending
-	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lio_reschedule
-	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	TSTMSK	__TI_flags(%r12),_TIF_SIGPENDING
 	jo	.Lio_sigpending
-	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lio_notify_resume
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lio_vxrs
-	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
+	TSTMSK	__LC_CPU_FLAGS,_CIF_ASCE
 	jo	.Lio_uaccess
 	j	.Lio_return		# beware of critical section cleanup
 
@@ -719,6 +740,8 @@ ENTRY(ext_int_handler)
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
+	jo	.Lio_restore
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
@@ -733,6 +756,14 @@ ENTRY(psw_idle)
 	stg	%r3,__SF_EMPTY(%r15)
 	larl	%r1,.Lpsw_idle_lpsw+4
 	stg	%r1,__SF_EMPTY+8(%r15)
+#ifdef CONFIG_SMP
+	larl	%r1,smp_cpu_mtid
+	llgf	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	.Lpsw_idle_stcctm
+	.insn	rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
+.Lpsw_idle_stcctm:
+#endif
 	STCK	__CLOCK_IDLE_ENTER(%r2)
 	stpt	__TIMER_IDLE_ENTER(%r2)
 .Lpsw_idle_lpsw:
@@ -740,27 +771,22 @@ ENTRY(psw_idle)
 	br	%r14
 .Lpsw_idle_end:
 
-/* Store floating-point controls and floating-point or vector extension
- * registers instead.  A critical section cleanup assures that the registers
- * are stored even if interrupted for some other work.	The register %r2
- * designates a struct fpu to store register contents.	If the specified
- * structure does not contain a register save area, the register store is
- * omitted (see also comments in arch_dup_task_struct()).
- *
- * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
- * of the register contents at system call or io return.
+/*
+ * Store floating-point controls and floating-point or vector register
+ * depending whether the vector facility is available.	A critical section
+ * cleanup assures that the registers are stored even if interrupted for
+ * some other work.  The CIF_FPU flag is set to trigger a lazy restore
+ * of the register contents at return from io or a system call.
  */
 ENTRY(save_fpu_regs)
 	lg	%r2,__LC_CURRENT
 	aghi	%r2,__TASK_thread
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bor	%r14
 	stfpc	__THREAD_FPU_fpc(%r2)
 .Lsave_fpu_regs_fpc_end:
 	lg	%r3,__THREAD_FPU_regs(%r2)
-	ltgr	%r3,%r3
-	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
 .Lsave_fpu_regs_vx_low:
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -789,41 +815,30 @@ ENTRY(save_fpu_regs)
 	br	%r14
 .Lsave_fpu_regs_end:
 
-/* Load floating-point controls and floating-point or vector extension
- * registers.  A critical section cleanup assures that the register contents
- * are loaded even if interrupted for some other work.	Depending on the saved
- * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+/*
+ * Load floating-point controls and floating-point or vector registers.
+ * A critical section cleanup assures that the register contents are
+ * loaded even if interrupted for some other work.
  *
  * There are special calling conventions to fit into sysc and io return work:
  *	%r15:	<kernel stack>
  * The function requires:
- *	%r4 and __SF_EMPTY+32(%r15)
+ *	%r4
  */
 load_fpu_regs:
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bnor	%r14
 	lfpc	__THREAD_FPU_fpc(%r4)
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
-.Lload_fpu_regs_vx_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	.Lload_fpu_regs_vx
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+	jz	.Lload_fpu_regs_fp		# -> no VX, load FP regs
 .Lload_fpu_regs_vx:
 	VLM	%v0,%v15,0,%r4
 .Lload_fpu_regs_vx_high:
 	VLM	%v16,%v31,256,%r4
 	j	.Lload_fpu_regs_done
-.Lload_fpu_regs_fp_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	.Lload_fpu_regs_fp
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 .Lload_fpu_regs_fp:
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -846,16 +861,6 @@ load_fpu_regs:
 	br	%r14
 .Lload_fpu_regs_end:
 
-/* Test and set the vector enablement control in CR0.46 */
-ENTRY(__ctl_set_vx)
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	br	%r14
-.L__ctl_set_vx_end:
-
 .L__critical_end:
 
 /*
@@ -870,11 +875,11 @@ ENTRY(mcck_int_handler)
 	lg	%r12,__LC_THREAD_INFO
 	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
-	tm	__LC_MCCK_CODE,0x80	# system damage?
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
 	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
+	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
 	jo	3f
 	la	%r14,__LC_SYNC_ENTER_TIMER
 	clc	0(8,%r14),__LC_ASYNC_ENTER_TIMER
@@ -888,7 +893,7 @@ ENTRY(mcck_int_handler)
 	la	%r14,__LC_LAST_UPDATE_TIMER
 2:	spt	0(%r14)
 	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
-3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
+3:	TSTMSK	__LC_MCCK_CODE,(MCCK_CODE_PSW_MWP_VALID|MCCK_CODE_PSW_IA_VALID)
 	jno	.Lmcck_panic		# no -> skip cleanup critical
 	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
@@ -908,7 +913,7 @@ ENTRY(mcck_int_handler)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jno	.Lmcck_return
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
@@ -933,7 +938,10 @@ ENTRY(mcck_int_handler)
 # PSW restart interrupt handler
 #
 ENTRY(restart_int_handler)
-	stg	%r15,__LC_SAVE_AREA_RESTART
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
+	jz	0f
+	.insn	s,0xb2800000,__LC_LPP
+0:	stg	%r15,__LC_SAVE_AREA_RESTART
 	lg	%r15,__LC_RESTART_STACK
 	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
 	xc	0(__PT_SIZE,%r15),0(%r15)
@@ -1011,10 +1019,6 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
-	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
 	.align	8
@@ -1033,8 +1037,6 @@ cleanup_critical:
 	.quad	.Lsave_fpu_regs_end
 	.quad	load_fpu_regs
 	.quad	.Lload_fpu_regs_end
-	.quad	__ctl_set_vx
-	.quad	.L__ctl_set_vx_end
 
 #if IS_ENABLED(CONFIG_KVM)
 .Lcleanup_table_sie:
@@ -1043,10 +1045,7 @@ cleanup_critical:
 
 .Lcleanup_sie:
 	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
-	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
-	jz	0f
-	.insn	s,0xb2800000,__SF_EMPTY+16(%r15)# set host id
-0:	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
+	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	br	%r14
@@ -1159,7 +1158,27 @@ cleanup_critical:
 	jhe	1f
 	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
 	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1:	# account system time going idle
+1:	# calculate idle cycles
+#ifdef CONFIG_SMP
+	clg	%r9,BASED(.Lcleanup_idle_insn)
+	jl	3f
+	larl	%r1,smp_cpu_mtid
+	llgf	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	3f
+	.insn	rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
+	larl	%r3,mt_cycles
+	ag	%r3,__LC_PERCPU_OFFSET
+	la	%r4,__SF_EMPTY+16(%r15)
+2:	lg	%r0,0(%r3)
+	slg	%r0,0(%r4)
+	alg	%r0,64(%r4)
+	stg	%r0,0(%r3)
+	la	%r3,8(%r3)
+	la	%r4,8(%r4)
+	brct	%r1,2b
+#endif
+3:	# account system time going idle
 	lg	%r9,__LC_STEAL_TIMER
 	alg	%r9,__CLOCK_IDLE_ENTER(%r2)
 	slg	%r9,__LC_LAST_UPDATE_CLOCK
@@ -1178,7 +1197,7 @@ cleanup_critical:
 	.quad	.Lpsw_idle_lpsw
 
 .Lcleanup_save_fpu_regs:
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bor	%r14
 	clg	%r9,BASED(.Lcleanup_save_fpu_regs_done)
 	jhe	5f
@@ -1196,9 +1215,7 @@ cleanup_critical:
 	stfpc	__THREAD_FPU_fpc(%r2)
 1:	# Load register save area and check if VX is active
 	lg	%r3,__THREAD_FPU_regs(%r2)
-	ltgr	%r3,%r3
-	jz	5f			  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	jz	4f			  # no VX -> store FP regs
 2:	# Store vector registers (V0-V15)
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -1238,43 +1255,27 @@ cleanup_critical:
 	.quad	.Lsave_fpu_regs_done
 
 .Lcleanup_load_fpu_regs:
-	tm	__LC_CPU_FLAGS+7,_CIF_FPU
+	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	bnor	%r14
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_done)
 	jhe	1f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
 	jhe	2f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
-	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
-	jhe	4f
+	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
-	jhe	5f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
-	jhe	6f
+	jhe	4f
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
 	lfpc	__THREAD_FPU_fpc(%r4)
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	3f				# -> no VX, load FP regs
-6:	# Set VX-enablement control
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	5f
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
-5:	# Load V0 ..V15 registers
+	jz	2f				# -> no VX, load FP regs
+4:	# Load V0 ..V15 registers
 	VLM	%v0,%v15,0,%r4
-4:	# Load V16..V31 registers
+3:	# Load V16..V31 registers
 	VLM	%v16,%v31,256,%r4
 	j	1f
-3:	# Clear VX-enablement control for FP
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	2f
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 2:	# Load floating-point registers
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -1296,28 +1297,15 @@ cleanup_critical:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 	lg	%r9,48(%r11)		# return from load_fpu_regs
 	br	%r14
-.Lcleanup_load_fpu_regs_vx_ctl:
-	.quad	.Lload_fpu_regs_vx_ctl
 .Lcleanup_load_fpu_regs_vx:
 	.quad	.Lload_fpu_regs_vx
 .Lcleanup_load_fpu_regs_vx_high:
 	.quad	.Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp_ctl:
-	.quad	.Lload_fpu_regs_fp_ctl
 .Lcleanup_load_fpu_regs_fp:
 	.quad	.Lload_fpu_regs_fp
 .Lcleanup_load_fpu_regs_done:
 	.quad	.Lload_fpu_regs_done
 
-.Lcleanup___ctl_set_vx:
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	lg	%r9,48(%r11)		# return from __ctl_set_vx
-	br	%r14
-
 /*
  * Integer constants
  */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 834df047d35f..b7019ab74070 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -16,13 +16,10 @@ void io_int_handler(void);
 void mcck_int_handler(void);
 void restart_int_handler(void);
 void restart_call_handler(void);
-void psw_idle(struct s390_idle_data *, unsigned long);
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
-int alloc_vector_registers(struct task_struct *tsk);
-
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
 
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index d7c00507568a..58b719fa8067 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -16,7 +16,12 @@
 
 __HEAD
 ENTRY(startup_continue)
-	larl	%r1,sched_clock_base_cc
+	tm	__LC_STFL_FAC_LIST+6,0x80	# LPP available ?
+	jz	0f
+	xc	__LC_LPP+1(7,0),__LC_LPP+1	# clear lpp and current_pid
+	mvi	__LC_LPP,0x80			#   and set LPP_MAGIC
+	.insn	s,0xb2800000,__LC_LPP		# load program parameter
+0:	larl	%r1,sched_clock_base_cc
 	mvc	0(8,%r1),__LC_LAST_UPDATE_CLOCK
 	larl	%r13,.LPG1		# get base
 	lctlg	%c0,%c15,.Lctl-.LPG1(%r13)	# load control registers
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 52fbef91d1d9..f6d8acd7e136 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -17,6 +17,7 @@
 #include <linux/gfp.h>
 #include <linux/crash_dump.h>
 #include <linux/debug_locks.h>
+#include <asm/diag.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -165,7 +166,7 @@ static struct ipl_parameter_block *dump_block_ccw;
 
 static struct sclp_ipl_info sclp_ipl_info;
 
-int diag308(unsigned long subcode, void *addr)
+static inline int __diag308(unsigned long subcode, void *addr)
 {
 	register unsigned long _addr asm("0") = (unsigned long) addr;
 	register unsigned long _rc asm("1") = 0;
@@ -178,6 +179,12 @@ int diag308(unsigned long subcode, void *addr)
 		: "d" (subcode) : "cc", "memory");
 	return _rc;
 }
+
+int diag308(unsigned long subcode, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X308);
+	return __diag308(subcode, addr);
+}
 EXPORT_SYMBOL_GPL(diag308);
 
 /* SYSFS */
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e9d9addfaa44..f41d5208aaf7 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -69,7 +69,6 @@ static const struct irq_class irqclass_sub_desc[] = {
 	{.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
 	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
 	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
-	{.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
 	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
 	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
 	{.irq = IRQIO_QAI,  .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0ae6f8e74840..07302ce37648 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -21,19 +21,20 @@
 #include <asm/nmi.h>
 #include <asm/crw.h>
 #include <asm/switch_to.h>
-#include <asm/fpu-internal.h>
 #include <asm/ctl_reg.h>
 
 struct mcck_struct {
-	int kill_task;
-	int channel_report;
-	int warning;
-	unsigned long long mcck_code;
+	unsigned int kill_task : 1;
+	unsigned int channel_report : 1;
+	unsigned int warning : 1;
+	unsigned int etr_queue : 1;
+	unsigned int stp_queue : 1;
+	unsigned long mcck_code;
 };
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
 
-static void s390_handle_damage(char *msg)
+static void s390_handle_damage(void)
 {
 	smp_send_stop();
 	disabled_wait((unsigned long) __builtin_return_address(0));
@@ -81,10 +82,14 @@ void s390_handle_mcck(void)
 		if (xchg(&mchchk_wng_posted, 1) == 0)
 			kill_cad_pid(SIGPWR, 1);
 	}
+	if (mcck.etr_queue)
+		etr_queue_work();
+	if (mcck.stp_queue)
+		stp_queue_work();
 	if (mcck.kill_task) {
 		local_irq_enable();
 		printk(KERN_EMERG "mcck: Terminating task because of machine "
-		       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+		       "malfunction (code 0x%016lx).\n", mcck.mcck_code);
 		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
 		       current->comm, current->pid);
 		do_exit(SIGSEGV);
@@ -96,7 +101,7 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck);
  * returns 0 if all registers could be validated
  * returns 1 otherwise
  */
-static int notrace s390_revalidate_registers(struct mci *mci)
+static int notrace s390_validate_registers(union mci mci)
 {
 	int kill_task;
 	u64 zero;
@@ -105,14 +110,14 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 	kill_task = 0;
 	zero = 0;
 
-	if (!mci->gr) {
+	if (!mci.gr) {
 		/*
 		 * General purpose registers couldn't be restored and have
 		 * unknown contents. Process needs to be terminated.
 		 */
 		kill_task = 1;
 	}
-	if (!mci->fp) {
+	if (!mci.fp) {
 		/*
 		 * Floating point registers can't be restored and
 		 * therefore the process needs to be terminated.
@@ -121,7 +126,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 	}
 	fpt_save_area = &S390_lowcore.floating_pt_save_area;
 	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
-	if (!mci->fc) {
+	if (!mci.fc) {
 		/*
 		 * Floating point control register can't be restored.
 		 * Task will be terminated.
@@ -132,7 +137,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
 	if (!MACHINE_HAS_VX) {
-		/* Revalidate floating point registers */
+		/* Validate floating point registers */
 		asm volatile(
 			"	ld	0,0(%0)\n"
 			"	ld	1,8(%0)\n"
@@ -152,10 +157,10 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			"	ld	15,120(%0)\n"
 			: : "a" (fpt_save_area));
 	} else {
-		/* Revalidate vector registers */
+		/* Validate vector registers */
 		union ctlreg0 cr0;
 
-		if (!mci->vr) {
+		if (!mci.vr) {
 			/*
 			 * Vector registers can't be restored and therefore
 			 * the process needs to be terminated.
@@ -173,38 +178,38 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 				 &S390_lowcore.vector_save_area) : "1");
 		__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
 	}
-	/* Revalidate access registers */
+	/* Validate access registers */
 	asm volatile(
 		"	lam	0,15,0(%0)"
 		: : "a" (&S390_lowcore.access_regs_save_area));
-	if (!mci->ar) {
+	if (!mci.ar) {
 		/*
 		 * Access registers have unknown contents.
 		 * Terminating task.
 		 */
 		kill_task = 1;
 	}
-	/* Revalidate control registers */
-	if (!mci->cr) {
+	/* Validate control registers */
+	if (!mci.cr) {
 		/*
 		 * Control registers have unknown contents.
 		 * Can't recover and therefore stopping machine.
 		 */
-		s390_handle_damage("invalid control registers.");
+		s390_handle_damage();
 	} else {
 		asm volatile(
 			"	lctlg	0,15,0(%0)"
 			: : "a" (&S390_lowcore.cregs_save_area));
 	}
 	/*
-	 * We don't even try to revalidate the TOD register, since we simply
+	 * We don't even try to validate the TOD register, since we simply
 	 * can't write something sensible into that register.
 	 */
 	/*
-	 * See if we can revalidate the TOD programmable register with its
+	 * See if we can validate the TOD programmable register with its
 	 * old contents (should be zero) otherwise set it to zero.
 	 */
-	if (!mci->pr)
+	if (!mci.pr)
 		asm volatile(
 			"	sr	0,0\n"
 			"	sckpf"
@@ -215,17 +220,17 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			"	sckpf"
 			: : "a" (&S390_lowcore.tod_progreg_save_area)
 			: "0", "cc");
-	/* Revalidate clock comparator register */
+	/* Validate clock comparator register */
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	/* Check if old PSW is valid */
-	if (!mci->wp)
+	if (!mci.wp)
 		/*
 		 * Can't tell if we come from user or kernel mode
 		 * -> stopping machine.
 		 */
-		s390_handle_damage("old psw invalid.");
+		s390_handle_damage();
 
-	if (!mci->ms || !mci->pm || !mci->ia)
+	if (!mci.ms || !mci.pm || !mci.ia)
 		kill_task = 1;
 
 	return kill_task;
@@ -249,21 +254,21 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	static unsigned long long last_ipd;
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
-	struct mci *mci;
+	union mci mci;
 	int umode;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
-	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	mci.val = S390_lowcore.mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
 	umode = user_mode(regs);
 
-	if (mci->sd) {
+	if (mci.sd) {
 		/* System damage -> stopping machine */
-		s390_handle_damage("received system damage machine check.");
+		s390_handle_damage();
 	}
-	if (mci->pd) {
-		if (mci->b) {
+	if (mci.pd) {
+		if (mci.b) {
 			/* Processing backup -> verify if we can survive this */
 			u64 z_mcic, o_mcic, t_mcic;
 			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
@@ -271,12 +276,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
 				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
 				  1ULL<<16);
-			t_mcic = *(u64 *)mci;
+			t_mcic = mci.val;
 
 			if (((t_mcic & z_mcic) != 0) ||
 			    ((t_mcic & o_mcic) != o_mcic)) {
-				s390_handle_damage("processing backup machine "
-						   "check with damage.");
+				s390_handle_damage();
 			}
 
 			/*
@@ -291,64 +295,62 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 				ipd_count = 1;
 			last_ipd = tmp;
 			if (ipd_count == MAX_IPD_COUNT)
-				s390_handle_damage("too many ipd retries.");
+				s390_handle_damage();
 			spin_unlock(&ipd_lock);
 		} else {
 			/* Processing damage -> stopping machine */
-			s390_handle_damage("received instruction processing "
-					   "damage machine check.");
+			s390_handle_damage();
 		}
 	}
-	if (s390_revalidate_registers(mci)) {
+	if (s390_validate_registers(mci)) {
 		if (umode) {
 			/*
 			 * Couldn't restore all register contents while in
 			 * user mode -> mark task for termination.
 			 */
 			mcck->kill_task = 1;
-			mcck->mcck_code = *(unsigned long long *) mci;
+			mcck->mcck_code = mci.val;
 			set_cpu_flag(CIF_MCCK_PENDING);
 		} else {
 			/*
 			 * Couldn't restore all register contents while in
 			 * kernel mode -> stopping machine.
 			 */
-			s390_handle_damage("unable to revalidate registers.");
+			s390_handle_damage();
 		}
 	}
-	if (mci->cd) {
+	if (mci.cd) {
 		/* Timing facility damage */
-		s390_handle_damage("TOD clock damaged");
+		s390_handle_damage();
 	}
-	if (mci->ed && mci->ec) {
+	if (mci.ed && mci.ec) {
 		/* External damage */
 		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
-			etr_sync_check();
+			mcck->etr_queue |= etr_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
-			etr_switch_to_local();
+			mcck->etr_queue |= etr_switch_to_local();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
-			stp_sync_check();
+			mcck->stp_queue |= stp_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
-			stp_island_check();
+			mcck->stp_queue |= stp_island_check();
+		if (mcck->etr_queue || mcck->stp_queue)
+			set_cpu_flag(CIF_MCCK_PENDING);
 	}
-	if (mci->se)
+	if (mci.se)
 		/* Storage error uncorrected */
-		s390_handle_damage("received storage error uncorrected "
-				   "machine check.");
-	if (mci->ke)
+		s390_handle_damage();
+	if (mci.ke)
 		/* Storage key-error uncorrected */
-		s390_handle_damage("received storage key-error uncorrected "
-				   "machine check.");
-	if (mci->ds && mci->fa)
+		s390_handle_damage();
+	if (mci.ds && mci.fa)
 		/* Storage degradation */
-		s390_handle_damage("received storage degradation machine "
-				   "check.");
-	if (mci->cp) {
+		s390_handle_damage();
+	if (mci.cp) {
 		/* Channel report word pending */
 		mcck->channel_report = 1;
 		set_cpu_flag(CIF_MCCK_PENDING);
 	}
-	if (mci->w) {
+	if (mci.w) {
 		/* Warning pending */
 		mcck->warning = 1;
 		set_cpu_flag(CIF_MCCK_PENDING);
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index a9563409c36e..929c147e07b4 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -72,6 +72,7 @@ struct cpu_hw_events {
 	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
 	u64			state, tx_state;
 	unsigned int		flags;
+	unsigned int		txn_flags;
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.ctr_set = {
@@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	},
 	.state = 0,
 	.flags = 0,
+	.txn_flags = 0,
 };
 
 static int get_counter_set(u64 event)
@@ -538,7 +540,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
 	 * For group events transaction, the authorization check is
 	 * done in cpumf_pmu_commit_txn().
 	 */
-	if (!(cpuhw->flags & PERF_EVENT_TXN))
+	if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD))
 		if (validate_ctr_auth(&event->hw))
 			return -ENOENT;
 
@@ -576,13 +578,22 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
 /*
  * Start group events scheduling transaction.
  * Set flags to perform a single test at commit time.
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void cpumf_pmu_start_txn(struct pmu *pmu)
+static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+
+	cpuhw->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	perf_pmu_disable(pmu);
-	cpuhw->flags |= PERF_EVENT_TXN;
 	cpuhw->tx_state = cpuhw->state;
 }
 
@@ -593,11 +604,18 @@ static void cpumf_pmu_start_txn(struct pmu *pmu)
  */
 static void cpumf_pmu_cancel_txn(struct pmu *pmu)
 {
+	unsigned int txn_flags;
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	WARN_ON(cpuhw->tx_state != cpuhw->state);
 
-	cpuhw->flags &= ~PERF_EVENT_TXN;
 	perf_pmu_enable(pmu);
 }
 
@@ -611,13 +629,20 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 	u64 state;
 
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuhw->txn_flags = 0;
+		return 0;
+	}
+
 	/* check if the updated state can be scheduled */
 	state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
 	state >>= CPUMF_LCCTL_ENABLE_SHIFT;
 	if ((state & cpuhw->info.auth_ctl) != state)
 		return -ENOENT;
 
-	cpuhw->flags &= ~PERF_EVENT_TXN;
+	cpuhw->txn_flags = 0;
 	perf_pmu_enable(pmu);
 	return 0;
 }
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b973972f6ba5..3d8da1e742c2 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1019,11 +1019,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 		break;
 	}
 
-	/* The host-program-parameter (hpp) contains the pid of
-	 * the CPU thread as set by sie64a() in entry.S.
-	 * If non-zero assume a guest sample.
+	/*
+	 * A non-zero guest program parameter indicates a guest
+	 * sample.
+	 * Note that some early samples might be misaccounted to
+	 * the host.
 	 */
-	if (sfr->basic.hpp)
+	if (sfr->basic.gpp)
 		sde_regs->in_guest = 1;
 
 	overflow = 0;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f2dac9f0799d..688a3aad9c79 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/random.h>
 #include <linux/module.h>
+#include <linux/init_task.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/vtimer.h>
@@ -36,6 +37,9 @@
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
+/* FPU save area for the init task */
+__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data;
+
 /*
  * Return saved PC of a blocked thread. used in kernel/sched.
  * resume in entry.S does not create a new stack frame, it
@@ -87,31 +91,29 @@ void arch_release_task_struct(struct task_struct *tsk)
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
+	size_t fpu_regs_size;
+
 	*dst = *src;
 
-	/* Set up a new floating-point register save area */
-	dst->thread.fpu.fpc = 0;
-	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
-	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-				       GFP_KERNEL|__GFP_REPEAT);
-	if (!dst->thread.fpu.fprs)
+	/*
+	 * If the vector extension is available, it is enabled for all tasks,
+	 * and, thus, the FPU register save area must be allocated accordingly.
+	 */
+	fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
+				       : sizeof(freg_t) * __NUM_FPRS;
+	dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.regs)
 		return -ENOMEM;
 
 	/*
 	 * Save the floating-point or vector register state of the current
-	 * task.  The state is not saved for early kernel threads, for example,
-	 * the init_task, which do not have an allocated save area.
-	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
-	 * state when switching to a different task or returning to user space.
+	 * task and set the CIF_FPU flag to lazy restore the FPU register
+	 * state when returning to user space.
 	 */
 	save_fpu_regs();
 	dst->thread.fpu.fpc = current->thread.fpu.fpc;
-	if (is_vx_task(current))
-		convert_vx_to_fp(dst->thread.fpu.fprs,
-				 current->thread.fpu.vxrs);
-	else
-		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
-		       sizeof(freg_t) * __NUM_FPRS);
+	memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
+
 	return 0;
 }
 
@@ -169,7 +171,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
-	p->thread.ri_signum = 0;
 	frame->childregs.psw.mask &= ~PSW_MASK_RI;
 
 	/* Set a new TLS ?  */
@@ -199,7 +200,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 	save_fpu_regs();
 	fpregs->fpc = current->thread.fpu.fpc;
 	fpregs->pad = 0;
-	if (is_vx_task(current))
+	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs,
 				 current->thread.fpu.vxrs);
 	else
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index e6e077ae3990..7ce00e7a709a 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -11,6 +11,7 @@
 #include <linux/seq_file.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
+#include <asm/diag.h>
 #include <asm/elf.h>
 #include <asm/lowcore.h>
 #include <asm/param.h>
@@ -20,8 +21,10 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
 
 void notrace cpu_relax(void)
 {
-	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
+	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
+		diag_stat_inc(DIAG_STAT_X044);
 		asm volatile("diag 0,0,0x44");
+	}
 	barrier();
 }
 EXPORT_SYMBOL(cpu_relax);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8b1c8e33f184..01c37b36caf9 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -239,12 +239,12 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(addr_t *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -383,12 +383,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(addr_t *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fpu.fprs + offset) = data;
+				child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -617,12 +617,12 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(__u32 *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -742,12 +742,12 @@ static int __poke_user_compat(struct task_struct *child,
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(__u32 *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fpu.fprs + offset) = tmp;
+				child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -981,7 +981,7 @@ static int s390_fpregs_set(struct task_struct *target,
 	if (rc)
 		return rc;
 
-	if (is_vx_task(target))
+	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
 	else
 		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
@@ -1047,13 +1047,10 @@ static int s390_vxrs_low_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1067,11 +1064,7 @@ static int s390_vxrs_low_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1091,13 +1084,10 @@ static int s390_vxrs_high_get(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
-		       sizeof(vxrs));
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
+
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1110,11 +1100,7 @@ static int s390_vxrs_high_set(struct task_struct *target,
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 26b4ae96fdd7..fffa0e5462af 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,11 +18,6 @@
 /* empty control block to disable RI by loading it */
 struct runtime_instr_cb runtime_instr_empty_cb;
 
-static int runtime_instr_avail(void)
-{
-	return test_facility(64);
-}
-
 static void disable_runtime_instr(void)
 {
 	struct pt_regs *regs = task_pt_regs(current);
@@ -40,7 +35,6 @@ static void disable_runtime_instr(void)
 static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
 {
 	cb->buf_limit = 0xfff;
-	cb->int_requested = 1;
 	cb->pstate = 1;
 	cb->pstate_set_buf = 1;
 	cb->pstate_sample = 1;
@@ -57,46 +51,14 @@ void exit_thread_runtime_instr(void)
 		return;
 	disable_runtime_instr();
 	kfree(task->thread.ri_cb);
-	task->thread.ri_signum = 0;
 	task->thread.ri_cb = NULL;
 }
 
-static void runtime_instr_int_handler(struct ext_code ext_code,
-				unsigned int param32, unsigned long param64)
-{
-	struct siginfo info;
-
-	if (!(param32 & CPU_MF_INT_RI_MASK))
-		return;
-
-	inc_irq_stat(IRQEXT_CMR);
-
-	if (!current->thread.ri_cb)
-		return;
-	if (current->thread.ri_signum < SIGRTMIN ||
-	    current->thread.ri_signum > SIGRTMAX) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
-	memset(&info, 0, sizeof(info));
-	info.si_signo = current->thread.ri_signum;
-	info.si_code = SI_QUEUE;
-	if (param32 & CPU_MF_INT_RI_BUF_FULL)
-		info.si_int = ENOBUFS;
-	else if (param32 & CPU_MF_INT_RI_HALTED)
-		info.si_int = ECANCELED;
-	else
-		return; /* unknown reason */
-
-	send_sig_info(current->thread.ri_signum, &info, current);
-}
-
-SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+SYSCALL_DEFINE1(s390_runtime_instr, int, command)
 {
 	struct runtime_instr_cb *cb;
 
-	if (!runtime_instr_avail())
+	if (!test_facility(64))
 		return -EOPNOTSUPP;
 
 	if (command == S390_RUNTIME_INSTR_STOP) {
@@ -106,8 +68,7 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
 		return 0;
 	}
 
-	if (command != S390_RUNTIME_INSTR_START ||
-	    (signum < SIGRTMIN || signum > SIGRTMAX))
+	if (command != S390_RUNTIME_INSTR_START)
 		return -EINVAL;
 
 	if (!current->thread.ri_cb) {
@@ -120,7 +81,6 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
 	}
 
 	init_runtime_instr_cb(cb);
-	current->thread.ri_signum = signum;
 
 	/* now load the control block to make it available */
 	preempt_disable();
@@ -129,21 +89,3 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
 	preempt_enable();
 	return 0;
 }
-
-static int __init runtime_instr_init(void)
-{
-	int rc;
-
-	if (!runtime_instr_avail())
-		return 0;
-
-	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
-				   runtime_instr_int_handler);
-	if (rc)
-		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-	else
-		pr_info("Runtime instrumentation facility initialized\n");
-	return rc;
-}
-device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 5090d3dad10b..e67453b73c3c 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
@@ -10,7 +10,6 @@ EXPORT_SYMBOL(_mcount);
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 EXPORT_SYMBOL(save_fpu_regs);
-EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 9549af102d75..028cc46cb82a 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -179,7 +179,7 @@ static int save_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -199,7 +199,7 @@ static int restore_sigregs_ext(struct pt_regs *regs,
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -381,8 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index c6355e6f3fcc..9062df575afe 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -33,6 +33,7 @@
 #include <linux/crash_dump.h>
 #include <linux/memblock.h>
 #include <asm/asm-offsets.h>
+#include <asm/diag.h>
 #include <asm/switch_to.h>
 #include <asm/facility.h>
 #include <asm/ipl.h>
@@ -261,6 +262,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->thread_info = (unsigned long) task_thread_info(tsk);
 	lc->current_task = (unsigned long) tsk;
+	lc->lpp = LPP_MAGIC;
+	lc->current_pid = tsk->pid;
 	lc->user_timer = ti->user_timer;
 	lc->system_timer = ti->system_timer;
 	lc->steal_timer = 0;
@@ -375,11 +378,14 @@ int smp_vcpu_scheduled(int cpu)
 
 void smp_yield_cpu(int cpu)
 {
-	if (MACHINE_HAS_DIAG9C)
+	if (MACHINE_HAS_DIAG9C) {
+		diag_stat_inc_norecursion(DIAG_STAT_X09C);
 		asm volatile("diag %0,0,0x9c"
 			     : : "d" (pcpu_devices[cpu].address));
-	else if (MACHINE_HAS_DIAG44)
+	} else if (MACHINE_HAS_DIAG44) {
+		diag_stat_inc_norecursion(DIAG_STAT_X044);
 		asm volatile("diag 0,0,0x44");
+	}
 }
 
 /*
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 017c3a9bfc28..99f84ac31307 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -542,16 +542,17 @@ arch_initcall(etr_init);
  * Switch to local machine check. This is called when the last usable
  * ETR port goes inactive. After switch to local the clock is not in sync.
  */
-void etr_switch_to_local(void)
+int etr_switch_to_local(void)
 {
 	if (!etr_eacr.sl)
-		return;
+		return 0;
 	disable_sync_clock(NULL);
 	if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
 		etr_eacr.es = etr_eacr.sl = 0;
 		etr_setr(&etr_eacr);
-		queue_work(time_sync_wq, &etr_work);
+		return 1;
 	}
+	return 0;
 }
 
 /*
@@ -560,16 +561,22 @@ void etr_switch_to_local(void)
  * After a ETR sync check the clock is not in sync. The machine check
  * is broadcasted to all cpus at the same time.
  */
-void etr_sync_check(void)
+int etr_sync_check(void)
 {
 	if (!etr_eacr.es)
-		return;
+		return 0;
 	disable_sync_clock(NULL);
 	if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
 		etr_eacr.es = 0;
 		etr_setr(&etr_eacr);
-		queue_work(time_sync_wq, &etr_work);
+		return 1;
 	}
+	return 0;
+}
+
+void etr_queue_work(void)
+{
+	queue_work(time_sync_wq, &etr_work);
 }
 
 /*
@@ -1504,10 +1511,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm)
  * After a STP sync check the clock is not in sync. The machine check
  * is broadcasted to all cpus at the same time.
  */
-void stp_sync_check(void)
+int stp_sync_check(void)
 {
 	disable_sync_clock(NULL);
-	queue_work(time_sync_wq, &stp_work);
+	return 1;
 }
 
 /*
@@ -1516,12 +1523,16 @@ void stp_sync_check(void)
  * have matching CTN ids and have a valid stratum-1 configuration
  * but the configurations do not match.
  */
-void stp_island_check(void)
+int stp_island_check(void)
 {
 	disable_sync_clock(NULL);
-	queue_work(time_sync_wq, &stp_work);
+	return 1;
 }
 
+void stp_queue_work(void)
+{
+	queue_work(time_sync_wq, &stp_work);
+}
 
 static int stp_sync_clock(void *data)
 {
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bf05e7fc3e70..40b8102fdadb 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -84,6 +84,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 					  struct mask_info *socket,
 					  int one_socket_per_cpu)
 {
+	struct cpu_topology_s390 *topo;
 	unsigned int core;
 
 	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
@@ -95,15 +96,16 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 		if (lcpu < 0)
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
-			per_cpu(cpu_topology, lcpu + i).book_id = book->id;
-			per_cpu(cpu_topology, lcpu + i).core_id = rcore;
-			per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
+			topo = &per_cpu(cpu_topology, lcpu + i);
+			topo->book_id = book->id;
+			topo->core_id = rcore;
+			topo->thread_id = lcpu + i;
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
 			if (one_socket_per_cpu)
-				per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
+				topo->socket_id = rcore;
 			else
-				per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
+				topo->socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 		if (one_socket_per_cpu)
@@ -247,17 +249,19 @@ int topology_set_cpu_management(int fc)
 
 static void update_cpu_masks(void)
 {
+	struct cpu_topology_s390 *topo;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
-		per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
-		per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
+		topo = &per_cpu(cpu_topology, cpu);
+		topo->thread_mask = cpu_thread_map(cpu);
+		topo->core_mask = cpu_group_map(&socket_info, cpu);
+		topo->book_mask = cpu_group_map(&book_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
-			per_cpu(cpu_topology, cpu).thread_id = cpu;
-			per_cpu(cpu_topology, cpu).core_id = cpu;
-			per_cpu(cpu_topology, cpu).socket_id = cpu;
-			per_cpu(cpu_topology, cpu).book_id = cpu;
+			topo->thread_id = cpu;
+			topo->core_id = cpu;
+			topo->socket_id = cpu;
+			topo->book_id = cpu;
 		}
 	}
 	numa_update_cpu_topology();
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
new file mode 100644
index 000000000000..73239bb576c4
--- /dev/null
+++ b/arch/s390/kernel/trace.c
@@ -0,0 +1,29 @@
+/*
+ * Tracepoint definitions for s390
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/percpu.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/diag.h>
+
+EXPORT_TRACEPOINT_SYMBOL(diagnose);
+
+static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
+
+void trace_diagnose_norecursion(int diag_nr)
+{
+	unsigned long flags;
+	unsigned int *depth;
+
+	local_irq_save(flags);
+	depth = this_cpu_ptr(&diagnose_trace_depth);
+	if (*depth == 0) {
+		(*depth)++;
+		trace_diagnose(diag_nr);
+		(*depth)--;
+	}
+	local_irq_restore(flags);
+}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 9861613fb35a..1b18118bbc06 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -19,7 +19,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -224,29 +224,6 @@ NOKPROBE_SYMBOL(illegal_op);
 DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 	      "specification exception");
 
-int alloc_vector_registers(struct task_struct *tsk)
-{
-	__vector128 *vxrs;
-	freg_t *fprs;
-
-	/* Allocate vector register save area. */
-	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
-		       GFP_KERNEL|__GFP_REPEAT);
-	if (!vxrs)
-		return -ENOMEM;
-	preempt_disable();
-	if (tsk == current)
-		save_fpu_regs();
-	/* Copy the 16 floating point registers */
-	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
-	fprs = tsk->thread.fpu.fprs;
-	tsk->thread.fpu.vxrs = vxrs;
-	tsk->thread.fpu.flags |= FPU_USE_VX;
-	kfree(fprs);
-	preempt_enable();
-	return 0;
-}
-
 void vector_exception(struct pt_regs *regs)
 {
 	int si_code, vic;
@@ -281,13 +258,6 @@ void vector_exception(struct pt_regs *regs)
 	do_trap(regs, SIGFPE, si_code, "vector exception");
 }
 
-static int __init disable_vector_extension(char *str)
-{
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
-	return 1;
-}
-__setup("novx", disable_vector_extension);
-
 void data_exception(struct pt_regs *regs)
 {
 	__u16 __user *location;
@@ -296,15 +266,6 @@ void data_exception(struct pt_regs *regs)
 	location = get_trap_ip(regs);
 
 	save_fpu_regs();
-	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !is_vx_task(current) &&
-	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
-		alloc_vector_registers(current);
-		/* Vector data exception is suppressing, rewind psw. */
-		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
-		clear_pt_regs_flag(regs, PIF_PER_TRAP);
-		return;
-	}
 	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 0d58269ff425..59eddb0e1a3e 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -299,7 +299,7 @@ static int __init vdso_init(void)
 
 	get_page(virt_to_page(vdso_data));
 
-	smp_wmb();
+	smp_mb();
 
 	return 0;
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c8653435c70d..dafc44f519c3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -25,7 +25,7 @@ static DEFINE_SPINLOCK(virt_timer_lock);
 static atomic64_t virt_timer_current;
 static atomic64_t virt_timer_elapsed;
 
-static DEFINE_PER_CPU(u64, mt_cycles[32]);
+DEFINE_PER_CPU(u64, mt_cycles[8]);
 static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
 static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
@@ -60,6 +60,34 @@ static inline int virt_timer_forward(u64 elapsed)
 	return elapsed >= atomic64_read(&virt_timer_current);
 }
 
+static void update_mt_scaling(void)
+{
+	u64 cycles_new[8], *cycles_old;
+	u64 delta, fac, mult, div;
+	int i;
+
+	stcctm5(smp_cpu_mtid + 1, cycles_new);
+	cycles_old = this_cpu_ptr(mt_cycles);
+	fac = 1;
+	mult = div = 0;
+	for (i = 0; i <= smp_cpu_mtid; i++) {
+		delta = cycles_new[i] - cycles_old[i];
+		div += delta;
+		mult *= i + 1;
+		mult += delta * fac;
+		fac *= i + 1;
+	}
+	div *= fac;
+	if (div > 0) {
+		/* Update scaling factor */
+		__this_cpu_write(mt_scaling_mult, mult);
+		__this_cpu_write(mt_scaling_div, div);
+		memcpy(cycles_old, cycles_new,
+		       sizeof(u64) * (smp_cpu_mtid + 1));
+	}
+	__this_cpu_write(mt_scaling_jiffies, jiffies_64);
+}
+
 /*
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
@@ -69,7 +97,6 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, clock, user, system, steal;
 	u64 user_scaled, system_scaled;
-	int i;
 
 	timer = S390_lowcore.last_update_timer;
 	clock = S390_lowcore.last_update_clock;
@@ -85,34 +112,10 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
-	/* Do MT utilization calculation */
+	/* Update MT utilization calculation */
 	if (smp_cpu_mtid &&
-	    time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) {
-		u64 cycles_new[32], *cycles_old;
-		u64 delta, fac, mult, div;
-
-		cycles_old = this_cpu_ptr(mt_cycles);
-		if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) {
-			fac = 1;
-			mult = div = 0;
-			for (i = 0; i <= smp_cpu_mtid; i++) {
-				delta = cycles_new[i] - cycles_old[i];
-				div += delta;
-				mult *= i + 1;
-				mult += delta * fac;
-				fac *= i + 1;
-			}
-			div *= fac;
-			if (div > 0) {
-				/* Update scaling factor */
-				__this_cpu_write(mt_scaling_mult, mult);
-				__this_cpu_write(mt_scaling_div, div);
-				memcpy(cycles_old, cycles_new,
-				       sizeof(u64) * (smp_cpu_mtid + 1));
-			}
-		}
-		__this_cpu_write(mt_scaling_jiffies, jiffies_64);
-	}
+	    time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+		update_mt_scaling();
 
 	user = S390_lowcore.user_timer - ti->user_timer;
 	S390_lowcore.steal_timer -= user;
@@ -181,6 +184,11 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 
+	/* Update MT utilization calculation */
+	if (smp_cpu_mtid &&
+	    time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
+		update_mt_scaling();
+
 	system = S390_lowcore.system_timer - ti->system_timer;
 	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 7365e8a46032..b4a5aa110cec 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -336,28 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
 	return -EOPNOTSUPP;
 }
 
-static const intercept_handler_t intercept_funcs[] = {
-	[0x00 >> 2] = handle_noop,
-	[0x04 >> 2] = handle_instruction,
-	[0x08 >> 2] = handle_prog,
-	[0x10 >> 2] = handle_noop,
-	[0x14 >> 2] = handle_external_interrupt,
-	[0x18 >> 2] = handle_noop,
-	[0x1C >> 2] = kvm_s390_handle_wait,
-	[0x20 >> 2] = handle_validity,
-	[0x28 >> 2] = handle_stop,
-	[0x38 >> 2] = handle_partial_execution,
-};
-
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
-	intercept_handler_t func;
-	u8 code = vcpu->arch.sie_block->icptcode;
-
-	if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs))
+	switch (vcpu->arch.sie_block->icptcode) {
+	case 0x00:
+	case 0x10:
+	case 0x18:
+		return handle_noop(vcpu);
+	case 0x04:
+		return handle_instruction(vcpu);
+	case 0x08:
+		return handle_prog(vcpu);
+	case 0x14:
+		return handle_external_interrupt(vcpu);
+	case 0x1c:
+		return kvm_s390_handle_wait(vcpu);
+	case 0x20:
+		return handle_validity(vcpu);
+	case 0x28:
+		return handle_stop(vcpu);
+	case 0x38:
+		return handle_partial_execution(vcpu);
+	default:
 		return -EOPNOTSUPP;
-	func = intercept_funcs[code >> 2];
-	if (func)
-		return func(vcpu);
-	return -EOPNOTSUPP;
+	}
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c2c169395c3..373e32346d68 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -51,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
 
 static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
 {
-	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
-	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
-	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
-		return 0;
-	return 1;
+	return psw_extint_disabled(vcpu) &&
+	       psw_ioint_disabled(vcpu) &&
+	       psw_mchk_disabled(vcpu);
 }
 
 static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
@@ -71,13 +69,8 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
 
 static int ckc_irq_pending(struct kvm_vcpu *vcpu)
 {
-	preempt_disable();
-	if (!(vcpu->arch.sie_block->ckc <
-	      get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
-		preempt_enable();
+	if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm))
 		return 0;
-	}
-	preempt_enable();
 	return ckc_interrupts_enabled(vcpu);
 }
 
@@ -109,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word)
 	return (int_word & 0x38000000) >> 27;
 }
 
-static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
 {
-	return vcpu->kvm->arch.float_int.pending_irqs;
-}
-
-static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.local_int.pending_irqs;
+	return vcpu->kvm->arch.float_int.pending_irqs |
+	       vcpu->arch.local_int.pending_irqs;
 }
 
 static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -135,8 +124,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 {
 	unsigned long active_mask;
 
-	active_mask = pending_local_irqs(vcpu);
-	active_mask |= pending_floating_irqs(vcpu);
+	active_mask = pending_irqs(vcpu);
 	if (!active_mask)
 		return 0;
 
@@ -204,7 +192,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 {
-	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
 		return;
 	else if (psw_ioint_disabled(vcpu))
 		__set_cpuflag(vcpu, CPUSTAT_IO_INT);
@@ -214,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
-	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
 		return;
 	if (psw_extint_disabled(vcpu))
 		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -224,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 
 static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
 {
-	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+	if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
 		return;
 	if (psw_mchk_disabled(vcpu))
 		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -815,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
-	int rc;
+	if (deliverable_irqs(vcpu))
+		return 1;
 
-	rc = !!deliverable_irqs(vcpu);
-
-	if (!rc && kvm_cpu_has_pending_timer(vcpu))
-		rc = 1;
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return 1;
 
 	/* external call pending and deliverable */
-	if (!rc && kvm_s390_ext_call_pending(vcpu) &&
+	if (kvm_s390_ext_call_pending(vcpu) &&
 	    !psw_extint_disabled(vcpu) &&
 	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
-		rc = 1;
-
-	if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
-		rc = 1;
+		return 1;
 
-	return rc;
+	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+		return 1;
+	return 0;
 }
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -846,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	vcpu->stat.exit_wait_state++;
 
 	/* fast path */
-	if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+	if (kvm_arch_vcpu_runnable(vcpu))
 		return 0;
 
 	if (psw_interrupts_disabled(vcpu)) {
@@ -860,9 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 		goto no_timer;
 	}
 
-	preempt_disable();
-	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
-	preempt_enable();
+	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/* underflow */
@@ -901,9 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 	u64 now, sltime;
 
 	vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
-	preempt_disable();
-	now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
-	preempt_enable();
+	now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
 	sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
 
 	/*
@@ -981,39 +963,30 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   irq->u.pgm.code, 0);
 
-	li->irq.pgm = irq->u.pgm;
+	if (irq->u.pgm.code == PGM_PER) {
+		li->irq.pgm.code |= PGM_PER;
+		/* only modify PER related information */
+		li->irq.pgm.per_address = irq->u.pgm.per_address;
+		li->irq.pgm.per_code = irq->u.pgm.per_code;
+		li->irq.pgm.per_atmid = irq->u.pgm.per_atmid;
+		li->irq.pgm.per_access_id = irq->u.pgm.per_access_id;
+	} else if (!(irq->u.pgm.code & PGM_PER)) {
+		li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
+				   irq->u.pgm.code;
+		/* only modify non-PER information */
+		li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
+		li->irq.pgm.mon_code = irq->u.pgm.mon_code;
+		li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code;
+		li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr;
+		li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id;
+		li->irq.pgm.op_access_id = irq->u.pgm.op_access_id;
+	} else {
+		li->irq.pgm = irq->u.pgm;
+	}
 	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
 	return 0;
 }
 
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_irq irq;
-
-	spin_lock(&li->lock);
-	irq.u.pgm.code = code;
-	__inject_prog(vcpu, &irq);
-	BUG_ON(waitqueue_active(li->wq));
-	spin_unlock(&li->lock);
-	return 0;
-}
-
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
-			     struct kvm_s390_pgm_info *pgm_info)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_irq irq;
-	int rc;
-
-	spin_lock(&li->lock);
-	irq.u.pgm = *pgm_info;
-	rc = __inject_prog(vcpu, &irq);
-	BUG_ON(waitqueue_active(li->wq));
-	spin_unlock(&li->lock);
-	return rc;
-}
-
 static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1390,12 +1363,9 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
-	struct kvm_s390_float_interrupt *fi;
 	u64 type = READ_ONCE(inti->type);
 	int rc;
 
-	fi = &kvm->arch.float_int;
-
 	switch (type) {
 	case KVM_S390_MCHK:
 		rc = __inject_float_mchk(kvm, inti);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0a67c40eece9..8fe2f1c722dc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -514,35 +514,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (gtod_high != 0)
 		return -EINVAL;
-	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
+	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 
 	return 0;
 }
 
 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
-	struct kvm_vcpu *cur_vcpu;
-	unsigned int vcpu_idx;
-	u64 host_tod, gtod;
-	int r;
+	u64 gtod;
 
 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 		return -EFAULT;
 
-	r = store_tod_clock(&host_tod);
-	if (r)
-		return r;
-
-	mutex_lock(&kvm->lock);
-	preempt_disable();
-	kvm->arch.epoch = gtod - host_tod;
-	kvm_s390_vcpu_block_all(kvm);
-	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
-		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-	kvm_s390_vcpu_unblock_all(kvm);
-	preempt_enable();
-	mutex_unlock(&kvm->lock);
-	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
+	kvm_s390_set_tod_clock(kvm, gtod);
+	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 	return 0;
 }
 
@@ -574,26 +559,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
 					 sizeof(gtod_high)))
 		return -EFAULT;
-	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
+	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 
 	return 0;
 }
 
 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 {
-	u64 host_tod, gtod;
-	int r;
+	u64 gtod;
 
-	r = store_tod_clock(&host_tod);
-	if (r)
-		return r;
-
-	preempt_disable();
-	gtod = host_tod + kvm->arch.epoch;
-	preempt_enable();
+	gtod = kvm_s390_get_tod_clock_fast(kvm);
 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 		return -EFAULT;
-	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
+	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 
 	return 0;
 }
@@ -1120,7 +1098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.sca)
 		goto out_err;
 	spin_lock(&kvm_lock);
-	sca_offset = (sca_offset + 16) & 0x7f0;
+	sca_offset += 16;
+	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+		sca_offset = 0;
 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
 	spin_unlock(&kvm_lock);
 
@@ -1292,7 +1272,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 static inline void save_fpu_to(struct fpu *dst)
 {
 	dst->fpc = current->thread.fpu.fpc;
-	dst->flags = current->thread.fpu.flags;
 	dst->regs = current->thread.fpu.regs;
 }
 
@@ -1303,7 +1282,6 @@ static inline void save_fpu_to(struct fpu *dst)
 static inline void load_fpu_from(struct fpu *from)
 {
 	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.flags = from->flags;
 	current->thread.fpu.regs = from->regs;
 }
 
@@ -1315,15 +1293,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		current->thread.fpu.flags = FPU_USE_VX;
 		/*
 		 * Use the register save area in the SIE-control block
 		 * for register restore and save in kvm_arch_vcpu_put()
 		 */
 		current->thread.fpu.vxrs =
 			(__vector128 *)&vcpu->run->s.regs.vrs;
-		/* Always enable the vector extension for KVM */
-		__ctl_set_vx();
 	} else
 		load_fpu_from(&vcpu->arch.guest_fpregs);
 
@@ -1916,6 +1891,22 @@ retry:
 	return 0;
 }
 
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	mutex_lock(&kvm->lock);
+	preempt_disable();
+	kvm->arch.epoch = tod - get_tod_clock();
+	kvm_s390_vcpu_block_all(kvm);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+	kvm_s390_vcpu_unblock_all(kvm);
+	preempt_enable();
+	mutex_unlock(&kvm->lock);
+}
+
 /**
  * kvm_arch_fault_in_page - fault-in guest page if necessary
  * @vcpu: The corresponding virtual cpu
@@ -2326,7 +2317,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 		 * registers and the FPC value and store them in the
 		 * guest_fpregs structure.
 		 */
-		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
 				 current->thread.fpu.vxrs);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index c446aabf60d3..1e70e00d3c5e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -175,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 	return kvm->arch.user_cpu_state_ctrl != 0;
 }
 
+/* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
@@ -185,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_irq *irq);
-int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm = *pgm_info,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_PROGRAM_INT,
+		.u.pgm.code = code,
+	};
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 isc_mask, u32 schid);
 int kvm_s390_reinject_io_int(struct kvm *kvm,
@@ -212,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
@@ -231,9 +251,6 @@ extern unsigned long kvm_s390_fac_list_mask[];
 
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
-/* implemented in interrupt.c */
-int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
-			     struct kvm_s390_pgm_info *pgm_info);
 
 static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
 {
@@ -254,6 +271,16 @@ static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
 		kvm_s390_vcpu_unblock(vcpu);
 }
 
+static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
+{
+	u64 rc;
+
+	preempt_disable();
+	rc = get_tod_clock_fast() + kvm->arch.epoch;
+	preempt_enable();
+	return rc;
+}
+
 /**
  * kvm_s390_inject_prog_cond - conditionally inject a program check
  * @vcpu: virtual cpu
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4d21dc4d1a84..77191b85ea7a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -33,11 +33,9 @@
 /* Handle SCK (SET CLOCK) interception */
 static int handle_set_clock(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *cpup;
-	s64 hostclk, val;
-	int i, rc;
+	int rc;
 	ar_t ar;
-	u64 op2;
+	u64 op2, val;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -49,19 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 
-	if (store_tod_clock(&hostclk)) {
-		kvm_s390_set_psw_cc(vcpu, 3);
-		return 0;
-	}
 	VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val);
-	val = (val - hostclk) & ~0x3fUL;
-
-	mutex_lock(&vcpu->kvm->lock);
-	preempt_disable();
-	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
-		cpup->arch.sie_block->epoch = val;
-	preempt_enable();
-	mutex_unlock(&vcpu->kvm->lock);
+	kvm_s390_set_tod_clock(vcpu->kvm, val);
 
 	kvm_s390_set_psw_cc(vcpu, 0);
 	return 0;
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 246a7eb4b680..501dcd4ca4a0 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -12,8 +12,10 @@
 #include <linux/module.h>
 #include <linux/irqflags.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <asm/vtimer.h>
 #include <asm/div64.h>
+#include <asm/idle.h>
 
 void __delay(unsigned long loops)
 {
@@ -30,26 +32,22 @@ EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-	unsigned long cr0, cr6, new;
-	u64 clock_saved, end;
+	unsigned long cr0, cr0_new, psw_mask;
+	struct s390_idle_data idle;
+	u64 end;
 
 	end = get_tod_clock() + (usecs << 12);
-	clock_saved = local_tick_disable();
 	__ctl_store(cr0, 0, 0);
-	__ctl_store(cr6, 6, 6);
-	new = (cr0 &  0xffff00e0) | 0x00000800;
-	__ctl_load(new , 0, 0);
-	new = 0;
-	__ctl_load(new, 6, 6);
-	lockdep_off();
-	do {
-		set_clock_comparator(end);
-		enabled_wait();
-	} while (get_tod_clock_fast() < end);
-	lockdep_on();
+	cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
+	cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
+	__ctl_load(cr0_new, 0, 0);
+	psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
+	set_clock_comparator(end);
+	set_cpu_flag(CIF_IGNORE_IRQ);
+	psw_idle(&idle, psw_mask);
+	clear_cpu_flag(CIF_IGNORE_IRQ);
+	set_clock_comparator(S390_lowcore.clock_comparator);
 	__ctl_load(cr0, 0, 0);
-	__ctl_load(cr6, 6, 6);
-	local_tick_enable(clock_saved);
 }
 
 static void __udelay_enabled(unsigned long long usecs)
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
index 922003c1b90d..d90b9245ea41 100644
--- a/arch/s390/lib/find.c
+++ b/arch/s390/lib/find.c
@@ -1,10 +1,8 @@
 /*
  * MSB0 numbered special bitops handling.
  *
- * On s390x the bits are numbered:
+ * The bits are numbered:
  *   |0..............63|64............127|128...........191|192...........255|
- * and on s390:
- *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
  *
  * The reason for this bit numbering is the fact that the hardware sets bits
  * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index d6c9991f7797..427aa44b3505 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -197,7 +197,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
 		}
 		old = ACCESS_ONCE(rw->lock);
 		owner = ACCESS_ONCE(rw->owner);
-		smp_rmb();
+		smp_mb();
 		if ((int) old >= 0) {
 			prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
 			old = prev;
@@ -231,7 +231,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
 		    _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
 			prev = old;
 		else
-			smp_rmb();
+			smp_mb();
 		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
 			break;
 		if (MACHINE_HAS_CAD)
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 23c496957c22..18fccc303db7 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/ctype.h>
 #include <linux/ioport.h>
+#include <asm/diag.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/ebcdic.h>
@@ -112,6 +113,7 @@ dcss_set_subcodes(void)
 	ry = DCSS_FINDSEGX;
 
 	strcpy(name, "dummy");
+	diag_stat_inc(DIAG_STAT_X064);
 	asm volatile(
 		"	diag	%0,%1,0x64\n"
 		"0:	ipm	%2\n"
@@ -205,6 +207,7 @@ dcss_diag(int *func, void *parameter,
 	ry = (unsigned long) *func;
 
 	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+	diag_stat_inc(DIAG_STAT_X064);
 	if (*func > DCSS_SEGEXT)
 		asm volatile(
 			"	diag	%0,%1,0x64\n"
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index f985856a538b..ec1a30d0d11a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -30,6 +30,7 @@
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <asm/asm-offsets.h>
+#include <asm/diag.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
@@ -589,7 +590,7 @@ int pfault_init(void)
 		.reffcode = 0,
 		.refdwlen = 5,
 		.refversn = 2,
-		.refgaddr = __LC_CURRENT_PID,
+		.refgaddr = __LC_LPP,
 		.refselmk = 1ULL << 48,
 		.refcmpmk = 1ULL << 48,
 		.reserved = __PF_RES_FIELD };
@@ -597,6 +598,7 @@ int pfault_init(void)
 
 	if (pfault_disable)
 		return -1;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%1,%0,0x258\n"
 		"0:	j	2f\n"
@@ -618,6 +620,7 @@ void pfault_fini(void)
 
 	if (pfault_disable)
 		return;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%0,0,0x258\n"
 		"0:\n"
@@ -646,7 +649,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = param64;
+	pid = param64 & LPP_PFAULT_PID_MASK;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index fb4bf2c4379e..f81096b6940d 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -40,6 +40,7 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
 	} else
 		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
 	return pmd;
@@ -78,6 +79,7 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index eeda051442c3..9a0c4c22e536 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1310,7 +1310,7 @@ void bpf_int_jit_compile(struct bpf_prog *fp)
 	if (jit.prg_buf) {
 		set_memory_ro((unsigned long)header, header->pages);
 		fp->bpf_func = (void *) jit.prg_buf;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 free_addrs:
 	kfree(jit.addrs);
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 7de4e2f780d7..828d0695d0d4 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -368,7 +368,7 @@ static void topology_add_core(struct toptree *core)
 		cpumask_copy(&top->thread_mask, &core->mask);
 		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
 		cpumask_copy(&top->book_mask, &core_book(core)->mask);
-		cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]);
+		cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
 		top->node_id = core_node(core)->id;
 	}
 }
@@ -383,7 +383,7 @@ static void toptree_to_topology(struct toptree *numa)
 
 	/* Clear all node masks */
 	for (i = 0; i < MAX_NUMNODES; i++)
-		cpumask_clear(node_to_cpumask_map[i]);
+		cpumask_clear(&node_to_cpumask_map[i]);
 
 	/* Rebuild all masks */
 	toptree_for_each(core, numa, CORE)
@@ -436,9 +436,15 @@ static void emu_update_cpu_topology(void)
  */
 static unsigned long emu_setup_size_adjust(unsigned long size)
 {
+	unsigned long size_new;
+
 	size = size ? : CONFIG_EMU_SIZE;
-	size = roundup(size, memory_block_size_bytes());
-	return size;
+	size_new = roundup(size, memory_block_size_bytes());
+	if (size_new == size)
+		return size;
+	pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
+		size >> 20, size_new >> 20);
+	return size_new;
 }
 
 /*
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 09b1d2355bd9..43f32ce60aa3 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -23,7 +23,7 @@
 pg_data_t *node_data[MAX_NUMNODES];
 EXPORT_SYMBOL(node_data);
 
-cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+cpumask_t node_to_cpumask_map[MAX_NUMNODES];
 EXPORT_SYMBOL(node_to_cpumask_map);
 
 const struct numa_mode numa_mode_plain = {
@@ -144,7 +144,7 @@ void __init numa_setup(void)
 static int __init numa_init_early(void)
 {
 	/* Attach all possible CPUs to node 0 for now. */
-	cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask);
+	cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
 	return 0;
 }
 early_initcall(numa_init_early);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 37505b8b4093..37d10f74425a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -24,7 +24,7 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
 				  zdev->iommu_pages * PAGE_SIZE);
 }
 
-static unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(void)
 {
 	unsigned long *table, *entry;
 
@@ -114,12 +114,12 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr
 	return &pto[px];
 }
 
-static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
-				 dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr,
+			  dma_addr_t dma_addr, int flags)
 {
 	unsigned long *entry;
 
-	entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+	entry = dma_walk_cpu_trans(dma_table, dma_addr);
 	if (!entry) {
 		WARN_ON_ONCE(1);
 		return;
@@ -156,7 +156,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 		goto no_refresh;
 
 	for (i = 0; i < nr_pages; i++) {
-		dma_update_cpu_trans(zdev, page_addr, dma_addr, flags);
+		dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr,
+				     flags);
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
@@ -181,7 +182,7 @@ no_refresh:
 	return rc;
 }
 
-static void dma_free_seg_table(unsigned long entry)
+void dma_free_seg_table(unsigned long entry)
 {
 	unsigned long *sto = get_rt_sto(entry);
 	int sx;
@@ -193,21 +194,18 @@ static void dma_free_seg_table(unsigned long entry)
 	dma_free_cpu_table(sto);
 }
 
-static void dma_cleanup_tables(struct zpci_dev *zdev)
+void dma_cleanup_tables(unsigned long *table)
 {
-	unsigned long *table;
 	int rtx;
 
-	if (!zdev || !zdev->dma_table)
+	if (!table)
 		return;
 
-	table = zdev->dma_table;
 	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
 		if (reg_entry_isvalid(table[rtx]))
 			dma_free_seg_table(table[rtx]);
 
 	dma_free_cpu_table(table);
-	zdev->dma_table = NULL;
 }
 
 static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
@@ -416,6 +414,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 {
 	int rc;
 
+	/*
+	 * At this point, if the device is part of an IOMMU domain, this would
+	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
+	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+	 */
+	WARN_ON(zdev->s390_domain);
+
 	spin_lock_init(&zdev->iommu_bitmap_lock);
 	spin_lock_init(&zdev->dma_table_lock);
 
@@ -450,8 +455,16 @@ out_clean:
 
 void zpci_dma_exit_device(struct zpci_dev *zdev)
 {
+	/*
+	 * At this point, if the device is part of an IOMMU domain, this would
+	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
+	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+	 */
+	WARN_ON(zdev->s390_domain);
+
 	zpci_unregister_ioat(zdev, 0);
-	dma_cleanup_tables(zdev);
+	dma_cleanup_tables(zdev->dma_table);
+	zdev->dma_table = NULL;
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
 	zdev->next_bit = 0;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index dcc2634ccbe2..10ca15dcab11 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -16,11 +16,11 @@
 static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
 {
 	struct {
-		u8 cc;
-		u8 status;
 		u64 req;
 		u64 offset;
-	} data = {cc, status, req, offset};
+		u8 cc;
+		u8 status;
+	} __packed data = {req, offset, cc, status};
 
 	zpci_err_hex(&data, sizeof(data));
 }
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 92ffe397b893..a05218ff3fe4 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -13,3 +13,4 @@ generic-y += sections.h
 generic-y += trace_clock.h
 generic-y += xor.h
 generic-y += serial.h
+generic-y += word-at-a-time.h
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 05b9f74ce2d5..c399e1c55685 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -14,8 +14,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)		((v)->counter = (i))
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index fe20d14ae051..ceb5201a30ed 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -59,6 +59,7 @@ pages_do_alias(unsigned long addr1, unsigned long addr2)
 
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, void *from);
+#define copy_user_page(to, from, vaddr, pg)  __copy_user(to, from, PAGE_SIZE)
 
 struct page;
 struct vm_area_struct;
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 2e48eb8813ff..c90930de76ba 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -433,6 +433,7 @@ static struct crypto_alg algs[] = { {
 		.blkcipher = {
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= aes_set_key,
 			.encrypt	= cbc_encrypt,
 			.decrypt	= cbc_decrypt,
@@ -452,6 +453,7 @@ static struct crypto_alg algs[] = { {
 		.blkcipher = {
 			.min_keysize	= AES_MIN_KEY_SIZE,
 			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= aes_set_key,
 			.encrypt	= ctr_crypt,
 			.decrypt	= ctr_crypt,
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 6bf2479a12fb..561a84d93cf6 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -274,6 +274,7 @@ static struct crypto_alg algs[] = { {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
 			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
 			.setkey		= camellia_set_key,
 			.encrypt	= cbc_encrypt,
 			.decrypt	= cbc_decrypt,
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index dd6a34fa6e19..61af794aa2d3 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -429,6 +429,7 @@ static struct crypto_alg algs[] = { {
 		.blkcipher = {
 			.min_keysize	= DES_KEY_SIZE,
 			.max_keysize	= DES_KEY_SIZE,
+			.ivsize		= DES_BLOCK_SIZE,
 			.setkey		= des_set_key,
 			.encrypt	= cbc_encrypt,
 			.decrypt	= cbc_decrypt,
@@ -485,6 +486,7 @@ static struct crypto_alg algs[] = { {
 		.blkcipher = {
 			.min_keysize	= DES3_EDE_KEY_SIZE,
 			.max_keysize	= DES3_EDE_KEY_SIZE,
+			.ivsize		= DES3_EDE_BLOCK_SIZE,
 			.setkey		= des3_ede_set_key,
 			.encrypt	= cbc3_encrypt,
 			.decrypt	= cbc3_decrypt,
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 917084ace49d..f2fbf9e16faf 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -14,11 +14,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v, i)	(((v)->counter) = i)
-#define atomic64_set(v, i)	(((v)->counter) = i)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)							\
 void atomic_##op(int, atomic_t *);					\
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 01d17046225a..bec481aaca16 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -31,6 +31,9 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 	 cpu_all_mask : \
 	 cpumask_of_node(pcibus_to_node(bus)))
 
+int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+
 #else /* CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
diff --git a/arch/sparc/include/uapi/asm/asi.h b/arch/sparc/include/uapi/asm/asi.h
index aace6f313716..7ad7203deaec 100644
--- a/arch/sparc/include/uapi/asm/asi.h
+++ b/arch/sparc/include/uapi/asm/asi.h
@@ -279,7 +279,7 @@
 				      * Most-Recently-Used, primary,
 				      * implicit
 				      */
-#define ASI_ST_BLKINIT_MRU_S	0xf2 /* (NG4) init-store, twin load,
+#define ASI_ST_BLKINIT_MRU_S	0xf3 /* (NG4) init-store, twin load,
 				      * Most-Recently-Used, secondary,
 				      * implicit
 				      */
diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h
index 0b14df33cffa..9765896ecb2c 100644
--- a/arch/sparc/include/uapi/asm/mman.h
+++ b/arch/sparc/include/uapi/asm/mman.h
@@ -17,6 +17,7 @@
 
 #define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
 #define MCL_FUTURE      0x4000          /* lock all additions to address space */
+#define MCL_ONFAULT	0x8000		/* lock all pages that are faulted in */
 
 #define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 5320689c06e9..37686828c3d9 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -161,7 +161,7 @@ static inline iopte_t *alloc_npages(struct device *dev,
 
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		return NULL;
 
 	return iommu->page_table + entry;
@@ -253,7 +253,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
 	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
 	iommu = dev->archdata.iommu;
 
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
 
 	order = get_order(size);
 	if (order < 10)
@@ -426,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	iommu_free_ctx(iommu, ctx);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 }
 
 static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -492,7 +492,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
-		if (unlikely(entry == DMA_ERROR_CODE)) {
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
 			if (printk_ratelimit())
 				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
 				       " npages %lx\n", iommu, paddr, npages);
@@ -571,7 +571,7 @@ iommu_map_failed:
 				iopte_make_dummy(iommu, base + j);
 
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     DMA_ERROR_CODE);
+					     IOMMU_ERROR_CODE);
 
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -648,7 +648,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 			iopte_make_dummy(iommu, base + i);
 
 		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
-				     DMA_ERROR_CODE);
+				     IOMMU_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 1ae5eb1bb045..59d503866431 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -1953,7 +1953,7 @@ static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
 
 	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
 				      npages, NULL, (unsigned long)-1, 0);
-	if (unlikely(entry < 0))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		return NULL;
 
 	return iommu->page_table + entry;
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index d2fe57dad433..836e8cef47e2 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -159,7 +159,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
 
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		goto range_alloc_fail;
 
 	*dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
@@ -187,7 +187,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
 	return ret;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
 
 range_alloc_fail:
 	free_pages(first_page, order);
@@ -226,7 +226,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 	devhandle = pbm->devhandle;
 	entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
 	order = get_order(size);
 	if (order < 10)
 		free_pages((unsigned long)cpu, order);
@@ -256,7 +256,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
 	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
 				      (unsigned long)(-1), 0);
 
-	if (unlikely(entry == DMA_ERROR_CODE))
+	if (unlikely(entry == IOMMU_ERROR_CODE))
 		goto bad;
 
 	bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
@@ -288,7 +288,7 @@ bad:
 	return DMA_ERROR_CODE;
 
 iommu_map_fail:
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 	return DMA_ERROR_CODE;
 }
 
@@ -317,7 +317,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 	bus_addr &= IO_PAGE_MASK;
 	entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
 	dma_4v_iommu_demap(&devhandle, entry, npages);
-	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE);
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 }
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -376,7 +376,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 					      &handle, (unsigned long)(-1), 0);
 
 		/* Handle failure */
-		if (unlikely(entry == DMA_ERROR_CODE)) {
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
 			if (printk_ratelimit())
 				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
 				       " npages %lx\n", iommu, paddr, npages);
@@ -451,7 +451,7 @@ iommu_map_failed:
 			npages = iommu_num_pages(s->dma_address, s->dma_length,
 						 IO_PAGE_SIZE);
 			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
-					     DMA_ERROR_CODE);
+					     IOMMU_ERROR_CODE);
 			/* XXX demap? XXX */
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
@@ -496,7 +496,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 		entry = ((dma_handle - tbl->table_map_base) >> shift);
 		dma_4v_iommu_demap(&devhandle, entry, npages);
 		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
-				     DMA_ERROR_CODE);
+				     IOMMU_ERROR_CODE);
 		sg = sg_next(sg);
 	}
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 689db65f8529..b0da5aedb336 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -108,7 +108,7 @@ struct cpu_hw_events {
 	/* Enabled/disable state.  */
 	int			enabled;
 
-	unsigned int		group_flag;
+	unsigned int		txn_flags;
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 
@@ -1379,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
 	 * skip the schedulability test here, it will be performed
 	 * at commit time(->commit_txn) as a whole
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN)
+	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto nocheck;
 
 	if (check_excludes(cpuc->event, n0, 1))
@@ -1494,12 +1494,17 @@ static int sparc_pmu_event_init(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  */
-static void sparc_pmu_start_txn(struct pmu *pmu)
+static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+
+	cpuhw->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	perf_pmu_disable(pmu);
-	cpuhw->group_flag |= PERF_EVENT_TXN;
 }
 
 /*
@@ -1510,8 +1515,15 @@ static void sparc_pmu_start_txn(struct pmu *pmu)
 static void sparc_pmu_cancel_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	unsigned int txn_flags;
+
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	perf_pmu_enable(pmu);
 }
 
@@ -1528,14 +1540,20 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
 	if (!sparc_pmu)
 		return -EINVAL;
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
+	WARN_ON_ONCE(!cpuc->txn_flags);	/* no txn in flight */
+
+	if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuc->txn_flags = 0;
+		return 0;
+	}
+
 	n = cpuc->n_events;
 	if (check_excludes(cpuc->event, 0, n))
 		return -EINVAL;
 	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
 		return -EAGAIN;
 
-	cpuc->group_flag &= ~PERF_EVENT_TXN;
+	cpuc->txn_flags = 0;
 	perf_pmu_enable(pmu);
 	return 0;
 }
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index 62098a89bbbf..d89e97b374cf 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -436,24 +436,26 @@ extern void sun4v_data_access_exception(struct pt_regs *regs,
 int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 {
 	unsigned long addr = compute_effective_address(regs, insn, 0);
-	int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+	int freg;
 	struct fpustate *f = FPUSTATE;
 	int asi = decode_asi(insn, regs);
-	int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+	int flag;
 
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	save_and_clear_fpu();
 	current_thread_info()->xfsr[0] &= ~0x1c000;
-	if (freg & 3) {
-		current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
-		do_fpother(regs);
-		return 0;
-	}
 	if (insn & 0x200000) {
 		/* STQ */
 		u64 first = 0, second = 0;
 		
+		freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+		if (freg & 3) {
+			current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+			do_fpother(regs);
+			return 0;
+		}
 		if (current_thread_info()->fpsaved[0] & flag) {
 			first = *(u64 *)&f->regs[freg];
 			second = *(u64 *)&f->regs[freg+2];
@@ -513,6 +515,12 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 		case 0x100000: size = 4; break;
 		default: size = 2; break;
 		}
+		if (size == 1)
+			freg = (insn >> 25) & 0x1f;
+		else
+			freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+
 		for (i = 0; i < size; i++)
 			data[i] = 0;
 		
diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S
index a063d84336d6..62c2647bd5ce 100644
--- a/arch/sparc/lib/VISsave.S
+++ b/arch/sparc/lib/VISsave.S
@@ -6,24 +6,23 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/linkage.h>
+
 #include <asm/asi.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 
-	.text
-	.globl		VISenter, VISenterhalf
-
 	/* On entry: %o5=current FPRS value, %g7 is callers address */
 	/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
 
 	/* Nothing special need be done here to handle pre-emption, this
 	 * FPU save/restore mechanism is already preemption safe.
 	 */
-
+	.text
 	.align		32
-VISenter:
+ENTRY(VISenter)
 	ldub		[%g6 + TI_FPDEPTH], %g1
 	brnz,a,pn	%g1, 1f
 	 cmp		%g1, 1
@@ -79,3 +78,4 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	.align		32
 80:	jmpl		%g7 + %g0, %g0
 	 nop
+ENDPROC(VISenter)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4ac88b757514..3025bd57f7ab 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -93,6 +93,8 @@ static unsigned long cpu_pgsz_mask;
 static struct linux_prom64_registers pavail[MAX_BANKS];
 static int pavail_ents;
 
+u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];
+
 static int cmp_p64(const void *a, const void *b)
 {
 	const struct linux_prom64_registers *x = a, *y = b;
@@ -1157,6 +1159,48 @@ static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
 	return NULL;
 }
 
+int __node_distance(int from, int to)
+{
+	if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) {
+		pr_warn("Returning default NUMA distance value for %d->%d\n",
+			from, to);
+		return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+	return numa_latency[from][to];
+}
+
+static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct node_mem_mask *n = &node_masks[i];
+
+		if ((grp->mask == n->mask) && (grp->match == n->val))
+			break;
+	}
+	return i;
+}
+
+static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
+					  int index)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		int tnode;
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+
+		if (!m)
+			continue;
+		tnode = find_best_numa_node_for_mlgroup(m);
+		if (tnode == MAX_NUMNODES)
+			continue;
+		numa_latency[index][tnode] = m->latency;
+	}
+}
+
 static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
 				      int index)
 {
@@ -1220,9 +1264,16 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
 static int __init numa_parse_mdesc(void)
 {
 	struct mdesc_handle *md = mdesc_grab();
-	int i, err, count;
+	int i, j, err, count;
 	u64 node;
 
+	/* Some sane defaults for numa latency values */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		for (j = 0; j < MAX_NUMNODES; j++)
+			numa_latency[i][j] = (i == j) ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+
 	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
 	if (node == MDESC_NODE_NULL) {
 		mdesc_release(md);
@@ -1245,6 +1296,23 @@ static int __init numa_parse_mdesc(void)
 		count++;
 	}
 
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		find_numa_latencies_for_group(md, node, count);
+		count++;
+	}
+
+	/* Normalize numa latency matrix according to ACPI SLIT spec. */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		u64 self_latency = numa_latency[i][i];
+
+		for (j = 0; j < MAX_NUMNODES; j++) {
+			numa_latency[i][j] =
+				(numa_latency[i][j] * LOCAL_DISTANCE) /
+				self_latency;
+		}
+	}
+
 	add_node_ranges();
 
 	for (i = 0; i < num_node_masks; i++) {
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index f8b9f71b9a2b..22564f5f2364 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -812,7 +812,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 	if (image) {
 		bpf_flush_icache(image, image + proglen);
 		fp->bpf_func = (void *)image;
-		fp->jited = true;
+		fp->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index ee186e13dfe6..f102048d9c0e 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/string.h>
 
 #include <gxio/iorpc_globals.h>
 #include <gxio/iorpc_mpipe.h>
@@ -29,32 +30,6 @@
 /* HACK: Avoid pointless "shadow" warnings. */
 #define link link_shadow
 
-/**
- * strscpy - Copy a C-string into a sized buffer, but only if it fits
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @size: size of destination buffer
- *
- * Use this routine to avoid copying too-long strings.
- * The routine returns the total number of bytes copied
- * (including the trailing NUL) or zero if the buffer wasn't
- * big enough.  To ensure that programmers pay attention
- * to the return code, the destination has a single NUL
- * written at the front (if size is non-zero) when the
- * buffer is not big enough.
- */
-static size_t strscpy(char *dest, const char *src, size_t size)
-{
-	size_t len = strnlen(src, size) + 1;
-	if (len > size) {
-		if (size)
-			dest[0] = '\0';
-		return 0;
-	}
-	memcpy(dest, src, len);
-	return len;
-}
-
 int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index)
 {
 	char file[32];
@@ -540,7 +515,7 @@ int gxio_mpipe_link_instance(const char *link_name)
 	if (!context)
 		return GXIO_ERR_NO_DEVICE;
 
-	if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+	if (strscpy(name.name, link_name, sizeof(name.name)) < 0)
 		return GXIO_ERR_NO_DEVICE;
 
 	return gxio_mpipe_info_instance_aux(context, name);
@@ -559,7 +534,7 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac)
 
 	rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac);
 	if (rv >= 0) {
-		if (strscpy(link_name, name.name, sizeof(name.name)) == 0)
+		if (strscpy(link_name, name.name, sizeof(name.name)) < 0)
 			return GXIO_ERR_INVAL_MEMORY_SIZE;
 		memcpy(link_mac, mac.mac, sizeof(mac.mac));
 	}
@@ -576,7 +551,7 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
 	_gxio_mpipe_link_name_t name;
 	int rv;
 
-	if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+	if (strscpy(name.name, link_name, sizeof(name.name)) < 0)
 		return GXIO_ERR_NO_DEVICE;
 
 	rv = gxio_mpipe_link_open_aux(context, name, flags);
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 709798460763..9fc0107a9c5e 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -34,7 +34,7 @@
  */
 static inline int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE(v->counter);
+	return READ_ONCE(v->counter);
 }
 
 /**
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 096a56d6ead4..51cabc26e387 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -24,7 +24,7 @@
 
 /* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
 
-#define atomic_set(v, i) ((v)->counter = (i))
+#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
 
 /*
  * The smp_mb() operations throughout are to support the fact that
@@ -82,8 +82,8 @@ static inline void atomic_xor(int i, atomic_t *v)
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic64_read(v)		((v)->counter)
-#define atomic64_set(v, i) ((v)->counter = (i))
+#define atomic64_read(v)	READ_ONCE((v)->counter)
+#define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 static inline void atomic64_add(long i, atomic64_t *v)
 {
diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h
index 9e5ce0d7b292..b66a693c2c34 100644
--- a/arch/tile/include/asm/word-at-a-time.h
+++ b/arch/tile/include/asm/word-at-a-time.h
@@ -6,7 +6,7 @@
 struct word_at_a_time { /* unused */ };
 #define WORD_AT_A_TIME_CONSTANTS {}
 
-/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */
+/* Generate 0x01 byte values for zero bytes using a SIMD instruction. */
 static inline unsigned long has_zero(unsigned long val, unsigned long *data,
 				     const struct word_at_a_time *c)
 {
@@ -33,4 +33,10 @@ static inline long find_zero(unsigned long mask)
 #endif
 }
 
+#ifdef __BIG_ENDIAN
+#define zero_bytemask(mask) (~1ul << (63 - __builtin_clzl(mask)))
+#else
+#define zero_bytemask(mask) ((2ul << __builtin_ctzl(mask)) - 1)
+#endif
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/tile/include/uapi/asm/mman.h b/arch/tile/include/uapi/asm/mman.h
index 81b8fc348d63..63ee13faf17d 100644
--- a/arch/tile/include/uapi/asm/mman.h
+++ b/arch/tile/include/uapi/asm/mman.h
@@ -36,6 +36,7 @@
  */
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
 
 
 #endif /* _ASM_TILE_MMAN_H */
diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c
index f0da5a237e94..9f1e05e12255 100644
--- a/arch/tile/kernel/usb.c
+++ b/arch/tile/kernel/usb.c
@@ -22,6 +22,7 @@
 #include <linux/platform_device.h>
 #include <linux/usb/tilegx.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/types.h>
 
 static u64 ehci_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 098ab3333e7c..e3abe6f3156d 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -70,8 +70,8 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE)
 
 USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \
 		$(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \
-		-D_FILE_OFFSET_BITS=64 -idirafter include \
-		-D__KERNEL__ -D__UM_HOST__
+		-D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \
+		-idirafter $(obj)/include -D__KERNEL__ -D__UM_HOST__
 
 #This will adjust *FLAGS accordingly to the platform.
 include $(ARCH_DIR)/Makefile-os-$(OS)
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 149ec55f9c46..904f3ebf4220 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -25,4 +25,5 @@ generic-y += preempt.h
 generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index d8a9fce6ee2e..98783dd0fa2e 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -220,7 +220,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Segfault with no mm");
 	}
-	else if (!is_user && address < TASK_SIZE) {
+	else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) {
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx",
 		       address, ip);
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index e3ee4a51ef63..3f02d4232812 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -96,7 +96,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 			       "ret = %d\n", -n);
 			ret = n;
 		}
-		CATCH_EINTR(waitpid(pid, NULL, __WCLONE));
+		CATCH_EINTR(waitpid(pid, NULL, __WALL));
 	}
 
 out_free2:
@@ -129,7 +129,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
 		return err;
 	}
 	if (stack_out == NULL) {
-		CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE));
+		CATCH_EINTR(pid = waitpid(pid, &status, __WALL));
 		if (pid < 0) {
 			err = -errno;
 			printk(UM_KERN_ERR "run_helper_thread - wait failed, "
@@ -148,7 +148,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
 int helper_wait(int pid)
 {
 	int ret, status;
-	int wflags = __WCLONE;
+	int wflags = __WALL;
 
 	CATCH_EINTR(ret = waitpid(pid, &status, wflags));
 	if (ret < 0) {
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 928237a7b9ca..c9faddc61100 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -222,7 +222,7 @@ config I2C_BATTERY_BQ27200
 	tristate "I2C Battery BQ27200 Support"
 	select I2C_PUV3
 	select POWER_SUPPLY
-	select BATTERY_BQ27x00
+	select BATTERY_BQ27XXX
 
 config I2C_EEPROM_AT24
 	tristate "I2C EEPROMs AT24 support"
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1fc7a286dc6f..256c45b3ae34 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -62,4 +62,5 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 328c8352480c..db3622f22b61 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1123,8 +1123,10 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	tristate "CPU microcode loading support"
+	bool "CPU microcode loading support"
+	default y
 	depends on CPU_SUP_AMD || CPU_SUP_INTEL
+	depends on BLK_DEV_INITRD
 	select FW_LOADER
 	---help---
 
@@ -1166,24 +1168,6 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
-config MICROCODE_INTEL_EARLY
-	bool
-
-config MICROCODE_AMD_EARLY
-	bool
-
-config MICROCODE_EARLY
-	bool "Early load microcode"
-	depends on MICROCODE=y && BLK_DEV_INITRD
-	select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
-	select MICROCODE_AMD_EARLY if MICROCODE_AMD
-	default y
-	help
-	  This option provides functionality to read additional microcode data
-	  at the beginning of initrd image. The data tells kernel to load
-	  microcode to CPU's as early as possible. No functional change if no
-	  microcode data is glued to the initrd, therefore it's safe to say Y.
-
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
@@ -1308,6 +1292,7 @@ config HIGHMEM
 config X86_PAE
 	bool "PAE (Physical Address Extension) Support"
 	depends on X86_32 && !HIGHMEM4G
+	select SWIOTLB
 	---help---
 	  PAE is required for NX support, and furthermore enables
 	  larger swapspace support for non-overcommit purposes. It
@@ -2042,6 +2027,55 @@ config COMPAT_VDSO
 	  If unsure, say N: if you are compiling your own kernel, you
 	  are unlikely to be using a buggy version of glibc.
 
+choice
+	prompt "vsyscall table for legacy applications"
+	depends on X86_64
+	default LEGACY_VSYSCALL_EMULATE
+	help
+	  Legacy user code that does not know how to find the vDSO expects
+	  to be able to issue three syscalls by calling fixed addresses in
+	  kernel space. Since this location is not randomized with ASLR,
+	  it can be used to assist security vulnerability exploitation.
+
+	  This setting can be changed at boot time via the kernel command
+	  line parameter vsyscall=[native|emulate|none].
+
+	  On a system with recent enough glibc (2.14 or newer) and no
+	  static binaries, you can say None without a performance penalty
+	  to improve security.
+
+	  If unsure, select "Emulate".
+
+	config LEGACY_VSYSCALL_NATIVE
+		bool "Native"
+		help
+		  Actual executable code is located in the fixed vsyscall
+		  address mapping, implementing time() efficiently. Since
+		  this makes the mapping executable, it can be used during
+		  security vulnerability exploitation (traditionally as
+		  ROP gadgets). This configuration is not recommended.
+
+	config LEGACY_VSYSCALL_EMULATE
+		bool "Emulate"
+		help
+		  The kernel traps and emulates calls into the fixed
+		  vsyscall address mapping. This makes the mapping
+		  non-executable, but it still contains known contents,
+		  which could be used in certain rare security vulnerability
+		  exploits. This configuration is recommended when userspace
+		  still uses the vsyscall area.
+
+	config LEGACY_VSYSCALL_NONE
+		bool "None"
+		help
+		  There will be no vsyscall mapping at all. This will
+		  eliminate any risk of ASLR bypass due to the vsyscall
+		  fixed address mapping. Attempts to use the vsyscalls
+		  will be reported to dmesg, so that either old or
+		  malicious userspace programs can be identified.
+
+endchoice
+
 config CMDLINE_BOOL
 	bool "Built-in kernel command line"
 	---help---
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 6983314c8b37..3ba5ff2f2d08 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -3,10 +3,6 @@ choice
 	prompt "Processor family"
 	default M686 if X86_32
 	default GENERIC_CPU if X86_64
-
-config M486
-	bool "486"
-	depends on X86_32
 	---help---
 	  This is the processor type of your CPU. This information is
 	  used for optimizing purposes. In order to compile a kernel
@@ -23,9 +19,9 @@ config M486
 
 	  Here are the settings recommended for greatest speed:
 	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
-	  SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+	    SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
 	  - "586" for generic Pentium CPUs lacking the TSC
-	  (time stamp counter) register.
+	    (time stamp counter) register.
 	  - "Pentium-Classic" for the Intel Pentium.
 	  - "Pentium-MMX" for the Intel Pentium MMX.
 	  - "Pentium-Pro" for the Intel Pentium Pro.
@@ -34,17 +30,31 @@ config M486
 	  - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
 	  - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
 	  - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+	  - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs.
 	  - "Crusoe" for the Transmeta Crusoe series.
 	  - "Efficeon" for the Transmeta Efficeon series.
 	  - "Winchip-C6" for original IDT Winchip.
 	  - "Winchip-2" for IDT Winchips with 3dNow! capabilities.
+	  - "AMD Elan" for the 32-bit AMD Elan embedded CPU.
 	  - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
 	  - "Geode GX/LX" For AMD Geode GX and LX processors.
 	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
 	  - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
 	  - "VIA C7" for VIA C7.
+	  - "Intel P4" for the Pentium 4/Netburst microarchitecture.
+	  - "Core 2/newer Xeon" for all core2 and newer Intel CPUs.
+	  - "Intel Atom" for the Atom-microarchitecture CPUs.
+	  - "Generic-x86-64" for a kernel which runs on any x86-64 CPU.
+
+	  See each option's help text for additional details. If you don't know
+	  what to do, choose "486".
 
-	  If you don't know what to do, choose "486".
+config M486
+	bool "486"
+	depends on X86_32
+	---help---
+	  Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel
+	  486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
 
 config M586
 	bool "586/K5/5x86/6x86/6x86MX"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8c0d3266173..3e0baf726eef 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI
 	  This is useful for kernel debugging when your machine crashes very
 	  early before the console code is initialized.
 
+config X86_PTDUMP_CORE
+	def_bool n
+
 config X86_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
 	select DEBUG_FS
+	select X86_PTDUMP_CORE
 	---help---
 	  Say Y here if you want to show the kernel pagetable layout in a
 	  debugfs file. This information is only useful for kernel developers
@@ -79,7 +83,8 @@ config X86_PTDUMP
 
 config EFI_PGT_DUMP
 	bool "Dump the EFI pagetable"
-	depends on EFI && X86_PTDUMP
+	depends on EFI
+	select X86_PTDUMP_CORE
 	---help---
 	  Enable this if you want to dump the EFI page table before
 	  enabling virtual mode. This can be used to debug miscellaneous
@@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST
 	  feature as well as for the change_page_attr() infrastructure.
 	  If in doubt, say "N"
 
+config DEBUG_WX
+	bool "Warn on W+X mappings at boot"
+	depends on DEBUG_RODATA
+	default y
+	select X86_PTDUMP_CORE
+	---help---
+	  Generate a warning if any W+X mappings are found at boot.
+
+	  This is useful for discovering cases where the kernel is leaving
+	  W+X mappings after applying NX, as such mappings are a security risk.
+
+	  Look for a message in dmesg output like this:
+
+	    x86/mm: Checked W+X mappings: passed, no W+X pages found.
+
+	  or like this, if the check failed:
+
+	    x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+	  Note that even if the check fails, your kernel is possibly
+	  still fine, as W+X mappings are not a security hole in
+	  themselves, what they do is that they make the exploitation
+	  of other unfixed kernel bugs easier.
+
+	  There is no runtime or memory usage effect of this option
+	  once the kernel has booted up - it's a one time check.
+
+	  If in doubt, say "Y".
+
 config DEBUG_SET_MODULE_RONX
 	bool "Set loadable kernel module data as NX and text as RO"
 	depends on MODULES
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 747860c696e1..4086abca0b32 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -159,15 +159,23 @@ endif
 sp-$(CONFIG_X86_32) := esp
 sp-$(CONFIG_X86_64) := rsp
 
+# do binutils support CFI?
+cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
+# is .cfi_signal_frame supported too?
+cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
+cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
+
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
 asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
+sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
+sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
 
-KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
-KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 0d553e54171b..2ee62dba0373 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -9,13 +9,13 @@
 # Changed by many, many contributors over the years.
 #
 
+KASAN_SANITIZE := n
+
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
-KASAN_SANITIZE := n
-
 SVGA_MODE	:= -DSVGA_MODE=NORMAL_VGA
 
 targets		:= vmlinux.bin setup.bin setup.elf bzImage
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ee1b6d346b98..583d539a4197 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -624,7 +624,7 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
 static efi_status_t
 __gop_query32(struct efi_graphics_output_protocol_32 *gop32,
 	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u32 *fb_base)
+	      unsigned long *size, u64 *fb_base)
 {
 	struct efi_graphics_output_protocol_mode_32 *mode;
 	efi_status_t status;
@@ -650,7 +650,8 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
 	unsigned long nr_gops;
 	u16 width, height;
 	u32 pixels_per_scan_line;
-	u32 fb_base;
+	u32 ext_lfb_base;
+	u64 fb_base;
 	struct efi_pixel_bitmask pixel_info;
 	int pixel_format;
 	efi_status_t status;
@@ -667,6 +668,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
 		bool conout_found = false;
 		void *dummy = NULL;
 		u32 h = handles[i];
+		u64 current_fb_base;
 
 		status = efi_call_early(handle_protocol, h,
 					proto, (void **)&gop32);
@@ -678,7 +680,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
 		if (status == EFI_SUCCESS)
 			conout_found = true;
 
-		status = __gop_query32(gop32, &info, &size, &fb_base);
+		status = __gop_query32(gop32, &info, &size, &current_fb_base);
 		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
 			/*
 			 * Systems that use the UEFI Console Splitter may
@@ -692,6 +694,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
 			pixel_format = info->pixel_format;
 			pixel_info = info->pixel_information;
 			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
 
 			/*
 			 * Once we've found a GOP supporting ConOut,
@@ -713,6 +716,13 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
 	si->lfb_width = width;
 	si->lfb_height = height;
 	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
 	si->pages = 1;
 
 	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
@@ -727,7 +737,7 @@ out:
 static efi_status_t
 __gop_query64(struct efi_graphics_output_protocol_64 *gop64,
 	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u32 *fb_base)
+	      unsigned long *size, u64 *fb_base)
 {
 	struct efi_graphics_output_protocol_mode_64 *mode;
 	efi_status_t status;
@@ -753,7 +763,8 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
 	unsigned long nr_gops;
 	u16 width, height;
 	u32 pixels_per_scan_line;
-	u32 fb_base;
+	u32 ext_lfb_base;
+	u64 fb_base;
 	struct efi_pixel_bitmask pixel_info;
 	int pixel_format;
 	efi_status_t status;
@@ -770,6 +781,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
 		bool conout_found = false;
 		void *dummy = NULL;
 		u64 h = handles[i];
+		u64 current_fb_base;
 
 		status = efi_call_early(handle_protocol, h,
 					proto, (void **)&gop64);
@@ -781,7 +793,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
 		if (status == EFI_SUCCESS)
 			conout_found = true;
 
-		status = __gop_query64(gop64, &info, &size, &fb_base);
+		status = __gop_query64(gop64, &info, &size, &current_fb_base);
 		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
 			/*
 			 * Systems that use the UEFI Console Splitter may
@@ -795,6 +807,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
 			pixel_format = info->pixel_format;
 			pixel_info = info->pixel_information;
 			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
 
 			/*
 			 * Once we've found a GOP supporting ConOut,
@@ -816,6 +829,13 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
 	si->lfb_width = width;
 	si->lfb_height = height;
 	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
 	si->pages = 1;
 
 	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2d6b309c8e9a..6236b9ec4b76 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -154,7 +154,7 @@ extra_header_fields:
 #else
 	.quad	0				# ImageBase
 #endif
-	.long	CONFIG_PHYSICAL_ALIGN		# SectionAlignment
+	.long	0x20				# SectionAlignment
 	.long	0x20				# FileAlignment
 	.word	0				# MajorOperatingSystemVersion
 	.word	0				# MinorOperatingSystemVersion
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 9a2838cf0591..b9b912a44d61 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -5,6 +5,8 @@
 avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
 				$(comma)4)$(comma)%ymm2,yes,no)
+sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
+sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
 
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
@@ -91,9 +93,15 @@ ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
 poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
+ifeq ($(sha1_ni_supported),yes)
+sha1-ssse3-y += sha1_ni_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
+ifeq ($(sha256_ni_supported),yes)
+sha256-ssse3-y += sha256_ni_asm.o
+endif
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 4c65c70e628b..d84456924563 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 80a0e4389c9a..93d8f295784e 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,7 +554,13 @@ static int __init camellia_aesni_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+		pr_info("AVX or AES-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index be00aa48b2b5..8648158f3916 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -469,7 +469,8 @@ static int __init cast5_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 5dbba7224221..fca459578c35 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -591,7 +591,8 @@ static int __init cast6_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index effe2160b7c5..722bacea040e 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void)
 
 #ifdef CONFIG_AS_AVX2
 	chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
-			    cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 #endif
 	return crypto_register_alg(&alg);
 }
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 225be06edc80..4fe27e074194 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -330,7 +330,7 @@ ENDPROC(crc_pcl)
 	## PCLMULQDQ tables
 	## Table is 128 entries x 2 words (8 bytes) each
 	################################################################
-.section	.rotata, "a", %progbits
+.section	.rodata, "a", %progbits
 .align 8
 K_table:
 	.long 0x493c7d27, 0x00000001
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index f7170d764f32..4264a3d59589 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void)
 
 #ifdef CONFIG_AS_AVX2
 	poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
-			    cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
 	if (poly1305_use_avx2)
 		alg.descsize += 10 * sizeof(u32);
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 7d838dc4d888..6d198342e2de 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -542,7 +542,8 @@ static int __init init(void)
 		pr_info("AVX2 instructions are not detected.\n");
 		return -ENODEV;
 	}
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index da7dafc9b16d..5dc37026c7ce 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -597,7 +597,8 @@ static int __init serpent_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
new file mode 100644
index 000000000000..874a651b9e7d
--- /dev/null
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -0,0 +1,302 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define RSPSAVE		%rax
+
+/* gcc conversion */
+#define FRAME_SIZE	32	/* space for 2x16 bytes */
+
+#define ABCD		%xmm0
+#define E0		%xmm1	/* Need two E's b/c they ping pong */
+#define E1		%xmm2
+#define MSG0		%xmm3
+#define MSG1		%xmm4
+#define MSG2		%xmm5
+#define MSG3		%xmm6
+#define SHUF_MASK	%xmm7
+
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-1 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64 byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is  updated with the resulting hash value.
+ * The function only processes complete blocks, there is no functionality to
+ * store partial blocks. All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha1_ni_transform(uint32_t *digest, const void *data,
+		uint32_t numBlocks)
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+.text
+.align 32
+ENTRY(sha1_ni_transform)
+	mov		%rsp, RSPSAVE
+	sub		$FRAME_SIZE, %rsp
+	and		$~0xF, %rsp
+
+	shl		$6, NUM_BLKS		/* convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/* load initial hash values */
+	pinsrd		$3, 1*16(DIGEST_PTR), E0
+	movdqu		0*16(DIGEST_PTR), ABCD
+	pand		UPPER_WORD_MASK(%rip), E0
+	pshufd		$0x1B, ABCD, ABCD
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		E0, (0*16)(%rsp)
+	movdqa		ABCD, (1*16)(%rsp)
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG0
+	pshufb		SHUF_MASK, MSG0
+		paddd		MSG0, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG1
+	pshufb		SHUF_MASK, MSG1
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG1, MSG0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG2
+	pshufb		SHUF_MASK, MSG2
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG3
+	pshufb		SHUF_MASK, MSG3
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$0, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 16-19 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$0, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 20-23 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 24-27 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 28-31 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 32-35 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$1, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 36-39 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$1, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 40-43 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 44-47 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 48-51 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 52-55 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$2, E1, ABCD
+	sha1msg1	MSG1, MSG0
+	pxor		MSG1, MSG3
+
+	/* Rounds 56-59 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$2, E0, ABCD
+	sha1msg1	MSG2, MSG1
+	pxor		MSG2, MSG0
+
+	/* Rounds 60-63 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG3, MSG0
+		sha1rnds4	$3, E1, ABCD
+	sha1msg1	MSG3, MSG2
+	pxor		MSG3, MSG1
+
+	/* Rounds 64-67 */
+		sha1nexte	MSG0, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG0, MSG1
+		sha1rnds4	$3, E0, ABCD
+	sha1msg1	MSG0, MSG3
+	pxor		MSG0, MSG2
+
+	/* Rounds 68-71 */
+		sha1nexte	MSG1, E1
+		movdqa		ABCD, E0
+	sha1msg2	MSG1, MSG2
+		sha1rnds4	$3, E1, ABCD
+	pxor		MSG1, MSG3
+
+	/* Rounds 72-75 */
+		sha1nexte	MSG2, E0
+		movdqa		ABCD, E1
+	sha1msg2	MSG2, MSG3
+		sha1rnds4	$3, E0, ABCD
+
+	/* Rounds 76-79 */
+		sha1nexte	MSG3, E1
+		movdqa		ABCD, E0
+		sha1rnds4	$3, E1, ABCD
+
+	/* Add current hash values with previously saved */
+	sha1nexte	(0*16)(%rsp), E0
+	paddd		(1*16)(%rsp), ABCD
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, ABCD, ABCD
+	movdqu		ABCD, 0*16(DIGEST_PTR)
+	pextrd		$3, E0, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+	mov		RSPSAVE, %rsp
+
+	ret
+ENDPROC(sha1_ni_transform)
+
+.data
+
+.align 64
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x000102030405060708090a0b0c0d0e0f
+UPPER_WORD_MASK:
+	.octa 0xFFFFFFFF000000000000000000000000
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 7c48e8b20848..dd14616b7739 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -31,24 +31,11 @@
 #include <crypto/sha1_base.h>
 #include <asm/fpu/api.h>
 
+typedef void (sha1_transform_fn)(u32 *digest, const char *data,
+				unsigned int rounds);
 
-asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
-				     unsigned int rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
-				   unsigned int rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
-
-asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
-				    unsigned int rounds);
-#endif
-
-static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, sha1_transform_fn *sha1_xform)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
@@ -61,14 +48,14 @@ static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha1_base_do_update(desc, data, len,
-			    (sha1_block_fn *)sha1_transform_asm);
+			    (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha1_finup(desc, data, len, out);
@@ -76,32 +63,37 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha1_base_do_update(desc, data, len,
-				    (sha1_block_fn *)sha1_transform_asm);
-	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
+				    (sha1_block_fn *)sha1_xform);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform);
 	kernel_fpu_end();
 
 	return sha1_base_finish(desc, out);
 }
 
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
+				     unsigned int rounds);
+
+static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
 {
-	return sha1_ssse3_finup(desc, NULL, 0, out);
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_ssse3);
 }
 
-#ifdef CONFIG_AS_AVX2
-static void sha1_apply_transform_avx2(u32 *digest, const char *data,
-				unsigned int rounds)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
 {
-	/* Select the optimal transform based on data block size */
-	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
-		sha1_transform_avx2(digest, data, rounds);
-	else
-		sha1_transform_avx(digest, data, rounds);
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_ssse3);
+}
+
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ssse3_finup(desc, NULL, 0, out);
 }
-#endif
 
-static struct shash_alg alg = {
+static struct shash_alg sha1_ssse3_alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_base_init,
 	.update		=	sha1_ssse3_update,
@@ -110,7 +102,7 @@ static struct shash_alg alg = {
 	.descsize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
-		.cra_driver_name=	"sha1-ssse3",
+		.cra_driver_name =	"sha1-ssse3",
 		.cra_priority	=	150,
 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	=	SHA1_BLOCK_SIZE,
@@ -118,10 +110,62 @@ static struct shash_alg alg = {
 	}
 };
 
+static int register_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shash(&sha1_ssse3_alg);
+	return 0;
+}
+
+static void unregister_sha1_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shash(&sha1_ssse3_alg);
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
 {
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+	return sha1_update(desc, data, len,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+			(sha1_transform_fn *) sha1_transform_avx);
+}
+
+static int sha1_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx_update,
+	.final		=	sha1_avx_final,
+	.finup		=	sha1_avx_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool avx_usable(void)
+{
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
 			pr_info("AVX detected but unusable.\n");
 		return false;
@@ -130,55 +174,197 @@ static bool __init avx_usable(void)
 	return true;
 }
 
-#ifdef CONFIG_AS_AVX2
-static bool __init avx2_usable(void)
+static int register_sha1_avx(void)
+{
+	if (avx_usable())
+		return crypto_register_shash(&sha1_avx_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx(void)
 {
-	if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
-	    boot_cpu_has(X86_FEATURE_BMI2))
+	if (avx_usable())
+		crypto_unregister_shash(&sha1_avx_alg);
+}
+
+#else  /* CONFIG_AS_AVX */
+static inline int register_sha1_avx(void) { return 0; }
+static inline void unregister_sha1_avx(void) { }
+#endif /* CONFIG_AS_AVX */
+
+
+#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX)
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				    unsigned int rounds);
+
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+		&& boot_cpu_has(X86_FEATURE_BMI1)
+		&& boot_cpu_has(X86_FEATURE_BMI2))
 		return true;
 
 	return false;
 }
+
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds)
+{
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+
+static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_apply_transform_avx2);
+}
+
+static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_avx2_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_avx2_update,
+	.final		=	sha1_avx2_final,
+	.finup		=	sha1_avx2_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shash(&sha1_avx2_alg);
+	return 0;
+}
+
+static void unregister_sha1_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shash(&sha1_avx2_alg);
+}
+
+#else
+static inline int register_sha1_avx2(void) { return 0; }
+static inline void unregister_sha1_avx2(void) { }
 #endif
+
+#ifdef CONFIG_AS_SHA1_NI
+asmlinkage void sha1_ni_transform(u32 *digest, const char *data,
+				   unsigned int rounds);
+
+static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	return sha1_update(desc, data, len,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return sha1_finup(desc, data, len, out,
+		(sha1_transform_fn *) sha1_ni_transform);
+}
+
+static int sha1_ni_final(struct shash_desc *desc, u8 *out)
+{
+	return sha1_ni_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha1_ni_alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_base_init,
+	.update		=	sha1_ni_update,
+	.final		=	sha1_ni_final,
+	.finup		=	sha1_ni_finup,
+	.descsize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name =	"sha1-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int register_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		return crypto_register_shash(&sha1_ni_alg);
+	return 0;
+}
+
+static void unregister_sha1_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		crypto_unregister_shash(&sha1_ni_alg);
+}
+
+#else
+static inline int register_sha1_ni(void) { return 0; }
+static inline void unregister_sha1_ni(void) { }
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
-	char *algo_name;
+	if (register_sha1_ssse3())
+		goto fail;
 
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3) {
-		sha1_transform_asm = sha1_transform_ssse3;
-		algo_name = "SSSE3";
+	if (register_sha1_avx()) {
+		unregister_sha1_ssse3();
+		goto fail;
 	}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-		sha1_transform_asm = sha1_transform_avx;
-		algo_name = "AVX";
-#ifdef CONFIG_AS_AVX2
-		/* allow AVX2 to override AVX, it's a little faster */
-		if (avx2_usable()) {
-			sha1_transform_asm = sha1_apply_transform_avx2;
-			algo_name = "AVX2";
-		}
-#endif
+	if (register_sha1_avx2()) {
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-#endif
 
-	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
-		return crypto_register_shash(&alg);
+	if (register_sha1_ni()) {
+		unregister_sha1_avx2();
+		unregister_sha1_avx();
+		unregister_sha1_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha1_ssse3_mod_fini(void)
 {
-	crypto_unregister_shash(&alg);
+	unregister_sha1_ni();
+	unregister_sha1_avx2();
+	unregister_sha1_avx();
+	unregister_sha1_ssse3();
 }
 
 module_init(sha1_ssse3_mod_init);
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
new file mode 100644
index 000000000000..748cdf21a938
--- /dev/null
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -0,0 +1,353 @@
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * 	Sean Gulley <sean.m.gulley@intel.com>
+ * 	Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#define DIGEST_PTR	%rdi	/* 1st arg */
+#define DATA_PTR	%rsi	/* 2nd arg */
+#define NUM_BLKS	%rdx	/* 3rd arg */
+
+#define SHA256CONSTANTS	%rax
+
+#define MSG		%xmm0
+#define STATE0		%xmm1
+#define STATE1		%xmm2
+#define MSGTMP0		%xmm3
+#define MSGTMP1		%xmm4
+#define MSGTMP2		%xmm5
+#define MSGTMP3		%xmm6
+#define MSGTMP4		%xmm7
+
+#define SHUF_MASK	%xmm8
+
+#define ABEF_SAVE	%xmm9
+#define CDGH_SAVE	%xmm10
+
+/*
+ * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ *
+ * The function takes a pointer to the current hash values, a pointer to the
+ * input data, and a number of 64 byte blocks to process.  Once all blocks have
+ * been processed, the digest pointer is  updated with the resulting hash value.
+ * The function only processes complete blocks, there is no functionality to
+ * store partial blocks.  All message padding and hash value initialization must
+ * be done outside the update function.
+ *
+ * The indented lines in the loop are instructions related to rounds processing.
+ * The non-indented lines are instructions related to the message schedule.
+ *
+ * void sha256_ni_transform(uint32_t *digest, const void *data,
+		uint32_t numBlocks);
+ * digest : pointer to digest
+ * data: pointer to input data
+ * numBlocks: Number of blocks to process
+ */
+
+.text
+.align 32
+ENTRY(sha256_ni_transform)
+
+	shl		$6, NUM_BLKS		/*  convert to bytes */
+	jz		.Ldone_hash
+	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
+
+	/*
+	 * load initial hash values
+	 * Need to reorder these appropriately
+	 * DCBA, HGFE -> ABEF, CDGH
+	 */
+	movdqu		0*16(DIGEST_PTR), STATE0
+	movdqu		1*16(DIGEST_PTR), STATE1
+
+	pshufd		$0xB1, STATE0,  STATE0		/* CDAB */
+	pshufd		$0x1B, STATE1,  STATE1		/* EFGH */
+	movdqa		STATE0, MSGTMP4
+	palignr		$8, STATE1,  STATE0		/* ABEF */
+	pblendw		$0xF0, MSGTMP4, STATE1		/* CDGH */
+
+	movdqa		PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
+	lea		K256(%rip), SHA256CONSTANTS
+
+.Lloop0:
+	/* Save hash values for addition after rounds */
+	movdqa		STATE0, ABEF_SAVE
+	movdqa		STATE1, CDGH_SAVE
+
+	/* Rounds 0-3 */
+	movdqu		0*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP0
+		paddd		0*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 4-7 */
+	movdqu		1*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP1
+		paddd		1*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 8-11 */
+	movdqu		2*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP2
+		paddd		2*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 12-15 */
+	movdqu		3*16(DATA_PTR), MSG
+	pshufb		SHUF_MASK, MSG
+	movdqa		MSG, MSGTMP3
+		paddd		3*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 16-19 */
+	movdqa		MSGTMP0, MSG
+		paddd		4*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 20-23 */
+	movdqa		MSGTMP1, MSG
+		paddd		5*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 24-27 */
+	movdqa		MSGTMP2, MSG
+		paddd		6*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 28-31 */
+	movdqa		MSGTMP3, MSG
+		paddd		7*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 32-35 */
+	movdqa		MSGTMP0, MSG
+		paddd		8*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 36-39 */
+	movdqa		MSGTMP1, MSG
+		paddd		9*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP1, MSGTMP0
+
+	/* Rounds 40-43 */
+	movdqa		MSGTMP2, MSG
+		paddd		10*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP2, MSGTMP1
+
+	/* Rounds 44-47 */
+	movdqa		MSGTMP3, MSG
+		paddd		11*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP3, MSGTMP4
+	palignr		$4, MSGTMP2, MSGTMP4
+	paddd		MSGTMP4, MSGTMP0
+	sha256msg2	MSGTMP3, MSGTMP0
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP3, MSGTMP2
+
+	/* Rounds 48-51 */
+	movdqa		MSGTMP0, MSG
+		paddd		12*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP0, MSGTMP4
+	palignr		$4, MSGTMP3, MSGTMP4
+	paddd		MSGTMP4, MSGTMP1
+	sha256msg2	MSGTMP0, MSGTMP1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+	sha256msg1	MSGTMP0, MSGTMP3
+
+	/* Rounds 52-55 */
+	movdqa		MSGTMP1, MSG
+		paddd		13*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP1, MSGTMP4
+	palignr		$4, MSGTMP0, MSGTMP4
+	paddd		MSGTMP4, MSGTMP2
+	sha256msg2	MSGTMP1, MSGTMP2
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 56-59 */
+	movdqa		MSGTMP2, MSG
+		paddd		14*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+	movdqa		MSGTMP2, MSGTMP4
+	palignr		$4, MSGTMP1, MSGTMP4
+	paddd		MSGTMP4, MSGTMP3
+	sha256msg2	MSGTMP2, MSGTMP3
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Rounds 60-63 */
+	movdqa		MSGTMP3, MSG
+		paddd		15*16(SHA256CONSTANTS), MSG
+		sha256rnds2	STATE0, STATE1
+		pshufd 		$0x0E, MSG, MSG
+		sha256rnds2	STATE1, STATE0
+
+	/* Add current hash values with previously saved */
+	paddd		ABEF_SAVE, STATE0
+	paddd		CDGH_SAVE, STATE1
+
+	/* Increment data pointer and loop if more to process */
+	add		$64, DATA_PTR
+	cmp		NUM_BLKS, DATA_PTR
+	jne		.Lloop0
+
+	/* Write hash values back in the correct order */
+	pshufd		$0x1B, STATE0,  STATE0		/* FEBA */
+	pshufd		$0xB1, STATE1,  STATE1		/* DCHG */
+	movdqa		STATE0, MSGTMP4
+	pblendw		$0xF0, STATE1,  STATE0		/* DCBA */
+	palignr		$8, MSGTMP4, STATE1		/* HGFE */
+
+	movdqu		STATE0, 0*16(DIGEST_PTR)
+	movdqu		STATE1, 1*16(DIGEST_PTR)
+
+.Ldone_hash:
+
+	ret
+ENDPROC(sha256_ni_transform)
+
+.data
+.align 64
+K256:
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+PSHUFFLE_BYTE_FLIP_MASK:
+	.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index f8097fc0d1d1..5f4d6086dc59 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -42,19 +42,10 @@
 
 asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
 				       u64 rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
-				     u64 rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
-				      u64 rounds);
-#endif
-
-static void (*sha256_transform_asm)(u32 *, const char *, u64);
+typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
 
-static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len, sha256_transform_fn *sha256_xform)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
@@ -67,14 +58,14 @@ static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha256_base_do_update(desc, data, len,
-			      (sha256_block_fn *)sha256_transform_asm);
+			      (sha256_block_fn *)sha256_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out, sha256_transform_fn *sha256_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha256_finup(desc, data, len, out);
@@ -82,20 +73,32 @@ static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha256_base_do_update(desc, data, len,
-				      (sha256_block_fn *)sha256_transform_asm);
-	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
+				      (sha256_block_fn *)sha256_xform);
+	sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform);
 	kernel_fpu_end();
 
 	return sha256_base_finish(desc, out);
 }
 
+static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_ssse3);
+}
+
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_ssse3);
+}
+
 /* Add padding and return the message digest. */
 static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
 {
 	return sha256_ssse3_finup(desc, NULL, 0, out);
 }
 
-static struct shash_alg algs[] = { {
+static struct shash_alg sha256_ssse3_algs[] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_base_init,
 	.update		=	sha256_ssse3_update,
@@ -127,10 +130,77 @@ static struct shash_alg algs[] = { {
 	}
 } };
 
+static int register_sha256_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shashes(sha256_ssse3_algs,
+				ARRAY_SIZE(sha256_ssse3_algs));
+	return 0;
+}
+
+static void unregister_sha256_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shashes(sha256_ssse3_algs,
+				ARRAY_SIZE(sha256_ssse3_algs));
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
+				     u64 rounds);
+
+static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_avx);
+}
+
+static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_avx);
+}
+
+static int sha256_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_avx_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_avx_update,
+	.final		=	sha256_avx_final,
+	.finup		=	sha256_avx_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_avx_update,
+	.final		=	sha256_avx_final,
+	.finup		=	sha256_avx_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static bool avx_usable(void)
 {
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
 			pr_info("AVX detected but unusable.\n");
 		return false;
@@ -138,47 +208,216 @@ static bool __init avx_usable(void)
 
 	return true;
 }
-#endif
 
-static int __init sha256_ssse3_mod_init(void)
+static int register_sha256_avx(void)
 {
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
-		sha256_transform_asm = sha256_transform_ssse3;
+	if (avx_usable())
+		return crypto_register_shashes(sha256_avx_algs,
+				ARRAY_SIZE(sha256_avx_algs));
+	return 0;
+}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-#ifdef CONFIG_AS_AVX2
-		if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2))
-			sha256_transform_asm = sha256_transform_rorx;
-		else
+static void unregister_sha256_avx(void)
+{
+	if (avx_usable())
+		crypto_unregister_shashes(sha256_avx_algs,
+				ARRAY_SIZE(sha256_avx_algs));
+}
+
+#else
+static inline int register_sha256_avx(void) { return 0; }
+static inline void unregister_sha256_avx(void) { }
 #endif
-			sha256_transform_asm = sha256_transform_avx;
+
+#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
+asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
+				      u64 rounds);
+
+static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_transform_rorx);
+}
+
+static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_transform_rorx);
+}
+
+static int sha256_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_avx2_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_avx2_update,
+	.final		=	sha256_avx2_final,
+	.finup		=	sha256_avx2_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
 	}
-#endif
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_avx2_update,
+	.final		=	sha256_avx2_final,
+	.finup		=	sha256_avx2_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
-	if (sha256_transform_asm) {
-#ifdef CONFIG_AS_AVX
-		if (sha256_transform_asm == sha256_transform_avx)
-			pr_info("Using AVX optimized SHA-256 implementation\n");
-#ifdef CONFIG_AS_AVX2
-		else if (sha256_transform_asm == sha256_transform_rorx)
-			pr_info("Using AVX2 optimized SHA-256 implementation\n");
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
+		    boot_cpu_has(X86_FEATURE_BMI2))
+		return true;
+
+	return false;
+}
+
+static int register_sha256_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shashes(sha256_avx2_algs,
+				ARRAY_SIZE(sha256_avx2_algs));
+	return 0;
+}
+
+static void unregister_sha256_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shashes(sha256_avx2_algs,
+				ARRAY_SIZE(sha256_avx2_algs));
+}
+
+#else
+static inline int register_sha256_avx2(void) { return 0; }
+static inline void unregister_sha256_avx2(void) { }
 #endif
-		else
+
+#ifdef CONFIG_AS_SHA256_NI
+asmlinkage void sha256_ni_transform(u32 *digest, const char *data,
+				   u64 rounds); /*unsigned int rounds);*/
+
+static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	return sha256_update(desc, data, len, sha256_ni_transform);
+}
+
+static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
+		      unsigned int len, u8 *out)
+{
+	return sha256_finup(desc, data, len, out, sha256_ni_transform);
+}
+
+static int sha256_ni_final(struct shash_desc *desc, u8 *out)
+{
+	return sha256_ni_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha256_ni_algs[] = { {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_base_init,
+	.update		=	sha256_ni_update,
+	.final		=	sha256_ni_final,
+	.finup		=	sha256_ni_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name =	"sha256-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+}, {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_base_init,
+	.update		=	sha256_ni_update,
+	.final		=	sha256_ni_final,
+	.finup		=	sha256_ni_finup,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name =	"sha224-ni",
+		.cra_priority	=	250,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static int register_sha256_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		return crypto_register_shashes(sha256_ni_algs,
+				ARRAY_SIZE(sha256_ni_algs));
+	return 0;
+}
+
+static void unregister_sha256_ni(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+		crypto_unregister_shashes(sha256_ni_algs,
+				ARRAY_SIZE(sha256_ni_algs));
+}
+
+#else
+static inline int register_sha256_ni(void) { return 0; }
+static inline void unregister_sha256_ni(void) { }
 #endif
-			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
-		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+static int __init sha256_ssse3_mod_init(void)
+{
+	if (register_sha256_ssse3())
+		goto fail;
+
+	if (register_sha256_avx()) {
+		unregister_sha256_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
+	if (register_sha256_avx2()) {
+		unregister_sha256_avx();
+		unregister_sha256_ssse3();
+		goto fail;
+	}
+
+	if (register_sha256_ni()) {
+		unregister_sha256_avx2();
+		unregister_sha256_avx();
+		unregister_sha256_ssse3();
+		goto fail;
+	}
+
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha256_ssse3_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	unregister_sha256_ni();
+	unregister_sha256_avx2();
+	unregister_sha256_avx();
+	unregister_sha256_ssse3();
 }
 
 module_init(sha256_ssse3_mod_init);
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 2edad7b81870..34e5083d6f36 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -41,19 +41,11 @@
 
 asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
 				       u64 rounds);
-#ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
-				     u64 rounds);
-#endif
-#ifdef CONFIG_AS_AVX2
-asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
-				      u64 rounds);
-#endif
 
-static void (*sha512_transform_asm)(u64 *, const char *, u64);
+typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds);
 
-static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len)
+static int sha512_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len, sha512_transform_fn *sha512_xform)
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 
@@ -66,14 +58,14 @@ static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
 
 	kernel_fpu_begin();
 	sha512_base_do_update(desc, data, len,
-			      (sha512_block_fn *)sha512_transform_asm);
+			      (sha512_block_fn *)sha512_xform);
 	kernel_fpu_end();
 
 	return 0;
 }
 
-static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
-			      unsigned int len, u8 *out)
+static int sha512_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out, sha512_transform_fn *sha512_xform)
 {
 	if (!irq_fpu_usable())
 		return crypto_sha512_finup(desc, data, len, out);
@@ -81,20 +73,32 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
 	kernel_fpu_begin();
 	if (len)
 		sha512_base_do_update(desc, data, len,
-				      (sha512_block_fn *)sha512_transform_asm);
-	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
+				      (sha512_block_fn *)sha512_xform);
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform);
 	kernel_fpu_end();
 
 	return sha512_base_finish(desc, out);
 }
 
+static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	return sha512_update(desc, data, len, sha512_transform_ssse3);
+}
+
+static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_ssse3);
+}
+
 /* Add padding and return the message digest. */
 static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
 {
 	return sha512_ssse3_finup(desc, NULL, 0, out);
 }
 
-static struct shash_alg algs[] = { {
+static struct shash_alg sha512_ssse3_algs[] = { {
 	.digestsize	=	SHA512_DIGEST_SIZE,
 	.init		=	sha512_base_init,
 	.update		=	sha512_ssse3_update,
@@ -126,10 +130,27 @@ static struct shash_alg algs[] = { {
 	}
 } };
 
+static int register_sha512_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		return crypto_register_shashes(sha512_ssse3_algs,
+			ARRAY_SIZE(sha512_ssse3_algs));
+	return 0;
+}
+
+static void unregister_sha512_ssse3(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SSSE3))
+		crypto_unregister_shashes(sha512_ssse3_algs,
+			ARRAY_SIZE(sha512_ssse3_algs));
+}
+
 #ifdef CONFIG_AS_AVX
-static bool __init avx_usable(void)
+asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
+				     u64 rounds);
+static bool avx_usable(void)
 {
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		if (cpu_has_avx)
 			pr_info("AVX detected but unusable.\n");
 		return false;
@@ -137,47 +158,185 @@ static bool __init avx_usable(void)
 
 	return true;
 }
-#endif
 
-static int __init sha512_ssse3_mod_init(void)
+static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
 {
-	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
-		sha512_transform_asm = sha512_transform_ssse3;
+	return sha512_update(desc, data, len, sha512_transform_avx);
+}
 
-#ifdef CONFIG_AS_AVX
-	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable()) {
-#ifdef CONFIG_AS_AVX2
-		if (boot_cpu_has(X86_FEATURE_AVX2))
-			sha512_transform_asm = sha512_transform_rorx;
-		else
-#endif
-			sha512_transform_asm = sha512_transform_avx;
+static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_avx);
+}
+
+/* Add padding and return the message digest. */
+static int sha512_avx_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_avx_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha512_avx_algs[] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_base_init,
+	.update		=	sha512_avx_update,
+	.final		=	sha512_avx_final,
+	.finup		=	sha512_avx_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name =	"sha512-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
 	}
-#endif
+},  {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_base_init,
+	.update		=	sha512_avx_update,
+	.final		=	sha512_avx_final,
+	.finup		=	sha512_avx_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-avx",
+		.cra_priority	=	160,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
 
-	if (sha512_transform_asm) {
-#ifdef CONFIG_AS_AVX
-		if (sha512_transform_asm == sha512_transform_avx)
-			pr_info("Using AVX optimized SHA-512 implementation\n");
-#ifdef CONFIG_AS_AVX2
-		else if (sha512_transform_asm == sha512_transform_rorx)
-			pr_info("Using AVX2 optimized SHA-512 implementation\n");
+static int register_sha512_avx(void)
+{
+	if (avx_usable())
+		return crypto_register_shashes(sha512_avx_algs,
+			ARRAY_SIZE(sha512_avx_algs));
+	return 0;
+}
+
+static void unregister_sha512_avx(void)
+{
+	if (avx_usable())
+		crypto_unregister_shashes(sha512_avx_algs,
+			ARRAY_SIZE(sha512_avx_algs));
+}
+#else
+static inline int register_sha512_avx(void) { return 0; }
+static inline void unregister_sha512_avx(void) { }
 #endif
-		else
+
+#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX)
+asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
+				      u64 rounds);
+
+static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	return sha512_update(desc, data, len, sha512_transform_rorx);
+}
+
+static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
+	      unsigned int len, u8 *out)
+{
+	return sha512_finup(desc, data, len, out, sha512_transform_rorx);
+}
+
+/* Add padding and return the message digest. */
+static int sha512_avx2_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_avx2_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg sha512_avx2_algs[] = { {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_base_init,
+	.update		=	sha512_avx2_update,
+	.final		=	sha512_avx2_final,
+	.finup		=	sha512_avx2_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name =	"sha512-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+},  {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_base_init,
+	.update		=	sha512_avx2_update,
+	.final		=	sha512_avx2_final,
+	.finup		=	sha512_avx2_finup,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name =	"sha384-avx2",
+		.cra_priority	=	170,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+} };
+
+static bool avx2_usable(void)
+{
+	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
+		    boot_cpu_has(X86_FEATURE_BMI2))
+		return true;
+
+	return false;
+}
+
+static int register_sha512_avx2(void)
+{
+	if (avx2_usable())
+		return crypto_register_shashes(sha512_avx2_algs,
+			ARRAY_SIZE(sha512_avx2_algs));
+	return 0;
+}
+
+static void unregister_sha512_avx2(void)
+{
+	if (avx2_usable())
+		crypto_unregister_shashes(sha512_avx2_algs,
+			ARRAY_SIZE(sha512_avx2_algs));
+}
+#else
+static inline int register_sha512_avx2(void) { return 0; }
+static inline void unregister_sha512_avx2(void) { }
 #endif
-			pr_info("Using SSSE3 optimized SHA-512 implementation\n");
-		return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+static int __init sha512_ssse3_mod_init(void)
+{
+
+	if (register_sha512_ssse3())
+		goto fail;
+
+	if (register_sha512_avx()) {
+		unregister_sha512_ssse3();
+		goto fail;
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
 
+	if (register_sha512_avx2()) {
+		unregister_sha512_avx();
+		unregister_sha512_ssse3();
+		goto fail;
+	}
+
+	return 0;
+fail:
 	return -ENODEV;
 }
 
 static void __exit sha512_ssse3_mod_fini(void)
 {
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	unregister_sha512_avx2();
+	unregister_sha512_avx();
+	unregister_sha512_ssse3();
 }
 
 module_init(sha512_ssse3_mod_init);
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index c2bd0ce718ee..b7a3904b953c 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -558,7 +558,7 @@ static int __init twofish_init(void)
 {
 	const char *feature_name;
 
-	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 80dcc9261ca3..a89fdbc1f0be 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -24,10 +24,19 @@
 
 #include <asm/desc.h>
 #include <asm/traps.h>
+#include <asm/vdso.h>
+#include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
+{
+	unsigned long top_of_stack =
+		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
+	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+}
+
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
 __visible void enter_from_user_mode(void)
@@ -66,13 +75,14 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
  */
 unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 {
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	unsigned long ret = 0;
 	u32 work;
 
-	BUG_ON(regs != task_pt_regs(current));
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+		BUG_ON(regs != task_pt_regs(current));
 
-	work = ACCESS_ONCE(current_thread_info()->flags) &
-		_TIF_WORK_SYSCALL_ENTRY;
+	work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
 #ifdef CONFIG_CONTEXT_TRACKING
 	/*
@@ -154,11 +164,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
 				unsigned long phase1_result)
 {
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	long ret = 0;
-	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
-		_TIF_WORK_SYSCALL_ENTRY;
+	u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
 
-	BUG_ON(regs != task_pt_regs(current));
+	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+		BUG_ON(regs != task_pt_regs(current));
 
 	/*
 	 * If we stepped into a sysenter/syscall insn, it trapped in
@@ -207,19 +218,12 @@ long syscall_trace_enter(struct pt_regs *regs)
 		return syscall_trace_enter_phase2(regs, arch, phase1_result);
 }
 
-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
-	unsigned long top_of_stack =
-		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
-	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
+#define EXIT_TO_USERMODE_LOOP_FLAGS				\
+	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |	\
+	 _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
 
-/* Called with IRQs disabled. */
-__visible void prepare_exit_to_usermode(struct pt_regs *regs)
+static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 {
-	if (WARN_ON(!irqs_disabled()))
-		local_irq_disable();
-
 	/*
 	 * In order to return to user mode, we need to have IRQs off with
 	 * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
@@ -229,14 +233,6 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 	 * work to clear some of the flags can sleep.
 	 */
 	while (true) {
-		u32 cached_flags =
-			READ_ONCE(pt_regs_to_thread_info(regs)->flags);
-
-		if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
-				      _TIF_UPROBE | _TIF_NEED_RESCHED |
-				      _TIF_USER_RETURN_NOTIFY)))
-			break;
-
 		/* We have work to do. */
 		local_irq_enable();
 
@@ -260,50 +256,81 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
 
 		/* Disable IRQs and retry */
 		local_irq_disable();
+
+		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+
+		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
+			break;
+
 	}
+}
+
+/* Called with IRQs disabled. */
+__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
+{
+	u32 cached_flags;
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
+		local_irq_disable();
+
+	lockdep_sys_exit();
+
+	cached_flags =
+		READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+
+	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
+		exit_to_usermode_loop(regs, cached_flags);
 
 	user_enter();
 }
 
+#define SYSCALL_EXIT_WORK_FLAGS				\
+	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |	\
+	 _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+
+static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
+{
+	bool step;
+
+	audit_syscall_exit(regs);
+
+	if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
+		trace_sys_exit(regs, regs->ax);
+
+	/*
+	 * If TIF_SYSCALL_EMU is set, we only get here because of
+	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+	 * We already reported this syscall instruction in
+	 * syscall_trace_enter().
+	 */
+	step = unlikely(
+		(cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
+		== _TIF_SINGLESTEP);
+	if (step || cached_flags & _TIF_SYSCALL_TRACE)
+		tracehook_report_syscall_exit(regs, step);
+}
+
 /*
  * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
  * state such that we can immediately switch to user mode.
  */
-__visible void syscall_return_slowpath(struct pt_regs *regs)
+__visible inline void syscall_return_slowpath(struct pt_regs *regs)
 {
 	struct thread_info *ti = pt_regs_to_thread_info(regs);
 	u32 cached_flags = READ_ONCE(ti->flags);
-	bool step;
 
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 
-	if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled",
-		 regs->orig_ax))
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING) &&
+	    WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax))
 		local_irq_enable();
 
 	/*
 	 * First do one-time work.  If these work items are enabled, we
 	 * want to run them exactly once per syscall exit with IRQs on.
 	 */
-	if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |
-			    _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) {
-		audit_syscall_exit(regs);
-
-		if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
-			trace_sys_exit(regs, regs->ax);
-
-		/*
-		 * If TIF_SYSCALL_EMU is set, we only get here because of
-		 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
-		 * We already reported this syscall instruction in
-		 * syscall_trace_enter().
-		 */
-		step = unlikely(
-			(cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
-			== _TIF_SINGLESTEP);
-		if (step || cached_flags & _TIF_SYSCALL_TRACE)
-			tracehook_report_syscall_exit(regs, step);
-	}
+	if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
+		syscall_slow_exit_work(regs, cached_flags);
 
 #ifdef CONFIG_COMPAT
 	/*
@@ -316,3 +343,144 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
 	local_irq_disable();
 	prepare_exit_to_usermode(regs);
 }
+
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
+/*
+ * Does a 32-bit syscall.  Called with IRQs on and does all entry and
+ * exit work and returns with IRQs off.  This function is extremely hot
+ * in workloads that use it, and it's usually called from
+ * do_fast_syscall_32, so forcibly inline it to improve performance.
+ */
+#ifdef CONFIG_X86_32
+/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */
+__visible
+#else
+/* 64-bit kernels use do_syscall_32_irqs_off() instead. */
+static
+#endif
+__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
+{
+	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	unsigned int nr = (unsigned int)regs->orig_ax;
+
+#ifdef CONFIG_IA32_EMULATION
+	ti->status |= TS_COMPAT;
+#endif
+
+	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
+		/*
+		 * Subtlety here: if ptrace pokes something larger than
+		 * 2^32-1 into orig_ax, this truncates it.  This may or
+		 * may not be necessary, but it matches the old asm
+		 * behavior.
+		 */
+		nr = syscall_trace_enter(regs);
+	}
+
+	if (likely(nr < IA32_NR_syscalls)) {
+		/*
+		 * It's possible that a 32-bit syscall implementation
+		 * takes a 64-bit parameter but nonetheless assumes that
+		 * the high bits are zero.  Make sure we zero-extend all
+		 * of the args.
+		 */
+		regs->ax = ia32_sys_call_table[nr](
+			(unsigned int)regs->bx, (unsigned int)regs->cx,
+			(unsigned int)regs->dx, (unsigned int)regs->si,
+			(unsigned int)regs->di, (unsigned int)regs->bp);
+	}
+
+	syscall_return_slowpath(regs);
+}
+
+#ifdef CONFIG_X86_64
+/* Handles INT80 on 64-bit kernels */
+__visible void do_syscall_32_irqs_off(struct pt_regs *regs)
+{
+	local_irq_enable();
+	do_syscall_32_irqs_on(regs);
+}
+#endif
+
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
+__visible long do_fast_syscall_32(struct pt_regs *regs)
+{
+	/*
+	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
+	 * convention.  Adjust regs so it looks like we entered using int80.
+	 */
+
+	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
+		vdso_image_32.sym_int80_landing_pad;
+
+	/*
+	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
+	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
+	 * Fix it up.
+	 */
+	regs->ip = landing_pad;
+
+	/*
+	 * Fetch ECX from where the vDSO stashed it.
+	 *
+	 * WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
+	 */
+	local_irq_enable();
+	if (
+#ifdef CONFIG_X86_64
+		/*
+		 * Micro-optimization: the pointer we're following is explicitly
+		 * 32 bits, so it can't be out of range.
+		 */
+		__get_user(*(u32 *)&regs->cx,
+			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
+#else
+		get_user(*(u32 *)&regs->cx,
+			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
+#endif
+		) {
+
+		/* User code screwed up. */
+		local_irq_disable();
+		regs->ax = -EFAULT;
+#ifdef CONFIG_CONTEXT_TRACKING
+		enter_from_user_mode();
+#endif
+		prepare_exit_to_usermode(regs);
+		return 0;	/* Keep it simple: use IRET. */
+	}
+
+	/* Now this is just like a normal syscall. */
+	do_syscall_32_irqs_on(regs);
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
+	 * SYSRETL is available on all 64-bit CPUs, so we don't need to
+	 * bother with SYSEXIT.
+	 *
+	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+	 * because the ECX fixup above will ensure that this is essentially
+	 * never the case.
+	 */
+	return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
+		regs->ip == landing_pad &&
+		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
+#else
+	/*
+	 * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
+	 *
+	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+	 * because the ECX fixup above will ensure that this is essentially
+	 * never the case.
+	 *
+	 * We don't allow syscalls at all from VM86 mode, but we still
+	 * need to check VM, because we might be returning from sys_vm86.
+	 */
+	return static_cpu_has(X86_FEATURE_SEP) &&
+		regs->cs == __USER_CS && regs->ss == __USER_DS &&
+		regs->ip == landing_pad &&
+		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
+#endif
+}
+#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b2909bf8cf70..3eb572ed3d7a 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -3,7 +3,7 @@
  *
  * entry_32.S contains the system-call and low-level fault and trap handling routines.
  *
- * Stack layout in 'syscall_exit':
+ * Stack layout while running C code:
  *	ptrace needs to have all registers on the stack.
  *	If the order here is changed, it needs to be
  *	updated in fork.c:copy_process(), signal.c:do_signal(),
@@ -153,13 +153,13 @@
 
 #endif /* CONFIG_X86_32_LAZY_GS */
 
-.macro SAVE_ALL
+.macro SAVE_ALL pt_regs_ax=%eax
 	cld
 	PUSH_GS
 	pushl	%fs
 	pushl	%es
 	pushl	%ds
-	pushl	%eax
+	pushl	\pt_regs_ax
 	pushl	%ebp
 	pushl	%edi
 	pushl	%esi
@@ -211,7 +211,11 @@ ENTRY(ret_from_fork)
 	popl	%eax
 	pushl	$0x0202				# Reset kernel eflags
 	popfl
-	jmp	syscall_exit
+
+	/* When we fork, we trace the syscall return in the child, too. */
+	movl    %esp, %eax
+	call    syscall_return_slowpath
+	jmp     restore_all
 END(ret_from_fork)
 
 ENTRY(ret_from_kernel_thread)
@@ -224,7 +228,15 @@ ENTRY(ret_from_kernel_thread)
 	movl	PT_EBP(%esp), %eax
 	call	*PT_EBX(%esp)
 	movl	$0, PT_EAX(%esp)
-	jmp	syscall_exit
+
+	/*
+	 * Kernel threads return to userspace as if returning from a syscall.
+	 * We should check whether anything actually uses this path and, if so,
+	 * consider switching it over to ret_from_fork.
+	 */
+	movl    %esp, %eax
+	call    syscall_return_slowpath
+	jmp     restore_all
 ENDPROC(ret_from_kernel_thread)
 
 /*
@@ -255,7 +267,6 @@ ret_from_intr:
 	jb	resume_kernel			# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
-	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl	%esp, %eax
@@ -276,76 +287,47 @@ need_resched:
 END(resume_kernel)
 #endif
 
-/*
- * SYSENTER_RETURN points to after the SYSENTER instruction
- * in the vsyscall page.  See vsyscall-sysentry.S, which defines
- * the symbol.
- */
-
 	# SYSENTER  call handler stub
 ENTRY(entry_SYSENTER_32)
 	movl	TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
+	pushl	$__USER_DS		/* pt_regs->ss */
+	pushl	%ecx			/* pt_regs->cx */
+	pushfl				/* pt_regs->flags (except IF = 0) */
+	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
+	pushl	$__USER_CS		/* pt_regs->cs */
+	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
+	pushl	%eax			/* pt_regs->orig_ax */
+	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
+
 	/*
-	 * Interrupts are disabled here, but we can't trace it until
-	 * enough kernel state to call TRACE_IRQS_OFF can be called - but
-	 * we immediately enable interrupts at that point anyway.
-	 */
-	pushl	$__USER_DS
-	pushl	%ebp
-	pushfl
-	orl	$X86_EFLAGS_IF, (%esp)
-	pushl	$__USER_CS
-	/*
-	 * Push current_thread_info()->sysenter_return to the stack.
-	 * A tiny bit of offset fixup is necessary: TI_sysenter_return
-	 * is relative to thread_info, which is at the bottom of the
-	 * kernel stack page.  4*4 means the 4 words pushed above;
-	 * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
-	 * and THREAD_SIZE takes us to the bottom.
+	 * User mode is traced as though IRQs are on, and SYSENTER
+	 * turned them off.
 	 */
-	pushl	((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
-
-	pushl	%eax
-	SAVE_ALL
-	ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
-	cmpl	$__PAGE_OFFSET-3, %ebp
-	jae	syscall_fault
-	ASM_STAC
-1:	movl	(%ebp), %ebp
-	ASM_CLAC
-	movl	%ebp, PT_EBP(%esp)
-	_ASM_EXTABLE(1b, syscall_fault)
+	TRACE_IRQS_OFF
 
-	GET_THREAD_INFO(%ebp)
+	movl	%esp, %eax
+	call	do_fast_syscall_32
+	testl	%eax, %eax
+	jz	.Lsyscall_32_done
 
-	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
-	jnz	syscall_trace_entry
-sysenter_do_call:
-	cmpl	$(NR_syscalls), %eax
-	jae	sysenter_badsys
-	call	*sys_call_table(, %eax, 4)
-sysenter_after_call:
-	movl	%eax, PT_EAX(%esp)
-	LOCKDEP_SYS_EXIT
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	TRACE_IRQS_OFF
-	movl	TI_flags(%ebp), %ecx
-	testl	$_TIF_ALLWORK_MASK, %ecx
-	jnz	syscall_exit_work_irqs_off
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
-	movl	PT_EIP(%esp), %edx
-	movl	PT_OLDESP(%esp), %ecx
-	xorl	%ebp, %ebp
-	TRACE_IRQS_ON
+/* Opportunistic SYSEXIT */
+	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
+	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
 1:	mov	PT_FS(%esp), %fs
 	PTGS_TO_GS
+	popl	%ebx			/* pt_regs->bx */
+	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
+	popl	%esi			/* pt_regs->si */
+	popl	%edi			/* pt_regs->di */
+	popl	%ebp			/* pt_regs->bp */
+	popl	%eax			/* pt_regs->ax */
+
+	/*
+	 * Return back to the vDSO, which will pop ecx and edx.
+	 * Don't bother with DS and ES (they already contain __USER_DS).
+	 */
 	ENABLE_INTERRUPTS_SYSEXIT
 
 .pushsection .fixup, "ax"
@@ -359,21 +341,18 @@ ENDPROC(entry_SYSENTER_32)
 	# system call handler stub
 ENTRY(entry_INT80_32)
 	ASM_CLAC
-	pushl	%eax				# save orig_eax
-	SAVE_ALL
-	GET_THREAD_INFO(%ebp)
-						# system call tracing in operation / emulation
-	testl	$_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
-	jnz	syscall_trace_entry
-	cmpl	$(NR_syscalls), %eax
-	jae	syscall_badsys
-syscall_call:
-	call	*sys_call_table(, %eax, 4)
-syscall_after_call:
-	movl	%eax, PT_EAX(%esp)		# store the return value
-syscall_exit:
-	LOCKDEP_SYS_EXIT
-	jmp	syscall_exit_work
+	pushl	%eax			/* pt_regs->orig_ax */
+	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
+
+	/*
+	 * User mode is traced as though IRQs are on.  Unlike the 64-bit
+	 * case, INT80 is a trap gate on 32-bit kernels, so interrupts
+	 * are already on (unless user code is messing around with iopl).
+	 */
+
+	movl	%esp, %eax
+	call	do_syscall_32_irqs_on
+.Lsyscall_32_done:
 
 restore_all:
 	TRACE_IRQS_IRET
@@ -450,47 +429,6 @@ ldt_ss:
 #endif
 ENDPROC(entry_INT80_32)
 
-	# perform syscall exit tracing
-	ALIGN
-syscall_trace_entry:
-	movl	$-ENOSYS, PT_EAX(%esp)
-	movl	%esp, %eax
-	call	syscall_trace_enter
-	/* What it returned is what we'll actually use.  */
-	cmpl	$(NR_syscalls), %eax
-	jnae	syscall_call
-	jmp	syscall_exit
-END(syscall_trace_entry)
-
-	# perform syscall exit tracing
-	ALIGN
-syscall_exit_work_irqs_off:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_ANY)
-
-syscall_exit_work:
-	movl	%esp, %eax
-	call	syscall_return_slowpath
-	jmp	restore_all
-END(syscall_exit_work)
-
-syscall_fault:
-	ASM_CLAC
-	GET_THREAD_INFO(%ebp)
-	movl	$-EFAULT, PT_EAX(%esp)
-	jmp	resume_userspace
-END(syscall_fault)
-
-syscall_badsys:
-	movl	$-ENOSYS, %eax
-	jmp	syscall_after_call
-END(syscall_badsys)
-
-sysenter_badsys:
-	movl	$-ENOSYS, %eax
-	jmp	sysenter_after_call
-END(sysenter_badsys)
-
 .macro FIXUP_ESPFIX_STACK
 /*
  * Switch back for ESPFIX stack to the normal zerobased stack
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 055a01de7c8d..53616ca03244 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -391,20 +391,16 @@ GLOBAL(stub_execveat)
 	jmp	return_from_execve
 END(stub_execveat)
 
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
+#if defined(CONFIG_X86_X32_ABI)
 	.align	8
 GLOBAL(stub_x32_execve)
-GLOBAL(stub32_execve)
 	call	compat_sys_execve
 	jmp	return_from_execve
-END(stub32_execve)
 END(stub_x32_execve)
 	.align	8
 GLOBAL(stub_x32_execveat)
-GLOBAL(stub32_execveat)
 	call	compat_sys_execveat
 	jmp	return_from_execve
-END(stub32_execveat)
 END(stub_x32_execveat)
 #endif
 
@@ -557,7 +553,6 @@ ret_from_intr:
 	jz	retint_kernel
 
 	/* Interrupt came from user space */
-	LOCKDEP_SYS_EXIT_IRQ
 GLOBAL(retint_user)
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
@@ -587,7 +582,7 @@ retint_kernel:
  * At this label, code paths which return to kernel and to user,
  * which come from interrupts/exception and from syscalls, merge.
  */
-restore_regs_and_iret:
+GLOBAL(restore_regs_and_iret)
 	RESTORE_EXTRA_REGS
 restore_c_regs_and_iret:
 	RESTORE_C_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index a9360d40fb7f..c3201830a85e 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -16,16 +16,6 @@
 #include <linux/linkage.h>
 #include <linux/err.h>
 
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE		0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-# define sysexit_audit		ia32_ret_from_sys_call_irqs_off
-# define sysretl_audit		ia32_ret_from_sys_call_irqs_off
-#endif
-
 	.section .entry.text, "ax"
 
 #ifdef CONFIG_PARAVIRT
@@ -58,219 +48,87 @@ ENDPROC(native_usergs_sysret32)
  * with the int 0x80 path.
  */
 ENTRY(entry_SYSENTER_compat)
-	/*
-	 * Interrupts are off on entry.
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
-	 */
+	/* Interrupts are off on entry. */
 	SWAPGS_UNSAFE_STACK
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-	ENABLE_INTERRUPTS(CLBR_NONE)
 
-	/* Zero-extending 32-bit regs, do not remove */
-	movl	%ebp, %ebp
+	/*
+	 * User tracing code (ptrace or signal handlers) might assume that
+	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+	 * syscall.  Just in case the high bits are nonzero, zero-extend
+	 * the syscall number.  (This could almost certainly be deleted
+	 * with no ill effects.)
+	 */
 	movl	%eax, %eax
 
-	movl	ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
-
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
-	pushq	%rbp			/* pt_regs->sp */
-	pushfq				/* pt_regs->flags */
+	pushq	%rcx			/* pt_regs->sp */
+
+	/*
+	 * Push flags.  This is nasty.  First, interrupts are currently
+	 * off, but we need pt_regs->flags to have IF set.  Second, even
+	 * if TF was set when SYSENTER started, it's clear by now.  We fix
+	 * that later using TIF_SINGLESTEP.
+	 */
+	pushfq				/* pt_regs->flags (except IF = 0) */
+	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
+	ASM_CLAC			/* Clear AC after saving FLAGS */
+
 	pushq	$__USER32_CS		/* pt_regs->cs */
-	pushq	%r10			/* pt_regs->ip = thread_info->sysenter_return */
+	xorq    %r8,%r8
+	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rcx			/* pt_regs->cx */
+	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
+	pushq   %r8                     /* pt_regs->r8  = 0 */
+	pushq   %r8                     /* pt_regs->r9  = 0 */
+	pushq   %r8                     /* pt_regs->r10 = 0 */
+	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   %rbx                    /* pt_regs->rbx */
+	pushq   %rbp                    /* pt_regs->rbp */
+	pushq   %r8                     /* pt_regs->r12 = 0 */
+	pushq   %r8                     /* pt_regs->r13 = 0 */
+	pushq   %r8                     /* pt_regs->r14 = 0 */
+	pushq   %r8                     /* pt_regs->r15 = 0 */
 	cld
-	sub	$(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
-
-	/*
-	 * no need to do an access_ok check here because rbp has been
-	 * 32-bit zero extended
-	 */
-	ASM_STAC
-1:	movl	(%rbp), %ebp
-	_ASM_EXTABLE(1b, ia32_badarg)
-	ASM_CLAC
 
 	/*
 	 * Sysenter doesn't filter flags, so we need to clear NT
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
+	 * This needs to happen before enabling interrupts so that
+	 * we don't get preempted with NT set.
+	 *
+	 * NB.: sysenter_fix_flags is a label with the code under it moved
+	 * out-of-line as an optimization: NT is unlikely to be set in the
+	 * majority of the cases and instead of polluting the I$ unnecessarily,
+	 * we're keeping that code behind a branch which will predict as
+	 * not-taken and therefore its instructions won't be fetched.
 	 */
 	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
 	jnz	sysenter_fix_flags
 sysenter_flags_fixed:
 
-	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	sysenter_tracesys
-
-sysenter_do_call:
-	/* 32-bit syscall -> 64-bit C ABI argument conversion */
-	movl	%edi, %r8d		/* arg5 */
-	movl	%ebp, %r9d		/* arg6 */
-	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
-	movl	%ebx, %edi		/* arg1 */
-	movl	%edx, %edx		/* arg3 (zero extension) */
-sysenter_dispatch:
-	cmpq	$(IA32_NR_syscalls-1), %rax
-	ja	1f
-	call	*ia32_sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	sysexit_audit
-sysexit_from_sys_call:
 	/*
-	 * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
-	 * NMI between STI and SYSEXIT has poorly specified behavior,
-	 * and and NMI followed by an IRQ with usergs is fatal.  So
-	 * we just pretend we're using SYSEXIT but we really use
-	 * SYSRETL instead.
-	 *
-	 * This code path is still called 'sysexit' because it pairs
-	 * with 'sysenter' and it uses the SYSENTER calling convention.
+	 * User mode is traced as though IRQs are on, and SYSENTER
+	 * turned them off.
 	 */
-	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	movl	RIP(%rsp), %ecx		/* User %eip */
-	movq    RAX(%rsp), %rax
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	xorl	%edx, %edx		/* Do not leak kernel information */
-	xorq	%r8, %r8
-	xorq	%r9, %r9
-	xorq	%r10, %r10
-	movl	EFLAGS(%rsp), %r11d	/* User eflags */
-	TRACE_IRQS_ON
-
-	/*
-	 * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
-	 * since it avoids a dicey window with interrupts enabled.
-	 */
-	movl	RSP(%rsp), %esp
-
-	/*
-	 * USERGS_SYSRET32 does:
-	 *  gsbase = user's gs base
-	 *  eip = ecx
-	 *  rflags = r11
-	 *  cs = __USER32_CS
-	 *  ss = __USER_DS
-	 *
-	 * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
-	 *
-	 *  pop %ebp
-	 *  pop %edx
-	 *  pop %ecx
-	 *
-	 * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
-	 * avoid info leaks.  R11 ends up with VDSO32_SYSENTER_RETURN's
-	 * address (already known to user code), and R12-R15 are
-	 * callee-saved and therefore don't contain any interesting
-	 * kernel data.
-	 */
-	USERGS_SYSRET32
-
-#ifdef CONFIG_AUDITSYSCALL
-	.macro auditsys_entry_common
-	/*
-	 * At this point, registers hold syscall args in the 32-bit syscall ABI:
-	 * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP.
-	 *
-	 * We want to pass them to __audit_syscall_entry(), which is a 64-bit
-	 * C function with 5 parameters, so shuffle them to match what
-	 * the function expects: RDI,RSI,RDX,RCX,R8.
-	 */
-	movl	%esi, %r8d		/* arg5 (R8 ) <= 4th syscall arg (ESI) */
-	xchg	%ecx, %edx		/* arg4 (RCX) <= 3rd syscall arg (EDX) */
-					/* arg3 (RDX) <= 2nd syscall arg (ECX) */
-	movl	%ebx, %esi		/* arg2 (RSI) <= 1st syscall arg (EBX) */
-	movl	%eax, %edi		/* arg1 (RDI) <= syscall number  (EAX) */
-	call	__audit_syscall_entry
-
-	/*
-	 * We are going to jump back to the syscall dispatch code.
-	 * Prepare syscall args as required by the 64-bit C ABI.
-	 * Registers clobbered by __audit_syscall_entry() are
-	 * loaded from pt_regs on stack:
-	 */
-	movl	ORIG_RAX(%rsp), %eax	/* syscall number */
-	movl	%ebx, %edi		/* arg1 */
-	movl	RCX(%rsp), %esi		/* arg2 */
-	movl	RDX(%rsp), %edx		/* arg3 */
-	movl	RSI(%rsp), %ecx		/* arg4 */
-	movl	RDI(%rsp), %r8d		/* arg5 */
-	.endm
-
-	.macro auditsys_exit exit
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	testl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	ia32_ret_from_sys_call
-	movl	%eax, %esi		/* second arg, syscall return value */
-	cmpl	$-MAX_ERRNO, %eax	/* is it an error ? */
-	jbe	1f
-	movslq	%eax, %rsi		/* if error sign extend to 64 bits */
-1:	setbe	%al			/* 1 if error, 0 if not */
-	movzbl	%al, %edi		/* zero-extend that into %edi */
-	call	__audit_syscall_exit
-	movl	$(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	%edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jz	\exit
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	jmp	int_ret_from_sys_call_irqs_off
-	.endm
 
-sysenter_auditsys:
-	auditsys_entry_common
-	movl	%ebp, %r9d		/* reload 6th syscall arg */
-	jmp	sysenter_dispatch
-
-sysexit_audit:
-	auditsys_exit sysexit_from_sys_call
-#endif
+	movq	%rsp, %rdi
+	call	do_fast_syscall_32
+	testl	%eax, %eax
+	jz	.Lsyscall_32_done
+	jmp	sysret32_from_system_call
 
 sysenter_fix_flags:
-	pushq	$(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
+	pushq	$X86_EFLAGS_FIXED
 	popfq
 	jmp	sysenter_flags_fixed
-
-sysenter_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jz	sysenter_auditsys
-#endif
-	SAVE_EXTRA_REGS
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
-	call	syscall_trace_enter
-
-	/* Reload arg registers from stack. (see sysenter_tracesys) */
-	movl	RCX(%rsp), %ecx
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	%eax, %eax		/* zero extension */
-
-	RESTORE_EXTRA_REGS
-	jmp	sysenter_do_call
 ENDPROC(entry_SYSENTER_compat)
 
 /*
@@ -298,21 +156,14 @@ ENDPROC(entry_SYSENTER_compat)
  * edi  arg5
  * esp  user stack
  * 0(%esp) arg6
- *
- * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. We set up a complete hardware stack frame to share code
- * with the int 0x80 path.
  */
 ENTRY(entry_SYSCALL_compat)
-	/*
-	 * Interrupts are off on entry.
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
-	 */
+	/* Interrupts are off on entry. */
 	SWAPGS_UNSAFE_STACK
+
+	/* Stash user ESP and switch to the kernel stack. */
 	movl	%esp, %r8d
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
-	ENABLE_INTERRUPTS(CLBR_NONE)
 
 	/* Zero-extending 32-bit regs, do not remove */
 	movl	%eax, %eax
@@ -327,162 +178,67 @@ ENTRY(entry_SYSCALL_compat)
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
-	pushq	%rbp			/* pt_regs->cx */
-	movl	%ebp, %ecx
+	pushq	%rcx			/* pt_regs->cx (will be overwritten) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	sub	$(10*8), %rsp		/* pt_regs->r8-11, bp, bx, r12-15 not saved */
+	xorq    %r8,%r8
+	pushq   %r8                     /* pt_regs->r8  = 0 */
+	pushq   %r8                     /* pt_regs->r9  = 0 */
+	pushq   %r8                     /* pt_regs->r10 = 0 */
+	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   %rbx                    /* pt_regs->rbx */
+	pushq   %rbp                    /* pt_regs->rbp */
+	pushq   %r8                     /* pt_regs->r12 = 0 */
+	pushq   %r8                     /* pt_regs->r13 = 0 */
+	pushq   %r8                     /* pt_regs->r14 = 0 */
+	pushq   %r8                     /* pt_regs->r15 = 0 */
 
 	/*
-	 * No need to do an access_ok check here because r8 has been
-	 * 32-bit zero extended:
+	 * User mode is traced as though IRQs are on, and SYSENTER
+	 * turned them off.
 	 */
-	ASM_STAC
-1:	movl	(%r8), %r9d
-	_ASM_EXTABLE(1b, ia32_badarg)
-	ASM_CLAC
-	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	cstar_tracesys
-
-cstar_do_call:
-	/* 32-bit syscall -> 64-bit C ABI argument conversion */
-	movl	%edi, %r8d		/* arg5 */
-	/* r9 already loaded */		/* arg6 */
-	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
-	movl	%ebx, %edi		/* arg1 */
-	movl	%edx, %edx		/* arg3 (zero extension) */
-
-cstar_dispatch:
-	cmpq	$(IA32_NR_syscalls-1), %rax
-	ja	1f
-
-	call	*ia32_sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	sysretl_audit
 
-sysretl_from_sys_call:
-	andl	$~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	RIP(%rsp), %ecx
-	movl	EFLAGS(%rsp), %r11d
-	movq    RAX(%rsp), %rax
-	xorq	%r10, %r10
-	xorq	%r9, %r9
-	xorq	%r8, %r8
-	TRACE_IRQS_ON
-	movl	RSP(%rsp), %esp
-	/*
-	 * 64-bit->32-bit SYSRET restores eip from ecx,
-	 * eflags from r11 (but RF and VM bits are forced to 0),
-	 * cs and ss are loaded from MSRs.
-	 * (Note: 32-bit->32-bit SYSRET is different: since r11
-	 * does not exist, it merely sets eflags.IF=1).
+	movq	%rsp, %rdi
+	call	do_fast_syscall_32
+	testl	%eax, %eax
+	jz	.Lsyscall_32_done
+
+	/* Opportunistic SYSRET */
+sysret32_from_system_call:
+	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
+	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
+	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
+	movq	RIP(%rsp), %rcx		/* pt_regs->ip (in rcx) */
+	addq	$RAX, %rsp		/* Skip r8-r15 */
+	popq	%rax			/* pt_regs->rax */
+	popq	%rdx			/* Skip pt_regs->cx */
+	popq	%rdx			/* pt_regs->dx */
+	popq	%rsi			/* pt_regs->si */
+	popq	%rdi			/* pt_regs->di */
+
+        /*
+         * USERGS_SYSRET32 does:
+         *  GSBASE = user's GS base
+         *  EIP = ECX
+         *  RFLAGS = R11
+         *  CS = __USER32_CS
+         *  SS = __USER_DS
+         *
+	 * ECX will not match pt_regs->cx, but we're returning to a vDSO
+	 * trampoline that will fix up RCX, so this is okay.
 	 *
-	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
-	 * descriptor is not reinitialized.  This means that we must
-	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
-	 * exit the kernel, and re-enter using an interrupt vector.  (All
-	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
-	 * from happening by reloading SS in __switch_to.
-	 */
-	USERGS_SYSRET32
-
-#ifdef CONFIG_AUDITSYSCALL
-cstar_auditsys:
-	movl	%r9d, R9(%rsp)		/* register to be clobbered by call */
-	auditsys_entry_common
-	movl	R9(%rsp), %r9d		/* reload 6th syscall arg */
-	jmp	cstar_dispatch
-
-sysretl_audit:
-	auditsys_exit sysretl_from_sys_call
-#endif
-
-cstar_tracesys:
-#ifdef CONFIG_AUDITSYSCALL
-	testl	$(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jz	cstar_auditsys
-#endif
-	xchgl	%r9d, %ebp
-	SAVE_EXTRA_REGS
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%r9, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	movq	%rsp, %rdi		/* &pt_regs -> arg1 */
-	call	syscall_trace_enter
-	movl	R9(%rsp), %r9d
-
-	/* Reload arg registers from stack. (see sysenter_tracesys) */
-	movl	RCX(%rsp), %ecx
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	%eax, %eax		/* zero extension */
-
-	RESTORE_EXTRA_REGS
-	xchgl	%ebp, %r9d
-	jmp	cstar_do_call
+	 * R12-R15 are callee-saved, so they contain whatever was in them
+	 * when the system call started, which is already known to user
+	 * code.  We zero R8-R10 to avoid info leaks.
+         */
+	xorq	%r8, %r8
+	xorq	%r9, %r9
+	xorq	%r10, %r10
+	movq	RSP-ORIG_RAX(%rsp), %rsp
+        USERGS_SYSRET32
 END(entry_SYSCALL_compat)
 
-ia32_badarg:
-	/*
-	 * So far, we've entered kernel mode, set AC, turned on IRQs, and
-	 * saved C regs except r8-r11.  We haven't done any of the other
-	 * standard entry work, though.  We want to bail, but we shouldn't
-	 * treat this as a syscall entry since we don't even know what the
-	 * args are.  Instead, treat this as a non-syscall entry, finish
-	 * the entry work, and immediately exit after setting AX = -EFAULT.
-	 *
-	 * We're really just being polite here.  Killing the task outright
-	 * would be a reasonable action, too.  Given that the only valid
-	 * way to have gotten here is through the vDSO, and we already know
-	 * that the stack pointer is bad, the task isn't going to survive
-	 * for long no matter what we do.
-	 */
-
-	ASM_CLAC			/* undo STAC */
-	movq	$-EFAULT, RAX(%rsp)	/* return -EFAULT if possible */
-
-	/* Fill in the rest of pt_regs */
-	xorl	%eax, %eax
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	SAVE_EXTRA_REGS
-
-	/* Turn IRQs back off. */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-
-	/* Now finish entering normal kernel mode. */
-#ifdef CONFIG_CONTEXT_TRACKING
-	call enter_from_user_mode
-#endif
-
-	/* And exit again. */
-	jmp retint_user
-
-ia32_ret_from_sys_call_irqs_off:
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-
-ia32_ret_from_sys_call:
-	xorl	%eax, %eax		/* Do not leak kernel information */
-	movq	%rax, R11(%rsp)
-	movq	%rax, R10(%rsp)
-	movq	%rax, R9(%rsp)
-	movq	%rax, R8(%rsp)
-	jmp	int_ret_from_sys_call
-
 /*
  * Emulated IA32 system calls via int 0x80.
  *
@@ -507,14 +263,17 @@ ia32_ret_from_sys_call:
 ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
-	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
-	 * it is too small to ever cause noticeable irq latency.
 	 */
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	SWAPGS
-	ENABLE_INTERRUPTS(CLBR_NONE)
 
-	/* Zero-extending 32-bit regs, do not remove */
+	/*
+	 * User tracing code (ptrace or signal handlers) might assume that
+	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+	 * syscall.  Just in case the high bits are nonzero, zero-extend
+	 * the syscall number.  (This could almost certainly be deleted
+	 * with no ill effects.)
+	 */
 	movl	%eax, %eax
 
 	/* Construct struct pt_regs on stack (iret frame is already on stack) */
@@ -524,67 +283,37 @@ ENTRY(entry_INT80_compat)
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq	$0			/* pt_regs->r8 */
-	pushq	$0			/* pt_regs->r9 */
-	pushq	$0			/* pt_regs->r10 */
-	pushq	$0			/* pt_regs->r11 */
+	xorq    %r8,%r8
+	pushq   %r8                     /* pt_regs->r8  = 0 */
+	pushq   %r8                     /* pt_regs->r9  = 0 */
+	pushq   %r8                     /* pt_regs->r10 = 0 */
+	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   %rbx                    /* pt_regs->rbx */
+	pushq   %rbp                    /* pt_regs->rbp */
+	pushq   %r12                    /* pt_regs->r12 */
+	pushq   %r13                    /* pt_regs->r13 */
+	pushq   %r14                    /* pt_regs->r14 */
+	pushq   %r15                    /* pt_regs->r15 */
 	cld
-	sub	$(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
-
-	orl	$TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
-	testl	$_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
-	jnz	ia32_tracesys
-
-ia32_do_call:
-	/* 32-bit syscall -> 64-bit C ABI argument conversion */
-	movl	%edi, %r8d		/* arg5 */
-	movl	%ebp, %r9d		/* arg6 */
-	xchg	%ecx, %esi		/* rsi:arg2, rcx:arg4 */
-	movl	%ebx, %edi		/* arg1 */
-	movl	%edx, %edx		/* arg3 (zero extension) */
-	cmpq	$(IA32_NR_syscalls-1), %rax
-	ja	1f
 
-	call	*ia32_sys_call_table(, %rax, 8)
-	movq	%rax, RAX(%rsp)
-1:
-	jmp	int_ret_from_sys_call
-
-ia32_tracesys:
-	SAVE_EXTRA_REGS
-	movq	%rsp, %rdi			/* &pt_regs -> arg1 */
-	call	syscall_trace_enter
 	/*
-	 * Reload arg registers from stack in case ptrace changed them.
-	 * Don't reload %eax because syscall_trace_enter() returned
-	 * the %rax value we should see.  But do truncate it to 32 bits.
-	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
-	 * an appropriately invalid value.
+	 * User mode is traced as though IRQs are on, and the interrupt
+	 * gate turned them off.
 	 */
-	movl	RCX(%rsp), %ecx
-	movl	RDX(%rsp), %edx
-	movl	RSI(%rsp), %esi
-	movl	RDI(%rsp), %edi
-	movl	%eax, %eax		/* zero extension */
-	RESTORE_EXTRA_REGS
-	jmp	ia32_do_call
-END(entry_INT80_compat)
+	TRACE_IRQS_OFF
 
-	.macro PTREGSCALL label, func
-	ALIGN
-GLOBAL(\label)
-	leaq	\func(%rip), %rax
-	jmp	ia32_ptregs_common
-	.endm
+	movq	%rsp, %rdi
+	call	do_syscall_32_irqs_off
+.Lsyscall_32_done:
 
-	PTREGSCALL stub32_rt_sigreturn,	sys32_rt_sigreturn
-	PTREGSCALL stub32_sigreturn,	sys32_sigreturn
-	PTREGSCALL stub32_fork,		sys_fork
-	PTREGSCALL stub32_vfork,	sys_vfork
+	/* Go back to user mode. */
+	TRACE_IRQS_ON
+	SWAPGS
+	jmp	restore_regs_and_iret
+END(entry_INT80_compat)
 
 	ALIGN
 GLOBAL(stub32_clone)
-	leaq	sys_clone(%rip), %rax
 	/*
 	 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
 	 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
@@ -593,12 +322,4 @@ GLOBAL(stub32_clone)
 	 * so we need to swap arguments here before calling it:
 	 */
 	xchg	%r8, %rcx
-	jmp	ia32_ptregs_common
-
-	ALIGN
-ia32_ptregs_common:
-	SAVE_EXTRA_REGS 8
-	call	*%rax
-	RESTORE_EXTRA_REGS 8
-	ret
-END(ia32_ptregs_common)
+	jmp	sys_clone
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8ea34f94e973..9a6649857106 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -4,24 +4,21 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <asm/asm-offsets.h>
+#include <asm/syscall.h>
 
 #ifdef CONFIG_IA32_EMULATION
 #define SYM(sym, compat) compat
 #else
 #define SYM(sym, compat) sym
-#define ia32_sys_call_table sys_call_table
-#define __NR_syscall_compat_max __NR_syscall_max
 #endif
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
 #define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
 
-typedef asmlinkage void (*sys_call_ptr_t)(void);
-
-extern asmlinkage void sys_ni_syscall(void);
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 4ac730b37f0b..41283d22be7a 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -14,13 +14,13 @@
 # define __SYSCALL_X32(nr, sym, compat) /* nothing */
 #endif
 
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
+#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
 #define __SYSCALL_64(nr, sym, compat) [nr] = sym,
 
-extern void sys_ni_syscall(void);
+extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7663c455b9f6..f17705e1332c 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,7 +8,7 @@
 #
 0	i386	restart_syscall		sys_restart_syscall
 1	i386	exit			sys_exit
-2	i386	fork			sys_fork			stub32_fork
+2	i386	fork			sys_fork			sys_fork
 3	i386	read			sys_read
 4	i386	write			sys_write
 5	i386	open			sys_open			compat_sys_open
@@ -17,7 +17,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			sys_execve			stub32_execve
+11	i386	execve			sys_execve			compat_sys_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time			compat_sys_time
 14	i386	mknod			sys_mknod
@@ -125,7 +125,7 @@
 116	i386	sysinfo			sys_sysinfo			compat_sys_sysinfo
 117	i386	ipc			sys_ipc				compat_sys_ipc
 118	i386	fsync			sys_fsync
-119	i386	sigreturn		sys_sigreturn			stub32_sigreturn
+119	i386	sigreturn		sys_sigreturn			sys32_sigreturn
 120	i386	clone			sys_clone			stub32_clone
 121	i386	setdomainname		sys_setdomainname
 122	i386	uname			sys_newuname
@@ -179,7 +179,7 @@
 170	i386	setresgid		sys_setresgid16
 171	i386	getresgid		sys_getresgid16
 172	i386	prctl			sys_prctl
-173	i386	rt_sigreturn		sys_rt_sigreturn		stub32_rt_sigreturn
+173	i386	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
 174	i386	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 175	i386	rt_sigprocmask		sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
@@ -196,7 +196,7 @@
 187	i386	sendfile		sys_sendfile			compat_sys_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
-190	i386	vfork			sys_vfork			stub32_vfork
+190	i386	vfork			sys_vfork			sys_vfork
 191	i386	ugetrlimit		sys_getrlimit			compat_sys_getrlimit
 192	i386	mmap2			sys_mmap_pgoff
 193	i386	truncate64		sys_truncate64			sys32_truncate64
@@ -364,7 +364,7 @@
 355	i386	getrandom		sys_getrandom
 356	i386	memfd_create		sys_memfd_create
 357	i386	bpf			sys_bpf
-358	i386	execveat		sys_execveat			stub32_execveat
+358	i386	execveat		sys_execveat			compat_sys_execveat
 359	i386	socket			sys_socket
 360	i386	socketpair		sys_socketpair
 361	i386	bind			sys_bind
@@ -382,3 +382,4 @@
 373	i386	shutdown		sys_shutdown
 374	i386	userfaultfd		sys_userfaultfd
 375	i386	membarrier		sys_membarrier
+376	i386	mlock2			sys_mlock2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 278842fdf1f6..314a90bfc09c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -331,6 +331,7 @@
 322	64	execveat		stub_execveat
 323	common	userfaultfd		sys_userfaultfd
 324	common	membarrier		sys_membarrier
+325	common	mlock2			sys_mlock2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index a3d0767a6b29..265c0ed68118 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -19,9 +19,7 @@ obj-y				+= vma.o
 # vDSO images to build
 vdso_img-$(VDSO64-y)		+= 64
 vdso_img-$(VDSOX32-y)		+= x32
-vdso_img-$(VDSO32-y)		+= 32-int80
-vdso_img-$(CONFIG_IA32_EMULATION)	+= 32-syscall
-vdso_img-$(VDSO32-y)		+= 32-sysenter
+vdso_img-$(VDSO32-y)		+= 32
 
 obj-$(VDSO32-y)			+= vdso32-setup.o
 
@@ -69,7 +67,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
        $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
        -fno-omit-frame-pointer -foptimize-sibling-calls \
-       -DDISABLE_BRANCH_PROFILING
+       -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
 $(vobjs): KBUILD_CFLAGS += $(CFL)
 
@@ -122,15 +120,6 @@ $(obj)/%.so: $(obj)/%.so.dbg
 $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
 	$(call if_changed,vdso)
 
-#
-# Build multiple 32-bit vDSO images to choose from at boot time.
-#
-vdso32.so-$(VDSO32-y)		+= int80
-vdso32.so-$(CONFIG_IA32_EMULATION)	+= syscall
-vdso32.so-$(VDSO32-y)		+= sysenter
-
-vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)
-
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 
@@ -139,14 +128,12 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o
 targets += vdso32/vclock_gettime.o
 
-$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
-
-KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
+$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
 
 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
@@ -157,13 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
-$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
 
-$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
-				 $(obj)/vdso32/vdso32.lds \
-				 $(obj)/vdso32/vclock_gettime.o \
-				 $(obj)/vdso32/note.o \
-				 $(obj)/vdso32/%.o
+$(obj)/vdso32.so.dbg: FORCE \
+		      $(obj)/vdso32/vdso32.lds \
+		      $(obj)/vdso32/vclock_gettime.o \
+		      $(obj)/vdso32/note.o \
+		      $(obj)/vdso32/system_call.o
 	$(call if_changed,vdso)
 
 #
@@ -206,4 +193,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE
 
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
+clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 8627db24a7f6..785d9922b106 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -98,10 +98,10 @@ struct vdso_sym required_syms[] = {
 		"VDSO_FAKE_SECTION_TABLE_END", false
 	},
 	{"VDSO32_NOTE_MASK", true},
-	{"VDSO32_SYSENTER_RETURN", true},
 	{"__kernel_vsyscall", true},
 	{"__kernel_sigreturn", true},
 	{"__kernel_rt_sigreturn", true},
+	{"int80_landing_pad", true},
 };
 
 __attribute__((format(printf, 1, 2))) __attribute__((noreturn))
diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index e904c270573b..08a317a9ae4b 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -48,35 +48,9 @@ __setup("vdso32=", vdso32_setup);
 __setup_param("vdso=", vdso_setup, vdso32_setup, 0);
 #endif
 
-#ifdef CONFIG_X86_64
-
-#define	vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
-#define	vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32))
-
-#else  /* CONFIG_X86_32 */
-
-#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))
-#define vdso32_syscall()	(0)
-
-#endif	/* CONFIG_X86_64 */
-
-#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
-const struct vdso_image *selected_vdso32;
-#endif
-
 int __init sysenter_setup(void)
 {
-#ifdef CONFIG_COMPAT
-	if (vdso32_syscall())
-		selected_vdso32 = &vdso_image_32_syscall;
-	else
-#endif
-	if (vdso32_sysenter())
-		selected_vdso32 = &vdso_image_32_sysenter;
-	else
-		selected_vdso32 = &vdso_image_32_int80;
-
-	init_vdso_image(selected_vdso32);
+	init_vdso_image(&vdso_image_32);
 
 	return 0;
 }
diff --git a/arch/x86/entry/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S
deleted file mode 100644
index b15b7c01aedb..000000000000
--- a/arch/x86/entry/vdso/vdso32/int80.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the old int $0x80 method.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-	.text
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-	ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	int $0x80
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-	.previous
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
-	.long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIEDLSI:
-	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
-	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0
-	.align 4
-.LENDFDEDLSI:
-	.previous
-
-	/*
-	 * Pad out the segment to match the size of the sysenter.S version.
-	 */
-VDSO32_vsyscall_eh_frame_size = 0x40
-	.section .data,"aw",@progbits
-	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
-	.previous
diff --git a/arch/x86/entry/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S
deleted file mode 100644
index 6b286bb5251c..000000000000
--- a/arch/x86/entry/vdso/vdso32/syscall.S
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the syscall instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#define SYSCALL_ENTER_KERNEL	syscall
-#include "sigreturn.S"
-
-#include <asm/segment.h>
-
-	.text
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-	ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push	%ebp
-.Lpush_ebp:
-	movl	%ecx, %ebp
-	syscall
-	movl	%ebp, %ecx
-	popl	%ebp
-.Lpop_ebp:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-	.long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIE:
-
-	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
-.LSTARTFDE1:
-	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 8
-	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
-	.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 4
-	.align 4
-.LENDFDE1:
-	.previous
-
-	/*
-	 * Pad out the segment to match the size of the sysenter.S version.
-	 */
-VDSO32_vsyscall_eh_frame_size = 0x40
-	.section .data,"aw",@progbits
-	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
-	.previous
diff --git a/arch/x86/entry/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S
deleted file mode 100644
index e354bceee0e0..000000000000
--- a/arch/x86/entry/vdso/vdso32/sysenter.S
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Code for the vDSO.  This version uses the sysenter instruction.
- *
- * First get the common code for the sigreturn entry points.
- * This must come first.
- */
-#include "sigreturn.S"
-
-/*
- * The caller puts arg2 in %ecx, which gets pushed. The kernel will use
- * %ecx itself for arg2. The pushing is because the sysexit instruction
- * (found in entry.S) requires that we clobber %ecx with the desired %esp.
- * User code might expect that %ecx is unclobbered though, as it would be
- * for returning via the iret instruction, so we must push and pop.
- *
- * The caller puts arg3 in %edx, which the sysexit instruction requires
- * for %eip. Thus, exactly as for arg2, we must push and pop.
- *
- * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
- * instruction clobbers %esp, the user's %esp won't even survive entry
- * into the kernel. We store %esp in %ebp. Code in entry.S must fetch
- * arg6 from the stack.
- *
- * You can not use this vsyscall for the clone() syscall because the
- * three words on the parent stack do not get copied to the child.
- */
-	.text
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-	ALIGN
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push %ecx
-.Lpush_ecx:
-	push %edx
-.Lpush_edx:
-	push %ebp
-.Lenter_kernel:
-	movl %esp,%ebp
-	sysenter
-
-	/* 7: align return point with nop's to make disassembly easier */
-	.space 7,0x90
-
-	/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-	int $0x80
-	/* 16: System call normal return point is here! */
-VDSO32_SYSENTER_RETURN:	/* Symbol used by sysenter.c via vdso32-syms.h */
-	pop %ebp
-.Lpop_ebp:
-	pop %edx
-.Lpop_edx:
-	pop %ecx
-.Lpop_ecx:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall
-	.previous
-
-	.section .eh_frame,"a",@progbits
-.LSTARTFRAMEDLSI:
-	.long .LENDCIEDLSI-.LSTARTCIEDLSI
-.LSTARTCIEDLSI:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIEDLSI:
-	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
-.LSTARTFDEDLSI:
-	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer.  */
-	.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x10		/* RA at offset 16 now */
-	.byte 0x85, 0x04	/* DW_CFA_offset %ebp -16 */
-	/* Finally the epilogue.  */
-	.byte 0x40 + (.Lpop_ebp-.Lenter_kernel)	/* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x0c		/* RA at offset 12 now */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x08		/* RA at offset 8 now */
-	.byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.byte 0x04		/* RA at offset 4 now */
-	.align 4
-.LENDFDEDLSI:
-	.previous
-
-	/*
-	 * Emit a symbol with the size of this .eh_frame data,
-	 * to verify it matches the other versions.
-	 */
-VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
new file mode 100644
index 000000000000..93bd8452383f
--- /dev/null
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -0,0 +1,57 @@
+/*
+ * Code for the vDSO.  This version uses the old int $0x80 method.
+*/
+
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
+
+/*
+ * First get the common code for the sigreturn entry points.
+ * This must come first.
+ */
+#include "sigreturn.S"
+
+	.text
+	.globl __kernel_vsyscall
+	.type __kernel_vsyscall,@function
+	ALIGN
+__kernel_vsyscall:
+	CFI_STARTPROC
+	/*
+	 * Reshuffle regs so that all of any of the entry instructions
+	 * will preserve enough state.
+	 */
+	pushl	%edx
+	CFI_ADJUST_CFA_OFFSET	4
+	CFI_REL_OFFSET		edx, 0
+	pushl	%ecx
+	CFI_ADJUST_CFA_OFFSET	4
+	CFI_REL_OFFSET		ecx, 0
+	movl	%esp, %ecx
+
+#ifdef CONFIG_X86_64
+	/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
+	ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
+	                  "syscall",  X86_FEATURE_SYSCALL32
+#else
+	ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
+#endif
+
+	/* Enter using int $0x80 */
+	movl	(%esp), %ecx
+	int	$0x80
+GLOBAL(int80_landing_pad)
+
+	/* Restore ECX and EDX in case they were clobbered. */
+	popl	%ecx
+	CFI_RESTORE		ecx
+	CFI_ADJUST_CFA_OFFSET	-4
+	popl	%edx
+	CFI_RESTORE		edx
+	CFI_ADJUST_CFA_OFFSET	-4
+	ret
+	CFI_ENDPROC
+
+	.size __kernel_vsyscall,.-__kernel_vsyscall
+	.previous
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 175cc72c0f68..87a86e017f0e 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -14,11 +14,13 @@
  */
 #undef CONFIG_64BIT
 #undef CONFIG_X86_64
+#undef CONFIG_PGTABLE_LEVELS
 #undef CONFIG_ILLEGAL_POINTER_VALUE
 #undef CONFIG_SPARSEMEM_VMEMMAP
 #undef CONFIG_NR_CPUS
 
 #define CONFIG_X86_32 1
+#define CONFIG_PGTABLE_LEVELS 2
 #define CONFIG_PAGE_OFFSET 0
 #define CONFIG_ILLEGAL_POINTER_VALUE 0
 #define CONFIG_NR_CPUS 1
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 434543145d78..64df47148160 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -180,21 +180,10 @@ up_fail:
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static int load_vdso32(void)
 {
-	int ret;
-
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	ret = map_vdso(selected_vdso32, false);
-	if (ret)
-		return ret;
-
-	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
-		current_thread_info()->sysenter_return =
-			current->mm->context.vdso +
-			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
-
-	return 0;
+	return map_vdso(&vdso_image_32, false);
 }
 #endif
 
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index b160c0c6baed..174c2549939d 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -38,7 +38,14 @@
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
+#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
+	NATIVE;
+#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+	NONE;
+#else
+	EMULATE;
+#endif
 
 static int __init vsyscall_setup(char *str)
 {
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a0a19b7ba22d..0552884da18d 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -26,7 +26,7 @@
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
-#include <asm/sigcontext32.h>
+#include <uapi/asm/sigcontext.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
 #include <asm/sigframe.h>
@@ -68,7 +68,7 @@
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-				   struct sigcontext_ia32 __user *sc)
+				   struct sigcontext_32 __user *sc)
 {
 	unsigned int tmpflags, err = 0;
 	void __user *buf;
@@ -170,7 +170,7 @@ badframe:
  * Set up a signal frame.
  */
 
-static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
+static int ia32_setup_sigcontext(struct sigcontext_32 __user *sc,
 				 void __user *fpstate,
 				 struct pt_regs *regs, unsigned int mask)
 {
@@ -234,7 +234,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 		unsigned long fx_aligned, math_size;
 
 		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
-		*fpstate = (struct _fpstate_ia32 __user *) sp;
+		*fpstate = (struct _fpstate_32 __user *) sp;
 		if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
 				    math_size) < 0)
 			return (void __user *) -1L;
@@ -289,7 +289,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
 		/* Return stub is in 32bit vsyscall page */
 		if (current->mm->context.vdso)
 			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_sigreturn;
+				vdso_image_32.sym___kernel_sigreturn;
 		else
 			restorer = &frame->retcode;
 	}
@@ -368,7 +368,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
 			restorer = ksig->ka.sa.sa_restorer;
 		else
 			restorer = current->mm->context.vdso +
-				selected_vdso32->sym___kernel_rt_sigreturn;
+				vdso_image_32.sym___kernel_rt_sigreturn;
 		put_user_ex(ptr_to_compat(restorer), &frame->pretcode);
 
 		/*
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 3a45668f6dc3..94c18ebfd68c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -32,6 +32,10 @@
 #include <asm/mpspec.h>
 #include <asm/realmode.h>
 
+#ifdef CONFIG_ACPI_APEI
+# include <asm/pgtable_types.h>
+#endif
+
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
 extern int acpi_ioapic;
@@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void);
 
 #define acpi_unlazy_tlb(x)	leave_mm(x)
 
+#ifdef CONFIG_ACPI_APEI
+static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
+{
+	/*
+	 * We currently have no way to look up the EFI memory map
+	 * attributes for a region in a consistent way, because the
+	 * memmap is discarded after efi_free_boot_services(). So if
+	 * you call efi_mem_attributes() during boot and at runtime,
+	 * you could theoretically see different attributes.
+	 *
+	 * Since we are yet to see any x86 platforms that require
+	 * anything other than PAGE_KERNEL (some arm64 platforms
+	 * require the equivalent of PAGE_KERNEL_NOCACHE), return that
+	 * until we know differently.
+	 */
+	 return PAGE_KERNEL;
+}
+#endif
+
 #endif /* _ASM_X86_ACPI_H */
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 1a5da2e63aee..3c56ef1ae068 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
 	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
 
-static inline u16 amd_get_node_id(struct pci_dev *pdev)
+static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
 {
 	struct pci_dev *misc;
 	int i;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index ebf6d5e5668c..a30316bf801a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void)
 	return msr & X2APIC_ENABLE;
 }
 
+extern void enable_IR_x2apic(void);
+
+extern int get_physical_broadcast(void);
+
+extern int lapic_get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void disconnect_bsp_APIC(int virt_wire_setup);
+extern void disable_local_APIC(void);
+extern void lapic_shutdown(void);
+extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
+extern void setup_local_APIC(void);
+extern void init_apic_mappings(void);
+void register_lapic_address(unsigned long address);
+extern void setup_boot_APIC_clock(void);
+extern void setup_secondary_APIC_clock(void);
+extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
+extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
+/*
+ * On 32bit this is mach-xxx local
+ */
+#ifdef CONFIG_X86_64
+extern int apic_is_clustered_box(void);
+#else
+static inline int apic_is_clustered_box(void)
+{
+	return 0;
+}
+#endif
+
+extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
+
+#else /* !CONFIG_X86_LOCAL_APIC */
+static inline void lapic_shutdown(void) { }
+#define local_apic_timer_c2_ok		1
+static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
+#endif /* !CONFIG_X86_LOCAL_APIC */
+
 #ifdef CONFIG_X86_X2APIC
 /*
  * Make previous memory operations globally visible before
@@ -186,67 +239,14 @@ static inline int x2apic_enabled(void)
 }
 
 #define x2apic_supported()	(cpu_has_x2apic)
-#else
+#else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
 static inline int x2apic_enabled(void) { return 0; }
 
 #define x2apic_mode		(0)
 #define	x2apic_supported()	(0)
-#endif
-
-extern void enable_IR_x2apic(void);
-
-extern int get_physical_broadcast(void);
-
-extern int lapic_get_maxlvt(void);
-extern void clear_local_APIC(void);
-extern void disconnect_bsp_APIC(int virt_wire_setup);
-extern void disable_local_APIC(void);
-extern void lapic_shutdown(void);
-extern void sync_Arb_IDs(void);
-extern void init_bsp_APIC(void);
-extern void setup_local_APIC(void);
-extern void init_apic_mappings(void);
-void register_lapic_address(unsigned long address);
-extern void setup_boot_APIC_clock(void);
-extern void setup_secondary_APIC_clock(void);
-extern int APIC_init_uniprocessor(void);
-
-#ifdef CONFIG_X86_64
-static inline int apic_force_enable(unsigned long addr)
-{
-	return -1;
-}
-#else
-extern int apic_force_enable(unsigned long addr);
-#endif
-
-extern int apic_bsp_setup(bool upmode);
-extern void apic_ap_setup(void);
-
-/*
- * On 32bit this is mach-xxx local
- */
-#ifdef CONFIG_X86_64
-extern int apic_is_clustered_box(void);
-#else
-static inline int apic_is_clustered_box(void)
-{
-	return 0;
-}
-#endif
-
-extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
-
-#else /* !CONFIG_X86_LOCAL_APIC */
-static inline void lapic_shutdown(void) { }
-#define local_apic_timer_c2_ok		1
-static inline void init_apic_mappings(void) { }
-static inline void disable_local_APIC(void) { }
-# define setup_boot_APIC_clock x86_init_noop
-# define setup_secondary_APIC_clock x86_init_noop
-#endif /* !CONFIG_X86_LOCAL_APIC */
+#endif /* !CONFIG_X86_X2APIC */
 
 #ifdef CONFIG_X86_64
 #define	SET_APIC_ID(x)		(apic->set_apic_id(x))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index fb52aa644aab..ae5fb83e6d91 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -24,7 +24,7 @@
  */
 static __always_inline int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
  */
 static __always_inline void atomic_set(atomic_t *v, int i)
 {
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 }
 
 /**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 50e33eff58de..037351022f54 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -18,7 +18,7 @@
  */
 static inline long atomic64_read(const atomic64_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
  */
 static inline void atomic64_set(atomic64_t *v, long i)
 {
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 }
 
 /**
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e6cf2ad350d1..e4f8010f22e0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
 #include <asm/disabled-features.h>
 #endif
 
-#define NCAPINTS	13	/* N 32-bit words worth of info */
+#define NCAPINTS	14	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -193,7 +193,7 @@
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_HWP		( 7*32+ 10) /* "hwp" Intel HWP */
-#define X86_FEATURE_HWP_NOITFY	( 7*32+ 11) /* Intel HWP_NOTIFY */
+#define X86_FEATURE_HWP_NOTIFY	( 7*32+ 11) /* Intel HWP_NOTIFY */
 #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
 #define X86_FEATURE_HWP_EPP	( 7*32+13) /* Intel HWP_EPP */
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
@@ -255,6 +255,9 @@
 /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
 #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
 
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
new file mode 100644
index 000000000000..b7a1ab865d68
--- /dev/null
+++ b/arch/x86/include/asm/dwarf2.h
@@ -0,0 +1,84 @@
+#ifndef _ASM_X86_DWARF2_H
+#define _ASM_X86_DWARF2_H
+
+#ifndef __ASSEMBLY__
+#warning "asm/dwarf2.h should be only included in pure assembly files"
+#endif
+
+/*
+ * Macros for dwarf2 CFI unwind table entries.
+ * See "as.info" for details on these pseudo ops. Unfortunately
+ * they are only supported in very new binutils, so define them
+ * away for older version.
+ */
+
+#ifdef CONFIG_AS_CFI
+
+#define CFI_STARTPROC		.cfi_startproc
+#define CFI_ENDPROC		.cfi_endproc
+#define CFI_DEF_CFA		.cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER	.cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET	.cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET	.cfi_adjust_cfa_offset
+#define CFI_OFFSET		.cfi_offset
+#define CFI_REL_OFFSET		.cfi_rel_offset
+#define CFI_REGISTER		.cfi_register
+#define CFI_RESTORE		.cfi_restore
+#define CFI_REMEMBER_STATE	.cfi_remember_state
+#define CFI_RESTORE_STATE	.cfi_restore_state
+#define CFI_UNDEFINED		.cfi_undefined
+#define CFI_ESCAPE		.cfi_escape
+
+#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
+#define CFI_SIGNAL_FRAME	.cfi_signal_frame
+#else
+#define CFI_SIGNAL_FRAME
+#endif
+
+#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__)
+#ifndef BUILD_VDSO
+	/*
+	 * Emit CFI data in .debug_frame sections, not .eh_frame sections.
+	 * The latter we currently just discard since we don't do DWARF
+	 * unwinding at runtime.  So only the offline DWARF information is
+	 * useful to anyone.  Note we should not use this directive if
+	 * vmlinux.lds.S gets changed so it doesn't discard .eh_frame.
+	 */
+	.cfi_sections .debug_frame
+#else
+	 /*
+	  * For the vDSO, emit both runtime unwind information and debug
+	  * symbols for the .dbg file.
+	  */
+	.cfi_sections .eh_frame, .debug_frame
+#endif
+#endif
+
+#else
+
+/*
+ * Due to the structure of pre-exisiting code, don't use assembler line
+ * comment character # to ignore the arguments. Instead, use a dummy macro.
+ */
+.macro cfi_ignore a=0, b=0, c=0, d=0
+.endm
+
+#define CFI_STARTPROC		cfi_ignore
+#define CFI_ENDPROC		cfi_ignore
+#define CFI_DEF_CFA		cfi_ignore
+#define CFI_DEF_CFA_REGISTER	cfi_ignore
+#define CFI_DEF_CFA_OFFSET	cfi_ignore
+#define CFI_ADJUST_CFA_OFFSET	cfi_ignore
+#define CFI_OFFSET		cfi_ignore
+#define CFI_REL_OFFSET		cfi_ignore
+#define CFI_REGISTER		cfi_ignore
+#define CFI_RESTORE		cfi_ignore
+#define CFI_REMEMBER_STATE	cfi_ignore
+#define CFI_RESTORE_STATE	cfi_ignore
+#define CFI_UNDEFINED		cfi_ignore
+#define CFI_ESCAPE		cfi_ignore
+#define CFI_SIGNAL_FRAME	cfi_ignore
+
+#endif
+
+#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ab5f1d447ef9..0010c78c4998 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
 extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 					u32 type, u64 attribute);
 
+#ifdef CONFIG_KASAN
 /*
  * CONFIG_KASAN may redefine memset to __memset.  __memset function is present
  * only in kernel binary.  Since the EFI stub linked into a separate binary it
@@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 #undef memcpy
 #undef memset
 #undef memmove
+#endif
 
 #endif /* CONFIG_X86_32 */
 
@@ -103,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
 extern pgd_t * __init efi_call_phys_prolog(void);
 extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
+extern void __init efi_print_memmap(void);
 extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 141c561f4664..1514753fd435 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,11 +171,11 @@ do {						\
 static inline void elf_common_init(struct thread_struct *t,
 				   struct pt_regs *regs, const u16 ds)
 {
-	/* Commented-out registers are cleared in stub_execve */
-	/*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
-	regs->si = regs->di /*= regs->bp*/ = 0;
+	/* ax gets execve's return value. */
+	/*regs->ax = */ regs->bx = regs->cx = regs->dx = 0;
+	regs->si = regs->di = regs->bp = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
-	/*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
+	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
 	t->fs = t->gs = 0;
 	t->fsindex = t->gsindex = 0;
 	t->ds = t->es = ds;
@@ -328,7 +328,7 @@ else									\
 
 #define VDSO_ENTRY							\
 	((unsigned long)current->mm->context.vdso +			\
-	 selected_vdso32->sym___kernel_vsyscall)
+	 vdso_image_32.sym___kernel_vsyscall)
 
 struct linux_binprm;
 
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 7358e9d61f1e..0e970d00dfcd 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -5,7 +5,7 @@
 #define _ASM_X86_FPU_SIGNAL_H
 
 #ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
+# include <uapi/asm/sigcontext.h>
 # include <asm/user32.h>
 struct ksignal;
 int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c49c5173158e..1c6f6ac52ad0 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -95,63 +95,122 @@ struct swregs_state {
 /*
  * List of XSAVE features Linux knows about:
  */
-enum xfeature_bit {
-	XSTATE_BIT_FP,
-	XSTATE_BIT_SSE,
-	XSTATE_BIT_YMM,
-	XSTATE_BIT_BNDREGS,
-	XSTATE_BIT_BNDCSR,
-	XSTATE_BIT_OPMASK,
-	XSTATE_BIT_ZMM_Hi256,
-	XSTATE_BIT_Hi16_ZMM,
-
-	XFEATURES_NR_MAX,
+enum xfeature {
+	XFEATURE_FP,
+	XFEATURE_SSE,
+	/*
+	 * Values above here are "legacy states".
+	 * Those below are "extended states".
+	 */
+	XFEATURE_YMM,
+	XFEATURE_BNDREGS,
+	XFEATURE_BNDCSR,
+	XFEATURE_OPMASK,
+	XFEATURE_ZMM_Hi256,
+	XFEATURE_Hi16_ZMM,
+
+	XFEATURE_MAX,
 };
 
-#define XSTATE_FP		(1 << XSTATE_BIT_FP)
-#define XSTATE_SSE		(1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM		(1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS		(1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR		(1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK		(1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256	(1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM		(1 << XSTATE_BIT_Hi16_ZMM)
+#define XFEATURE_MASK_FP		(1 << XFEATURE_FP)
+#define XFEATURE_MASK_SSE		(1 << XFEATURE_SSE)
+#define XFEATURE_MASK_YMM		(1 << XFEATURE_YMM)
+#define XFEATURE_MASK_BNDREGS		(1 << XFEATURE_BNDREGS)
+#define XFEATURE_MASK_BNDCSR		(1 << XFEATURE_BNDCSR)
+#define XFEATURE_MASK_OPMASK		(1 << XFEATURE_OPMASK)
+#define XFEATURE_MASK_ZMM_Hi256		(1 << XFEATURE_ZMM_Hi256)
+#define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
+
+#define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
+#define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK \
+					 | XFEATURE_MASK_ZMM_Hi256 \
+					 | XFEATURE_MASK_Hi16_ZMM)
+
+#define FIRST_EXTENDED_XFEATURE	XFEATURE_YMM
 
-#define XSTATE_FPSSE		(XSTATE_FP | XSTATE_SSE)
-#define XSTATE_AVX512		(XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+struct reg_128_bit {
+	u8      regbytes[128/8];
+};
+struct reg_256_bit {
+	u8	regbytes[256/8];
+};
+struct reg_512_bit {
+	u8	regbytes[512/8];
+};
 
 /*
+ * State component 2:
+ *
  * There are 16x 256-bit AVX registers named YMM0-YMM15.
  * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
- * and are stored in 'struct fxregs_state::xmm_space[]'.
+ * and are stored in 'struct fxregs_state::xmm_space[]' in the
+ * "legacy" area.
  *
- * The high 128 bits are stored here:
- *    16x 128 bits == 256 bytes.
+ * The high 128 bits are stored here.
  */
 struct ymmh_struct {
-	u8				ymmh_space[256];
-};
-
-/* We don't support LWP yet: */
-struct lwp_struct {
-	u8				reserved[128];
-};
+	struct reg_128_bit              hi_ymm[16];
+} __packed;
 
 /* Intel MPX support: */
-struct bndreg {
+
+struct mpx_bndreg {
 	u64				lower_bound;
 	u64				upper_bound;
 } __packed;
+/*
+ * State component 3 is used for the 4 128-bit bounds registers
+ */
+struct mpx_bndreg_state {
+	struct mpx_bndreg		bndreg[4];
+} __packed;
 
-struct bndcsr {
+/*
+ * State component 4 is used for the 64-bit user-mode MPX
+ * configuration register BNDCFGU and the 64-bit MPX status
+ * register BNDSTATUS.  We call the pair "BNDCSR".
+ */
+struct mpx_bndcsr {
 	u64				bndcfgu;
 	u64				bndstatus;
 } __packed;
 
-struct mpx_struct {
-	struct bndreg			bndreg[4];
-	struct bndcsr			bndcsr;
-};
+/*
+ * The BNDCSR state is padded out to be 64-bytes in size.
+ */
+struct mpx_bndcsr_state {
+	union {
+		struct mpx_bndcsr		bndcsr;
+		u8				pad_to_64_bytes[64];
+	};
+} __packed;
+
+/* AVX-512 Components: */
+
+/*
+ * State component 5 is used for the 8 64-bit opmask registers
+ * k0-k7 (opmask state).
+ */
+struct avx_512_opmask_state {
+	u64				opmask_reg[8];
+} __packed;
+
+/*
+ * State component 6 is used for the upper 256 bits of the
+ * registers ZMM0-ZMM15. These 16 256-bit values are denoted
+ * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
+ */
+struct avx_512_zmm_uppers_state {
+	struct reg_256_bit		zmm_upper[16];
+} __packed;
+
+/*
+ * State component 7 is used for the 16 512-bit registers
+ * ZMM16-ZMM31 (Hi16_ZMM state).
+ */
+struct avx_512_hi16_state {
+	struct reg_512_bit		hi16_zmm[16];
+} __packed;
 
 struct xstate_header {
 	u64				xfeatures;
@@ -159,22 +218,19 @@ struct xstate_header {
 	u64				reserved[6];
 } __attribute__((packed));
 
-/* New processor state extensions should be added here: */
-#define XSTATE_RESERVE			(sizeof(struct ymmh_struct) + \
-					 sizeof(struct lwp_struct)  + \
-					 sizeof(struct mpx_struct)  )
 /*
  * This is our most modern FPU state format, as saved by the XSAVE
  * and restored by the XRSTOR instructions.
  *
  * It consists of a legacy fxregs portion, an xstate header and
- * subsequent fixed size areas as defined by the xstate header.
- * Not all CPUs support all the extensions.
+ * subsequent areas as defined by the xstate header.  Not all CPUs
+ * support all the extensions, so the size of the extended area
+ * can vary quite a bit between CPUs.
  */
 struct xregs_state {
 	struct fxregs_state		i387;
 	struct xstate_header		header;
-	u8				__reserved[XSTATE_RESERVE];
+	u8				extended_state_area[0];
 } __attribute__ ((packed, aligned (64)));
 
 /*
@@ -182,7 +238,9 @@ struct xregs_state {
  * put together, so that we can pick the right one runtime.
  *
  * The size of the structure is determined by the largest
- * member - which is the xsave area:
+ * member - which is the xsave area.  The padding is there
+ * to ensure that statically-allocated task_structs (just
+ * the init_task today) have enough space.
  */
 union fpregs_state {
 	struct fregs_state		fsave;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 4656b25bb9a7..3a6c89b70307 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -6,7 +6,7 @@
 #include <linux/uaccess.h>
 
 /* Bit 63 of XCR0 is reserved for future expansion */
-#define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))
+#define XFEATURE_MASK_EXTEND	(~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
 
 #define XSTATE_CPUID		0x0000000d
 
@@ -19,14 +19,18 @@
 #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
 /* Supported features which support lazy state saving */
-#define XSTATE_LAZY	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM		      \
-			| XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+#define XFEATURE_MASK_LAZY	(XFEATURE_MASK_FP | \
+				 XFEATURE_MASK_SSE | \
+				 XFEATURE_MASK_YMM | \
+				 XFEATURE_MASK_OPMASK |	\
+				 XFEATURE_MASK_ZMM_Hi256 | \
+				 XFEATURE_MASK_Hi16_ZMM)
 
 /* Supported features which require eager state saving */
-#define XSTATE_EAGER	(XSTATE_BNDREGS | XSTATE_BNDCSR)
+#define XFEATURE_MASK_EAGER	(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
 
 /* All currently supported features */
-#define XCNTXT_MASK	(XSTATE_LAZY | XSTATE_EAGER)
+#define XCNTXT_MASK	(XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
 
 #ifdef CONFIG_X86_64
 #define REX_PREFIX	"0x48, "
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 
+void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5fa9fb0f8809..cc285ec4b2c1 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,10 +63,10 @@
 /* hpet memory map physical address */
 extern unsigned long hpet_address;
 extern unsigned long force_hpet_address;
-extern int boot_hpet_disable;
+extern bool boot_hpet_disable;
 extern u8 hpet_blockid;
-extern int hpet_force_user;
-extern u8 hpet_msi_disable;
+extern bool hpet_force_user;
+extern bool hpet_msi_disable;
 extern int is_hpet_enabled(void);
 extern int hpet_enable(void);
 extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 28019765442e..a9bdf5569ab3 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -10,7 +10,7 @@
  * 32 bit structures for IA32 support.
  */
 
-#include <asm/sigcontext32.h>
+#include <uapi/asm/sigcontext.h>
 
 /* signal.h */
 
@@ -18,7 +18,7 @@ struct ucontext_ia32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
 	compat_stack_t	  uc_stack;
-	struct sigcontext_ia32 uc_mcontext;
+	struct sigcontext_32 uc_mcontext;
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 046c7fb1ca43..a210eba2727c 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,6 +33,11 @@ enum irq_remap_cap {
 	IRQ_POSTING_CAP = 0,
 };
 
+struct vcpu_data {
+	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
+	u32 vector;		/* Guest vector of the interrupt */
+};
+
 #ifdef CONFIG_IRQ_REMAP
 
 extern bool irq_remapping_cap(enum irq_remap_cap cap);
@@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
 	return x86_vector_domain;
 }
 
-struct vcpu_data {
-	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
-	u32 vector;		/* Guest vector of the interrupt */
-};
-
 #else  /* CONFIG_IRQ_REMAP */
 
 static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index b130d59406fb..e5f5dc9787d5 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -29,11 +29,5 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs,
 extern void __show_regs(struct pt_regs *regs, int all);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
-#ifdef CONFIG_KEXEC_CORE
-extern int in_crash_kexec;
-#else
-/* no crash dump is ever in progress if no crash kernel can be kexec'd */
-#define in_crash_kexec 0
-#endif
 
 #endif /* _ASM_X86_KDEBUG_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e16466ec473c..e9cd7befcb76 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -112,6 +112,16 @@ struct x86_emulate_ops {
 			struct x86_exception *fault);
 
 	/*
+	 * read_phys: Read bytes of standard (non-emulated/special) memory.
+	 *            Used for descriptor reading.
+	 *  @addr:  [IN ] Physical address from which to read.
+	 *  @val:   [OUT] Value read from memory.
+	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 */
+	int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+			void *val, unsigned int bytes);
+
+	/*
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
 	 *            Used for descriptor writing.
 	 *  @addr:  [IN ] Linear address to which to write.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee0382088..9265196e877f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include <linux/perf_event.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/clocksource.h>
+#include <linux/irqbypass.h>
 
 #include <asm/pvclock-abi.h>
 #include <asm/desc.h>
@@ -176,6 +177,8 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 
+struct kvm_kernel_irq_routing_entry;
+
 /*
  * We don't want allocation failures within the mmu code, so we preallocate
  * enough memory for a single page fault in a cache.
@@ -374,6 +377,7 @@ struct kvm_mtrr {
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
 	u64 hv_vapic;
+	s64 runtime_offset;
 };
 
 struct kvm_vcpu_arch {
@@ -396,6 +400,7 @@ struct kvm_vcpu_arch {
 	u64 efer;
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
+	u64 eoi_exit_bitmap[4];
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
 	int mp_state;
@@ -573,6 +578,9 @@ struct kvm_vcpu_arch {
 	struct {
 		bool pv_unhalted;
 	} pv;
+
+	int pending_ioapic_eoi;
+	int pending_external_vector;
 };
 
 struct kvm_lpage_info {
@@ -683,6 +691,9 @@ struct kvm_arch {
 	u32 bsp_vcpu_id;
 
 	u64 disabled_quirks;
+
+	bool irqchip_split;
+	u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
@@ -819,10 +830,10 @@ struct kvm_x86_ops {
 	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
 	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-	int (*vm_has_apicv)(struct kvm *kvm);
+	int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
-	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
@@ -887,6 +898,20 @@ struct kvm_x86_ops {
 					   gfn_t offset, unsigned long mask);
 	/* pmu operations of sub-arch */
 	const struct kvm_pmu_ops *pmu_ops;
+
+	/*
+	 * Architecture specific hooks for vCPU blocking due to
+	 * HLT instruction.
+	 * Returns for .pre_block():
+	 *    - 0 means continue to block the vCPU.
+	 *    - 1 means we cannot block the vCPU since some event
+	 *        happens during this period, such as, 'ON' bit in
+	 *        posted-interrupts descriptor is set.
+	 */
+	int (*pre_block)(struct kvm_vcpu *vcpu);
+	void (*post_block)(struct kvm_vcpu *vcpu);
+	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
+			      uint32_t guest_irq, bool set);
 };
 
 struct kvm_arch_async_pf {
@@ -1226,11 +1251,18 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
 int kvm_is_in_guest(void);
 
-int __x86_set_memory_region(struct kvm *kvm,
-			    const struct kvm_userspace_memory_region *mem);
-int x86_set_memory_region(struct kvm *kvm,
-			  const struct kvm_userspace_memory_region *mem);
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu);
+
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+		     struct kvm_lapic_irq *irq);
+
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2dbc0bf2b9f3..2ea4527e462f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -123,19 +123,27 @@ struct mca_config {
 };
 
 struct mce_vendor_flags {
-			/*
-			 * overflow recovery cpuid bit indicates that overflow
-			 * conditions are not fatal
-			 */
-	__u64		overflow_recov	: 1,
-
-			/*
-			 * SUCCOR stands for S/W UnCorrectable error COntainment
-			 * and Recovery. It indicates support for data poisoning
-			 * in HW and deferred error interrupts.
-			 */
-			succor		: 1,
-			__reserved_0	: 62;
+	/*
+	 * Indicates that overflow conditions are not fatal, when set.
+	 */
+	__u64 overflow_recov	: 1,
+
+	/*
+	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+	 * Recovery. It indicates support for data poisoning in HW and deferred
+	 * error interrupts.
+	 */
+	      succor		: 1,
+
+	/*
+	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+	 * the register space for each MCA bank and also increases number of
+	 * banks. Also, to accommodate the new banks and registers, the MCA
+	 * register space is moved to a new MSR range.
+	 */
+	      smca		: 1,
+
+	      __reserved_0	: 61;
 };
 extern struct mce_vendor_flags mce_flags;
 
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 9e6278c7140e..34e62b1dcfce 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -27,7 +27,6 @@ struct cpu_signature {
 struct device;
 
 enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
-extern bool dis_ucode_ldr;
 
 struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
@@ -55,6 +54,12 @@ struct ucode_cpu_info {
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
+#ifdef CONFIG_MICROCODE
+int __init microcode_init(void);
+#else
+static inline int __init microcode_init(void)	{ return 0; };
+#endif
+
 #ifdef CONFIG_MICROCODE_INTEL
 extern struct microcode_ops * __init init_intel_microcode(void);
 #else
@@ -75,7 +80,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 static inline void __exit exit_amd_microcode(void) {}
 #endif
 
-#ifdef CONFIG_MICROCODE_EARLY
 #define MAX_UCODE_COUNT 128
 
 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -150,22 +154,18 @@ static inline unsigned int x86_model(unsigned int sig)
 	return model;
 }
 
+#ifdef CONFIG_MICROCODE
 extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 extern int __init save_microcode_in_initrd(void);
 void reload_early_microcode(void);
 extern bool get_builtin_firmware(struct cpio_data *cd, const char *name);
 #else
-static inline void __init load_ucode_bsp(void) {}
-static inline void load_ucode_ap(void) {}
-static inline int __init save_microcode_in_initrd(void)
-{
-	return 0;
-}
-static inline void reload_early_microcode(void) {}
-static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name)
-{
-	return false;
-}
+static inline void __init load_ucode_bsp(void)			{ }
+static inline void load_ucode_ap(void)				{ }
+static inline int __init save_microcode_in_initrd(void)		{ return 0; }
+static inline void reload_early_microcode(void)			{ }
+static inline bool
+get_builtin_firmware(struct cpio_data *cd, const char *name)	{ return false; }
 #endif
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index ac6d328977a6..adfc847a395e 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -64,7 +64,7 @@ extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, s
 #define PATCH_MAX_SIZE PAGE_SIZE
 extern u8 amd_ucode_patch[PATCH_MAX_SIZE];
 
-#ifdef CONFIG_MICROCODE_AMD_EARLY
+#ifdef CONFIG_MICROCODE_AMD
 extern void __init load_ucode_amd_bsp(unsigned int family);
 extern void load_ucode_amd_ap(void);
 extern int __init save_microcode_in_initrd_amd(void);
@@ -76,4 +76,5 @@ static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; }
 void reload_ucode_amd(void) {}
 #endif
 
+extern bool check_current_patch_level(u32 *rev, bool early);
 #endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index 7991c606125d..8559b0102ea1 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -57,7 +57,7 @@ extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev);
 extern int microcode_sanity_check(void *mc, int print_err);
 extern int find_matching_signature(void *mc, unsigned int csig, int cpf);
 
-#ifdef CONFIG_MICROCODE_INTEL_EARLY
+#ifdef CONFIG_MICROCODE_INTEL
 extern void __init load_ucode_intel_bsp(void);
 extern void load_ucode_intel_ap(void);
 extern void show_ucode_info_early(void);
@@ -71,13 +71,9 @@ static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL;
 static inline void reload_ucode_intel(void) {}
 #endif
 
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
+#ifdef CONFIG_HOTPLUG_CPU
 extern int save_mc_for_early(u8 *mc);
 #else
-static inline int save_mc_for_early(u8 *mc)
-{
-	return 0;
-}
+static inline int save_mc_for_early(u8 *mc) { return 0; }
 #endif
-
 #endif /* _ASM_X86_MICROCODE_INTEL_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b98b471a3b7e..9f3905697f12 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -141,6 +141,8 @@
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
 
+#define MSR_PEBS_FRONTEND		0x000003f7
+
 #define MSR_IA32_POWER_CTL		0x000001fc
 
 #define MSR_IA32_MC0_CTL		0x00000400
@@ -204,6 +206,13 @@
 #define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
 #define MSR_RING_PERF_LIMIT_REASONS	0x000006B1
 
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL		0x00000648
+#define MSR_CONFIG_TDP_LEVEL1		0x00000649
+#define MSR_CONFIG_TDP_LEVEL2		0x0000064A
+#define MSR_CONFIG_TDP_CONTROL		0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
+
 /* Hardware P state interface */
 #define MSR_PPERF			0x0000064e
 #define MSR_PERF_LIMIT_REASONS		0x0000064f
diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
index 1c6f7f6212c1..c64373a2d731 100644
--- a/arch/x86/include/asm/numachip/numachip.h
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -14,6 +14,7 @@
 #ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
 #define _ASM_X86_NUMACHIP_NUMACHIP_H
 
+extern u8 numachip_system;
 extern int __init pci_numachip_init(void);
 
 #endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
index 660f843df928..29719eecdc2e 100644
--- a/arch/x86/include/asm/numachip/numachip_csr.h
+++ b/arch/x86/include/asm/numachip/numachip_csr.h
@@ -14,12 +14,8 @@
 #ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
 #define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
 
-#include <linux/numa.h>
-#include <linux/percpu.h>
+#include <linux/smp.h>
 #include <linux/io.h>
-#include <linux/swab.h>
-#include <asm/types.h>
-#include <asm/processor.h>
 
 #define CSR_NODE_SHIFT		16
 #define CSR_NODE_BITS(p)	(((unsigned long)(p)) << CSR_NODE_SHIFT)
@@ -27,11 +23,8 @@
 
 /* 32K CSR space, b15 indicates geo/non-geo */
 #define CSR_OFFSET_MASK	0x7fffUL
-
-/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
-#define NUMACHIP_GCSR_BASE	0x3fff00000000ULL
-#define NUMACHIP_GCSR_LIM	0x3fff0fffffffULL
-#define NUMACHIP_GCSR_SIZE	(NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
+#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
+#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
 
 /*
  * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however
@@ -41,12 +34,7 @@
 #define NUMACHIP_LCSR_BASE	0x3ffffe000000ULL
 #define NUMACHIP_LCSR_LIM	0x3fffffffffffULL
 #define NUMACHIP_LCSR_SIZE	(NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
-
-static inline void *gcsr_address(int node, unsigned long offset)
-{
-	return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
-		CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
-}
+#define NUMACHIP_LAPIC_BITS	8
 
 static inline void *lcsr_address(unsigned long offset)
 {
@@ -54,114 +42,57 @@ static inline void *lcsr_address(unsigned long offset)
 		CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
 }
 
-static inline unsigned int read_gcsr(int node, unsigned long offset)
+static inline unsigned int read_lcsr(unsigned long offset)
 {
-	return swab32(readl(gcsr_address(node, offset)));
+	return swab32(readl(lcsr_address(offset)));
 }
 
-static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
+static inline void write_lcsr(unsigned long offset, unsigned int val)
 {
-	writel(swab32(val), gcsr_address(node, offset));
+	writel(swab32(val), lcsr_address(offset));
 }
 
-static inline unsigned int read_lcsr(unsigned long offset)
+/*
+ * On NumaChip2, local CSR space is 16MB and starts at fixed offset below 4G
+ */
+
+#define NUMACHIP2_LCSR_BASE       0xf0000000UL
+#define NUMACHIP2_LCSR_SIZE       0x1000000UL
+#define NUMACHIP2_APIC_ICR        0x100000
+#define NUMACHIP2_TIMER_DEADLINE  0x200000
+#define NUMACHIP2_TIMER_INT       0x200008
+#define NUMACHIP2_TIMER_NOW       0x200018
+#define NUMACHIP2_TIMER_RESET     0x200020
+
+static inline void __iomem *numachip2_lcsr_address(unsigned long offset)
 {
-	return swab32(readl(lcsr_address(offset)));
+	return (void __iomem *)__va(NUMACHIP2_LCSR_BASE |
+		(offset & (NUMACHIP2_LCSR_SIZE - 1)));
 }
 
-static inline void write_lcsr(unsigned long offset, unsigned int val)
+static inline u32 numachip2_read32_lcsr(unsigned long offset)
 {
-	writel(swab32(val), lcsr_address(offset));
+	return readl(numachip2_lcsr_address(offset));
 }
 
-/* ========================================================================= */
-/*                   CSR_G0_STATE_CLEAR                                      */
-/* ========================================================================= */
-
-#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
-union numachip_csr_g0_state_clear {
-	unsigned int v;
-	struct numachip_csr_g0_state_clear_s {
-		unsigned int _state:2;
-		unsigned int _rsvd_2_6:5;
-		unsigned int _lost:1;
-		unsigned int _rsvd_8_31:24;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G0_NODE_IDS                                         */
-/* ========================================================================= */
+static inline u64 numachip2_read64_lcsr(unsigned long offset)
+{
+	return readq(numachip2_lcsr_address(offset));
+}
 
-#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
-union numachip_csr_g0_node_ids {
-	unsigned int v;
-	struct numachip_csr_g0_node_ids_s {
-		unsigned int _initialid:16;
-		unsigned int _nodeid:12;
-		unsigned int _rsvd_28_31:4;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G3_EXT_IRQ_GEN                                      */
-/* ========================================================================= */
+static inline void numachip2_write32_lcsr(unsigned long offset, u32 val)
+{
+	writel(val, numachip2_lcsr_address(offset));
+}
 
-#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
-union numachip_csr_g3_ext_irq_gen {
-	unsigned int v;
-	struct numachip_csr_g3_ext_irq_gen_s {
-		unsigned int _vector:8;
-		unsigned int _msgtype:3;
-		unsigned int _index:5;
-		unsigned int _destination_apic_id:16;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G3_EXT_IRQ_STATUS                                   */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
-union numachip_csr_g3_ext_irq_status {
-	unsigned int v;
-	struct numachip_csr_g3_ext_irq_status_s {
-		unsigned int _result:32;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G3_EXT_IRQ_DEST                                     */
-/* ========================================================================= */
-
-#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
-union numachip_csr_g3_ext_irq_dest {
-	unsigned int v;
-	struct numachip_csr_g3_ext_irq_dest_s {
-		unsigned int _irq:8;
-		unsigned int _rsvd_8_31:24;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G3_NC_ATT_MAP_SELECT                                */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
-union numachip_csr_g3_nc_att_map_select {
-	unsigned int v;
-	struct numachip_csr_g3_nc_att_map_select_s {
-		unsigned int _upper_address_bits:4;
-		unsigned int _select_ram:4;
-		unsigned int _rsvd_8_31:24;
-	} s;
-};
-
-/* ========================================================================= */
-/*                   CSR_G3_NC_ATT_MAP_SELECT_0-255                          */
-/* ========================================================================= */
-
-#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+static inline void numachip2_write64_lcsr(unsigned long offset, u64 val)
+{
+	writeq(val, numachip2_lcsr_address(offset));
+}
 
-#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+static inline unsigned int numachip2_timer(void)
+{
+	return (smp_processor_id() % 48) << 6;
+}
 
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4edd53b79a81..4928cf0d5af0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -26,9 +26,6 @@
 #define MCE_STACK 4
 #define N_EXCEPTION_STACKS 4  /* hw limit: 7 */
 
-#define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
-
 /*
  * Set __PAGE_OFFSET to the most negative possible address +
  * PGDIR_SIZE*16 (pgd slot 272).  The gap is to allow a space for a
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c7c712f2648b..c5b7fb2774d0 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -20,6 +20,9 @@
 #define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
 
+#define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
+#define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
+
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 867da5bbb4a3..6ec0c8b2e9df 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -19,6 +19,13 @@
 #include <asm/x86_init.h>
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_checkwx(void);
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_walk_pgd_level_checkwx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
 
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
@@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte)
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+	return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+	return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
@@ -318,6 +325,16 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
+{
+	return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+}
+
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 /*
@@ -379,7 +396,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 	return __pgprot(preservebits | addbits);
 }
 
-#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+#define pte_pgprot(x) __pgprot(pte_flags(x))
+#define pmd_pgprot(x) __pgprot(pmd_flags(x))
+#define pud_pgprot(x) __pgprot(pud_flags(x))
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
@@ -502,14 +521,15 @@ static inline int pmd_none(pmd_t pmd)
 
 static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 {
-	return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+	return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd));
 }
 
 /*
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pmd_page(pmd)	pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT)
+#define pmd_page(pmd)		\
+	pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT)
 
 /*
  * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
@@ -570,14 +590,15 @@ static inline int pud_present(pud_t pud)
 
 static inline unsigned long pud_page_vaddr(pud_t pud)
 {
-	return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+	return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud));
 }
 
 /*
  * Currently stuck as a macro due to indirect forward reference to
  * linux/mmzone.h's __section_mem_map_addr() definition:
  */
-#define pud_page(pud)		pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+#define pud_page(pud)		\
+	pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT)
 
 /* Find an entry in the second-level page table.. */
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 13f310bfc09a..dd5b0aa9dd2f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -209,10 +209,10 @@ enum page_cache_mode {
 
 #include <linux/types.h>
 
-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */
 #define PTE_PFN_MASK		((pteval_t)PHYSICAL_PAGE_MASK)
 
-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
+/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */
 #define PTE_FLAGS_MASK		(~PTE_PFN_MASK)
 
 typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
@@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
 }
 #endif
 
+static inline pudval_t pud_pfn_mask(pud_t pud)
+{
+	if (native_pud_val(pud) & _PAGE_PSE)
+		return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+	else
+		return PTE_PFN_MASK;
+}
+
+static inline pudval_t pud_flags_mask(pud_t pud)
+{
+	if (native_pud_val(pud) & _PAGE_PSE)
+		return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
+	else
+		return ~PTE_PFN_MASK;
+}
+
 static inline pudval_t pud_flags(pud_t pud)
 {
-	return native_pud_val(pud) & PTE_FLAGS_MASK;
+	return native_pud_val(pud) & pud_flags_mask(pud);
+}
+
+static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
+{
+	if (native_pmd_val(pmd) & _PAGE_PSE)
+		return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+	else
+		return PTE_PFN_MASK;
+}
+
+static inline pmdval_t pmd_flags_mask(pmd_t pmd)
+{
+	if (native_pmd_val(pmd) & _PAGE_PSE)
+		return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
+	else
+		return ~PTE_PFN_MASK;
 }
 
 static inline pmdval_t pmd_flags(pmd_t pmd)
 {
-	return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+	return native_pmd_val(pmd) & pmd_flags_mask(pmd);
 }
 
 static inline pte_t native_make_pte(pteval_t val)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index b12f81022a6b..01bcde84d3e4 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -30,12 +30,9 @@ static __always_inline void preempt_count_set(int pc)
 /*
  * must be macros to avoid header recursion hell
  */
-#define init_task_preempt_count(p) do { \
-	task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
-} while (0)
+#define init_task_preempt_count(p) do { } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
 	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
 } while (0)
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 19577dd325fa..67522256c7ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -11,7 +11,7 @@ struct vm86;
 #include <asm/math_emu.h>
 #include <asm/segment.h>
 #include <asm/types.h>
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
 #include <asm/current.h>
 #include <asm/cpufeature.h>
 #include <asm/page.h>
@@ -556,12 +556,12 @@ static inline unsigned int cpuid_edx(unsigned int op)
 }
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
+static __always_inline void rep_nop(void)
 {
 	asm volatile("rep; nop" ::: "memory");
 }
 
-static inline void cpu_relax(void)
+static __always_inline void cpu_relax(void)
 {
 	rep_nop();
 }
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 655e07a48f6c..67f08230103a 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,6 +41,7 @@ struct pvclock_wall_clock {
 
 #define PVCLOCK_TSC_STABLE_BIT	(1 << 0)
 #define PVCLOCK_GUEST_STOPPED	(1 << 1)
+/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
 #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..e6cd2c489dbb 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -1,79 +1,8 @@
 #ifndef _ASM_X86_SIGCONTEXT_H
 #define _ASM_X86_SIGCONTEXT_H
 
-#include <uapi/asm/sigcontext.h>
-
-#ifdef __i386__
-struct sigcontext {
-	unsigned short gs, __gsh;
-	unsigned short fs, __fsh;
-	unsigned short es, __esh;
-	unsigned short ds, __dsh;
-	unsigned long di;
-	unsigned long si;
-	unsigned long bp;
-	unsigned long sp;
-	unsigned long bx;
-	unsigned long dx;
-	unsigned long cx;
-	unsigned long ax;
-	unsigned long trapno;
-	unsigned long err;
-	unsigned long ip;
-	unsigned short cs, __csh;
-	unsigned long flags;
-	unsigned long sp_at_signal;
-	unsigned short ss, __ssh;
+/* This is a legacy header - all kernel code includes <uapi/asm/sigcontext.h> directly. */
 
-	/*
-	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
-	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
-	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
-	 * of extended memory layout. See comments at the definition of
-	 * (struct _fpx_sw_bytes)
-	 */
-	void __user *fpstate;		/* zero when no FPU/extended context */
-	unsigned long oldmask;
-	unsigned long cr2;
-};
-#else /* __i386__ */
-struct sigcontext {
-	unsigned long r8;
-	unsigned long r9;
-	unsigned long r10;
-	unsigned long r11;
-	unsigned long r12;
-	unsigned long r13;
-	unsigned long r14;
-	unsigned long r15;
-	unsigned long di;
-	unsigned long si;
-	unsigned long bp;
-	unsigned long bx;
-	unsigned long dx;
-	unsigned long ax;
-	unsigned long cx;
-	unsigned long sp;
-	unsigned long ip;
-	unsigned long flags;
-	unsigned short cs;
-	unsigned short gs;
-	unsigned short fs;
-	unsigned short __pad0;
-	unsigned long err;
-	unsigned long trapno;
-	unsigned long oldmask;
-	unsigned long cr2;
+#include <uapi/asm/sigcontext.h>
 
-	/*
-	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
-	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
-	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
-	 * of extended memory layout. See comments at the definition of
-	 * (struct _fpx_sw_bytes)
-	 */
-	void __user *fpstate;		/* zero when no FPU/extended context */
-	unsigned long reserved1[8];
-};
-#endif /* !__i386__ */
 #endif /* _ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 1f3175bb994e..34edd1650bae 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_SIGFRAME_H
 #define _ASM_X86_SIGFRAME_H
 
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
 #include <asm/siginfo.h>
 #include <asm/ucontext.h>
 #include <linux/compat.h>
@@ -9,8 +9,6 @@
 #ifdef CONFIG_X86_32
 #define sigframe_ia32		sigframe
 #define rt_sigframe_ia32	rt_sigframe
-#define sigcontext_ia32		sigcontext
-#define _fpstate_ia32		_fpstate
 #define ucontext_ia32		ucontext
 #else /* !CONFIG_X86_32 */
 
@@ -24,7 +22,7 @@
 struct sigframe_ia32 {
 	u32 pretcode;
 	int sig;
-	struct sigcontext_ia32 sc;
+	struct sigcontext_32 sc;
 	/*
 	 * fpstate is unused. fpstate is moved/allocated after
 	 * retcode[] below. This movement allows to have the FP state and the
@@ -33,7 +31,7 @@ struct sigframe_ia32 {
 	 * the offset of extramask[] in the sigframe and thus prevent any
 	 * legacy application accessing/modifying it.
 	 */
-	struct _fpstate_ia32 fpstate_unused;
+	struct _fpstate_32 fpstate_unused;
 #ifdef CONFIG_IA32_EMULATION
 	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
 #else /* !CONFIG_IA32_EMULATION */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index c481be78fcf1..2138c9ae19ee 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -34,7 +34,7 @@ extern void do_signal(struct pt_regs *regs);
 
 #define __ARCH_HAS_SA_RESTORER
 
-#include <asm/sigcontext.h>
+#include <uapi/asm/sigcontext.h>
 
 #ifdef __i386__
 
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index e4661196994e..ff8b9a17dc4b 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
    function. */
 
 #define __HAVE_ARCH_MEMCPY 1
+extern void *memcpy(void *to, const void *from, size_t len);
 extern void *__memcpy(void *to, const void *from, size_t len);
 
 #ifndef CONFIG_KMEMCHECK
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-extern void *memcpy(void *to, const void *from, size_t len);
-#else
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
 #define memcpy(dst, src, len)					\
 ({								\
 	size_t __len = (len);					\
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index d7f3b3b78ac3..751bf4b7bf11 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -79,12 +79,12 @@ do {									\
 #else /* CONFIG_X86_32 */
 
 /* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
 
 #define __EXTRA_CLOBBER  \
 	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15"
+	  "r12", "r13", "r14", "r15", "flags"
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary							  \
@@ -100,7 +100,11 @@ do {									\
 #define __switch_canary_iparam
 #endif	/* CC_STACKPROTECTOR */
 
-/* Save restore flags to clear handle leaking NT */
+/*
+ * There is no need to save or restore flags, because flags are always
+ * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
+ * has no effect.
+ */
 #define switch_to(prev, next, last) \
 	asm volatile(SAVE_CONTEXT					  \
 	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d6a756ae04c8..999b7cd2e78c 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -20,9 +20,21 @@
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>
 
-typedef void (*sys_call_ptr_t)(void);
+typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
+					  unsigned long, unsigned long,
+					  unsigned long, unsigned long);
 extern const sys_call_ptr_t sys_call_table[];
 
+#if defined(CONFIG_X86_32)
+#define ia32_sys_call_table sys_call_table
+#define __NR_syscall_compat_max __NR_syscall_max
+#define IA32_NR_syscalls NR_syscalls
+#endif
+
+#if defined(CONFIG_IA32_EMULATION)
+extern const sys_call_ptr_t ia32_sys_call_table[];
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8afdc3e44247..c7b551028740 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,9 +57,7 @@ struct thread_info {
 	__u32			flags;		/* low level flags */
 	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;		/* current CPU */
-	int			saved_preempt_count;
 	mm_segment_t		addr_limit;
-	void __user		*sysenter_return;
 	unsigned int		sig_on_uaccess_error:1;
 	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
@@ -69,7 +67,6 @@ struct thread_info {
 	.task		= &tsk,			\
 	.flags		= 0,			\
 	.cpu		= 0,			\
-	.saved_preempt_count = INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
 }
 
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
index 173dd3ba108c..0f492fc50bce 100644
--- a/arch/x86/include/asm/trace/mpx.h
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -11,7 +11,7 @@
 TRACE_EVENT(mpx_bounds_register_exception,
 
 	TP_PROTO(void *addr_referenced,
-		 const struct bndreg *bndreg),
+		 const struct mpx_bndreg *bndreg),
 	TP_ARGS(addr_referenced, bndreg),
 
 	TP_STRUCT__entry(
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,
 
 TRACE_EVENT(bounds_exception_mpx,
 
-	TP_PROTO(const struct bndcsr *bndcsr),
+	TP_PROTO(const struct mpx_bndcsr *bndcsr),
 	TP_ARGS(bndcsr),
 
 	TP_STRUCT__entry(
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
 /*
  * This gets used outside of MPX-specific code, so we need a stub.
  */
-static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
+static inline
+void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
 {
 }
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a8df874f3e88..09b1b0ab94b7 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -51,13 +51,13 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
 	 * limit, not add it to the address).
 	 */
 	if (__builtin_constant_p(size))
-		return addr > limit - size;
+		return unlikely(addr > limit - size);
 
 	/* Arbitrary sizes? Be careful about overflow */
 	addr += size;
-	if (addr < size)
+	if (unlikely(addr < size))
 		return true;
-	return addr > limit;
+	return unlikely(addr > limit);
 }
 
 #define __range_not_ok(addr, size, limit)				\
@@ -182,7 +182,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
 		     : "=a" (__ret_gu), "=r" (__val_gu)			\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
-	__ret_gu;							\
+	__builtin_expect(__ret_gu, 0);					\
 })
 
 #define __put_user_x(size, x, ptr, __ret_pu)			\
@@ -278,7 +278,7 @@ extern void __put_user_8(void);
 		__put_user_x(X, __pu_val, ptr, __ret_pu);	\
 		break;						\
 	}							\
-	__ret_pu;						\
+	__builtin_expect(__ret_pu, 0);				\
 })
 
 #define __put_user_size(x, ptr, size, retval, errret)			\
@@ -401,7 +401,7 @@ do {									\
 ({								\
 	int __pu_err;						\
 	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT);	\
-	__pu_err;						\
+	__builtin_expect(__pu_err, 0);				\
 })
 
 #define __get_user_nocheck(x, ptr, size)				\
@@ -410,7 +410,7 @@ do {									\
 	unsigned long __gu_val;						\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-	__gu_err;							\
+	__builtin_expect(__gu_err, 0);					\
 })
 
 /* FIXME: this hack is definitely wrong -AK */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index a00ad8f2a657..ea7074784cc4 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -609,7 +609,7 @@ struct uv_cpu_nmi_s {
 
 DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 
-#define uv_hub_nmi			(uv_cpu_nmi.hub)
+#define uv_hub_nmi			this_cpu_read(uv_cpu_nmi.hub)
 #define uv_cpu_nmi_per(cpu)		(per_cpu(uv_cpu_nmi, cpu))
 #define uv_hub_nmi_per(cpu)		(uv_cpu_nmi_per(cpu).hub)
 
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 8021bd28c0f1..756de9190aec 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -26,7 +26,7 @@ struct vdso_image {
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
 	long sym___kernel_vsyscall;
-	long sym_VDSO32_SYSENTER_RETURN;
+	long sym_int80_landing_pad;
 };
 
 #ifdef CONFIG_X86_64
@@ -38,13 +38,7 @@ extern const struct vdso_image vdso_image_x32;
 #endif
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_int80;
-#ifdef CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32_syscall;
-#endif
-extern const struct vdso_image vdso_image_32_sysenter;
-
-extern const struct vdso_image *selected_vdso32;
+extern const struct vdso_image vdso_image_32;
 #endif
 
 extern void __init init_vdso_image(const struct vdso_image *image);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 448b7ca61aee..aa336ff3e03e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,7 @@
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
-
+#define SECONDARY_EXEC_PCOMMIT			0x00200000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
@@ -416,6 +416,7 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
 
+#define VMX_VPID_INVVPID_BIT                    (1ull << 0) /* (32 - 32) */
 #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT      (1ull << 9) /* (41 - 32) */
 #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT      (1ull << 10) /* (42 - 32) */
 
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 83aea8055119..4c20dd333412 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -336,10 +336,10 @@ HYPERVISOR_update_descriptor(u64 ma, u64 desc)
 	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
 }
 
-static inline int
+static inline long
 HYPERVISOR_memory_op(unsigned int cmd, void *arg)
 {
-	return _hypercall2(int, memory_op, cmd, arg);
+	return _hypercall2(long, memory_op, cmd, arg);
 }
 
 static inline int
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d866959e5685..8b2d4bea9962 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -57,4 +57,9 @@ static inline bool xen_x2apic_para_available(void)
 }
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num);
+void xen_arch_unregister_cpu(int num);
+#endif
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 0679e11d2cf7..f5fb840b43e8 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,7 +12,7 @@
 #include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
-#include <xen/grant_table.h>
+#include <xen/interface/grant_table.h>
 #include <xen/features.h>
 
 /* Xen machine address */
@@ -43,6 +43,8 @@ extern unsigned long *xen_p2m_addr;
 extern unsigned long  xen_p2m_size;
 extern unsigned long  xen_max_p2m_pfn;
 
+extern int xen_alloc_p2m_entry(unsigned long pfn);
+
 extern unsigned long get_phys_to_machine(unsigned long pfn);
 extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -296,8 +298,8 @@ void make_lowmem_page_readwrite(void *vaddr);
 #define xen_unmap(cookie) iounmap((cookie))
 
 static inline bool xen_arch_need_swiotlb(struct device *dev,
-					 unsigned long pfn,
-					 unsigned long bfn)
+					 phys_addr_t phys,
+					 dma_addr_t dev_addr)
 {
 	return false;
 }
diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h
index b0ae1c4dc791..217909b4d6f5 100644
--- a/arch/x86/include/uapi/asm/bitsperlong.h
+++ b/arch/x86/include/uapi/asm/bitsperlong.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_X86_BITSPERLONG_H
 #define __ASM_X86_BITSPERLONG_H
 
-#ifdef __x86_64__
+#if defined(__x86_64__) && !defined(__ILP32__)
 # define __BITS_PER_LONG 64
 #else
 # define __BITS_PER_LONG 32
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index f0412c50c47b..040d4083c24f 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -153,6 +153,12 @@
 /* MSR used to provide vcpu index */
 #define HV_X64_MSR_VP_INDEX			0x40000002
 
+/* MSR used to reset the guest OS. */
+#define HV_X64_MSR_RESET			0x40000003
+
+/* MSR used to provide vcpu runtime in 100ns units */
+#define HV_X64_MSR_VP_RUNTIME			0x40000010
+
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 
@@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
 	__s64 tsc_offset;
 } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
 
+/* Define the number of synthetic interrupt sources. */
+#define HV_SYNIC_SINT_COUNT		(16)
+/* Define the expected SynIC version. */
+#define HV_SYNIC_VERSION_1		(0x1)
+
+#define HV_SYNIC_CONTROL_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SIMP_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SIEFP_ENABLE		(1ULL << 0)
+#define HV_SYNIC_SINT_MASKED		(1ULL << 16)
+#define HV_SYNIC_SINT_AUTO_EOI		(1ULL << 17)
+#define HV_SYNIC_SINT_VECTOR_MASK	(0xFF)
+
 #endif
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 76880ede9a35..03429da2fa80 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -2,7 +2,7 @@
 #define _UAPI_ASM_X86_MCE_H
 
 #include <linux/types.h>
-#include <asm/ioctls.h>
+#include <linux/ioctl.h>
 
 /* Fields are zero when not available */
 struct mce {
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 40836a9a7250..d485232f1e9f 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -1,221 +1,360 @@
 #ifndef _UAPI_ASM_X86_SIGCONTEXT_H
 #define _UAPI_ASM_X86_SIGCONTEXT_H
 
+/*
+ * Linux signal context definitions. The sigcontext includes a complex
+ * hierarchy of CPU and FPU state, available to user-space (on the stack) when
+ * a signal handler is executed.
+ *
+ * As over the years this ABI grew from its very simple roots towards
+ * supporting more and more CPU state organically, some of the details (which
+ * were rather clever hacks back in the days) became a bit quirky by today.
+ *
+ * The current ABI includes flexible provisions for future extensions, so we
+ * won't have to grow new quirks for quite some time. Promise!
+ */
+
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-#define FP_XSTATE_MAGIC1	0x46505853U
-#define FP_XSTATE_MAGIC2	0x46505845U
-#define FP_XSTATE_MAGIC2_SIZE	sizeof(FP_XSTATE_MAGIC2)
+#define FP_XSTATE_MAGIC1		0x46505853U
+#define FP_XSTATE_MAGIC2		0x46505845U
+#define FP_XSTATE_MAGIC2_SIZE		sizeof(FP_XSTATE_MAGIC2)
 
 /*
- * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
- * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
- * are used to extended the fpstate pointer in the sigcontext, which now
- * includes the extended state information along with fpstate information.
+ * Bytes 464..511 in the current 512-byte layout of the FXSAVE/FXRSTOR frame
+ * are reserved for SW usage. On CPUs supporting XSAVE/XRSTOR, these bytes are
+ * used to extend the fpstate pointer in the sigcontext, which now includes the
+ * extended state information along with fpstate information.
+ *
+ * If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then there's a
+ * sw_reserved.extended_size bytes large extended context area present. (The
+ * last 32-bit word of this extended area (at the
+ * fpstate+extended_size-FP_XSTATE_MAGIC2_SIZE address) is set to
+ * FP_XSTATE_MAGIC2 so that you can sanity check your size calculations.)
  *
- * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
- * area and FP_XSTATE_MAGIC2 at the end of memory layout
- * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
- * extended state information in the memory layout pointed by the fpstate
- * pointer in sigcontext.
+ * This extended area typically grows with newer CPUs that have larger and
+ * larger XSAVE areas.
  */
 struct _fpx_sw_bytes {
-	__u32 magic1;		/* FP_XSTATE_MAGIC1 */
-	__u32 extended_size;	/* total size of the layout referred by
-				 * fpstate pointer in the sigcontext.
-				 */
-	__u64 xfeatures;
-				/* feature bit mask (including fp/sse/extended
-				 * state) that is present in the memory
-				 * layout.
-				 */
-	__u32 xstate_size;	/* actual xsave state size, based on the
-				 * features saved in the layout.
-				 * 'extended_size' will be greater than
-				 * 'xstate_size'.
-				 */
-	__u32 padding[7];	/*  for future use. */
+	/*
+	 * If set to FP_XSTATE_MAGIC1 then this is an xstate context.
+	 * 0 if a legacy frame.
+	 */
+	__u32				magic1;
+
+	/*
+	 * Total size of the fpstate area:
+	 *
+	 *  - if magic1 == 0 then it's sizeof(struct _fpstate)
+	 *  - if magic1 == FP_XSTATE_MAGIC1 then it's sizeof(struct _xstate)
+	 *    plus extensions (if any)
+	 */
+	__u32				extended_size;
+
+	/*
+	 * Feature bit mask (including FP/SSE/extended state) that is present
+	 * in the memory layout:
+	 */
+	__u64				xfeatures;
+
+	/*
+	 * Actual XSAVE state size, based on the xfeatures saved in the layout.
+	 * 'extended_size' is greater than 'xstate_size':
+	 */
+	__u32				xstate_size;
+
+	/* For future use: */
+	__u32				padding[7];
 };
 
-#ifdef __i386__
 /*
- * As documented in the iBCS2 standard..
- *
- * The first part of "struct _fpstate" is just the normal i387
- * hardware setup, the extra "status" word is used to save the
- * coprocessor status word before entering the handler.
+ * As documented in the iBCS2 standard:
  *
- * Pentium III FXSR, SSE support
- *	Gareth Hughes <gareth@valinux.com>, May 2000
+ * The first part of "struct _fpstate" is just the normal i387 hardware setup,
+ * the extra "status" word is used to save the coprocessor status word before
+ * entering the handler.
  *
- * The FPU state data structure has had to grow to accommodate the
- * extended FPU state required by the Streaming SIMD Extensions.
- * There is no documented standard to accomplish this at the moment.
+ * The FPU state data structure has had to grow to accommodate the extended FPU
+ * state required by the Streaming SIMD Extensions.  There is no documented
+ * standard to accomplish this at the moment.
  */
+
+/* 10-byte legacy floating point register: */
 struct _fpreg {
-	unsigned short significand[4];
-	unsigned short exponent;
+	__u16				significand[4];
+	__u16				exponent;
 };
 
+/* 16-byte floating point register: */
 struct _fpxreg {
-	unsigned short significand[4];
-	unsigned short exponent;
-	unsigned short padding[3];
+	__u16				significand[4];
+	__u16				exponent;
+	__u16				padding[3];
 };
 
+/* 16-byte XMM register: */
 struct _xmmreg {
-	unsigned long element[4];
+	__u32				element[4];
 };
 
-struct _fpstate {
-	/* Regular FPU environment */
-	unsigned long	cw;
-	unsigned long	sw;
-	unsigned long	tag;
-	unsigned long	ipoff;
-	unsigned long	cssel;
-	unsigned long	dataoff;
-	unsigned long	datasel;
-	struct _fpreg	_st[8];
-	unsigned short	status;
-	unsigned short	magic;		/* 0xffff = regular FPU data only */
+#define X86_FXSR_MAGIC			0x0000
+
+/*
+ * The 32-bit FPU frame:
+ */
+struct _fpstate_32 {
+	/* Legacy FPU environment: */
+	__u32				cw;
+	__u32				sw;
+	__u32				tag;
+	__u32				ipoff;
+	__u32				cssel;
+	__u32				dataoff;
+	__u32				datasel;
+	struct _fpreg			_st[8];
+	__u16				status;
+	__u16				magic;		/* 0xffff: regular FPU data only */
+							/* 0x0000: FXSR FPU data */
 
 	/* FXSR FPU environment */
-	unsigned long	_fxsr_env[6];	/* FXSR FPU env is ignored */
-	unsigned long	mxcsr;
-	unsigned long	reserved;
-	struct _fpxreg	_fxsr_st[8];	/* FXSR FPU reg data is ignored */
-	struct _xmmreg	_xmm[8];
-	unsigned long	padding1[44];
+	__u32				_fxsr_env[6];	/* FXSR FPU env is ignored */
+	__u32				mxcsr;
+	__u32				reserved;
+	struct _fpxreg			_fxsr_st[8];	/* FXSR FPU reg data is ignored */
+	struct _xmmreg			_xmm[8];	/* First 8 XMM registers */
+	union {
+		__u32			padding1[44];	/* Second 8 XMM registers plus padding */
+		__u32			padding[44];	/* Alias name for old user-space */
+	};
 
 	union {
-		unsigned long	padding2[12];
-		struct _fpx_sw_bytes sw_reserved; /* represents the extended
-						   * state info */
+		__u32			padding2[12];
+		struct _fpx_sw_bytes	sw_reserved;	/* Potential extended state is encoded here */
 	};
 };
 
-#define X86_FXSR_MAGIC		0x0000
-
-#ifndef __KERNEL__
 /*
- * User-space might still rely on the old definition:
+ * The 64-bit FPU frame. (FXSAVE format and later)
+ *
+ * Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is
+ *        larger: 'struct _xstate'. Note that 'struct _xstate' embedds
+ *        'struct _fpstate' so that you can always assume the _fpstate portion
+ *        exists so that you can check the magic value.
+ *
+ * Note2: Reserved fields may someday contain valuable data. Always
+ *	  save/restore them when you change signal frames.
  */
-struct sigcontext {
-	unsigned short gs, __gsh;
-	unsigned short fs, __fsh;
-	unsigned short es, __esh;
-	unsigned short ds, __dsh;
-	unsigned long edi;
-	unsigned long esi;
-	unsigned long ebp;
-	unsigned long esp;
-	unsigned long ebx;
-	unsigned long edx;
-	unsigned long ecx;
-	unsigned long eax;
-	unsigned long trapno;
-	unsigned long err;
-	unsigned long eip;
-	unsigned short cs, __csh;
-	unsigned long eflags;
-	unsigned long esp_at_signal;
-	unsigned short ss, __ssh;
-	struct _fpstate __user *fpstate;
-	unsigned long oldmask;
-	unsigned long cr2;
-};
-#endif /* !__KERNEL__ */
-
-#else /* __i386__ */
-
-/* FXSAVE frame */
-/* Note: reserved1/2 may someday contain valuable data. Always save/restore
-   them when you change signal frames. */
-struct _fpstate {
-	__u16	cwd;
-	__u16	swd;
-	__u16	twd;		/* Note this is not the same as the
-				   32bit/x87/FSAVE twd */
-	__u16	fop;
-	__u64	rip;
-	__u64	rdp;
-	__u32	mxcsr;
-	__u32	mxcsr_mask;
-	__u32	st_space[32];	/* 8*16 bytes for each FP-reg */
-	__u32	xmm_space[64];	/* 16*16 bytes for each XMM-reg  */
-	__u32	reserved2[12];
+struct _fpstate_64 {
+	__u16				cwd;
+	__u16				swd;
+	/* Note this is not the same as the 32-bit/x87/FSAVE twd: */
+	__u16				twd;
+	__u16				fop;
+	__u64				rip;
+	__u64				rdp;
+	__u32				mxcsr;
+	__u32				mxcsr_mask;
+	__u32				st_space[32];	/*  8x  FP registers, 16 bytes each */
+	__u32				xmm_space[64];	/* 16x XMM registers, 16 bytes each */
+	__u32				reserved2[12];
 	union {
-		__u32	reserved3[12];
-		struct _fpx_sw_bytes sw_reserved; /* represents the extended
-						   * state information */
+		__u32			reserved3[12];
+		struct _fpx_sw_bytes	sw_reserved;	/* Potential extended state is encoded here */
 	};
 };
 
-#ifndef __KERNEL__
-/*
- * User-space might still rely on the old definition:
- */
-struct sigcontext {
-	__u64 r8;
-	__u64 r9;
-	__u64 r10;
-	__u64 r11;
-	__u64 r12;
-	__u64 r13;
-	__u64 r14;
-	__u64 r15;
-	__u64 rdi;
-	__u64 rsi;
-	__u64 rbp;
-	__u64 rbx;
-	__u64 rdx;
-	__u64 rax;
-	__u64 rcx;
-	__u64 rsp;
-	__u64 rip;
-	__u64 eflags;		/* RFLAGS */
-	__u16 cs;
-	__u16 gs;
-	__u16 fs;
-	__u16 __pad0;
-	__u64 err;
-	__u64 trapno;
-	__u64 oldmask;
-	__u64 cr2;
-	struct _fpstate __user *fpstate;	/* zero when no FPU context */
-#ifdef __ILP32__
-	__u32 __fpstate_pad;
+#ifdef __i386__
+# define _fpstate _fpstate_32
+#else
+# define _fpstate _fpstate_64
 #endif
-	__u64 reserved1[8];
-};
-#endif /* !__KERNEL__ */
-
-#endif /* !__i386__ */
 
 struct _header {
-	__u64 xfeatures;
-	__u64 reserved1[2];
-	__u64 reserved2[5];
+	__u64				xfeatures;
+	__u64				reserved1[2];
+	__u64				reserved2[5];
 };
 
 struct _ymmh_state {
-	/* 16 * 16 bytes for each YMMH-reg */
-	__u32 ymmh_space[64];
+	/* 16x YMM registers, 16 bytes each: */
+	__u32				ymmh_space[64];
 };
 
 /*
- * Extended state pointed by the fpstate pointer in the sigcontext.
- * In addition to the fpstate, information encoded in the xstate_hdr
- * indicates the presence of other extended state information
- * supported by the processor and OS.
+ * Extended state pointed to by sigcontext::fpstate.
+ *
+ * In addition to the fpstate, information encoded in _xstate::xstate_hdr
+ * indicates the presence of other extended state information supported
+ * by the CPU and kernel:
  */
 struct _xstate {
-	struct _fpstate fpstate;
-	struct _header xstate_hdr;
-	struct _ymmh_state ymmh;
-	/* new processor state extensions go here */
+	struct _fpstate			fpstate;
+	struct _header			xstate_hdr;
+	struct _ymmh_state		ymmh;
+	/* New processor state extensions go here: */
+};
+
+/*
+ * The 32-bit signal frame:
+ */
+struct sigcontext_32 {
+	__u16				gs, __gsh;
+	__u16				fs, __fsh;
+	__u16				es, __esh;
+	__u16				ds, __dsh;
+	__u32				di;
+	__u32				si;
+	__u32				bp;
+	__u32				sp;
+	__u32				bx;
+	__u32				dx;
+	__u32				cx;
+	__u32				ax;
+	__u32				trapno;
+	__u32				err;
+	__u32				ip;
+	__u16				cs, __csh;
+	__u32				flags;
+	__u32				sp_at_signal;
+	__u16				ss, __ssh;
+
+	/*
+	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
+	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+	 * of extended memory layout. See comments at the definition of
+	 * (struct _fpx_sw_bytes)
+	 */
+	__u32				fpstate; /* Zero when no FPU/extended context */
+	__u32				oldmask;
+	__u32				cr2;
+};
+
+/*
+ * The 64-bit signal frame:
+ */
+struct sigcontext_64 {
+	__u64				r8;
+	__u64				r9;
+	__u64				r10;
+	__u64				r11;
+	__u64				r12;
+	__u64				r13;
+	__u64				r14;
+	__u64				r15;
+	__u64				di;
+	__u64				si;
+	__u64				bp;
+	__u64				bx;
+	__u64				dx;
+	__u64				ax;
+	__u64				cx;
+	__u64				sp;
+	__u64				ip;
+	__u64				flags;
+	__u16				cs;
+	__u16				gs;
+	__u16				fs;
+	__u16				__pad0;
+	__u64				err;
+	__u64				trapno;
+	__u64				oldmask;
+	__u64				cr2;
+
+	/*
+	 * fpstate is really (struct _fpstate *) or (struct _xstate *)
+	 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
+	 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
+	 * of extended memory layout. See comments at the definition of
+	 * (struct _fpx_sw_bytes)
+	 */
+	__u64				fpstate; /* Zero when no FPU/extended context */
+	__u64				reserved1[8];
+};
+
+/*
+ * Create the real 'struct sigcontext' type:
+ */
+#ifdef __KERNEL__
+# ifdef __i386__
+#  define sigcontext sigcontext_32
+# else
+#  define sigcontext sigcontext_64
+# endif
+#endif
+
+/*
+ * The old user-space sigcontext definition, just in case user-space still
+ * relies on it. The kernel definition (in asm/sigcontext.h) has unified
+ * field names but otherwise the same layout.
+ */
+#ifndef __KERNEL__
+
+#define _fpstate_ia32			_fpstate_32
+#define sigcontext_ia32			sigcontext_32
+
+
+# ifdef __i386__
+struct sigcontext {
+	__u16				gs, __gsh;
+	__u16				fs, __fsh;
+	__u16				es, __esh;
+	__u16				ds, __dsh;
+	__u32				edi;
+	__u32				esi;
+	__u32				ebp;
+	__u32				esp;
+	__u32				ebx;
+	__u32				edx;
+	__u32				ecx;
+	__u32				eax;
+	__u32				trapno;
+	__u32				err;
+	__u32				eip;
+	__u16				cs, __csh;
+	__u32				eflags;
+	__u32				esp_at_signal;
+	__u16				ss, __ssh;
+	struct _fpstate __user		*fpstate;
+	__u32				oldmask;
+	__u32				cr2;
 };
+# else /* __x86_64__: */
+struct sigcontext {
+	__u64				r8;
+	__u64				r9;
+	__u64				r10;
+	__u64				r11;
+	__u64				r12;
+	__u64				r13;
+	__u64				r14;
+	__u64				r15;
+	__u64				rdi;
+	__u64				rsi;
+	__u64				rbp;
+	__u64				rbx;
+	__u64				rdx;
+	__u64				rax;
+	__u64				rcx;
+	__u64				rsp;
+	__u64				rip;
+	__u64				eflags;		/* RFLAGS */
+	__u16				cs;
+	__u16				gs;
+	__u16				fs;
+	__u16				__pad0;
+	__u64				err;
+	__u64				trapno;
+	__u64				oldmask;
+	__u64				cr2;
+	struct _fpstate __user		*fpstate;	/* Zero when no FPU context */
+#  ifdef __ILP32__
+	__u32				__fpstate_pad;
+#  endif
+	__u64				reserved1[8];
+};
+# endif /* __x86_64__ */
+#endif /* !__KERNEL__ */
 
 #endif /* _UAPI_ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index ad1478c4ae12..a92b0f0dc09e 100644
--- a/arch/x86/include/uapi/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
@@ -1,77 +1,8 @@
 #ifndef _ASM_X86_SIGCONTEXT32_H
 #define _ASM_X86_SIGCONTEXT32_H
 
-#include <linux/types.h>
+/* This is a legacy file - all the type definitions are in sigcontext.h: */
 
-/* signal context for 32bit programs. */
-
-#define X86_FXSR_MAGIC		0x0000
-
-struct _fpreg {
-	unsigned short significand[4];
-	unsigned short exponent;
-};
-
-struct _fpxreg {
-	unsigned short significand[4];
-	unsigned short exponent;
-	unsigned short padding[3];
-};
-
-struct _xmmreg {
-	__u32	element[4];
-};
-
-/* FSAVE frame with extensions */
-struct _fpstate_ia32 {
-	/* Regular FPU environment */
-	__u32 	cw;
-	__u32	sw;
-	__u32	tag;	/* not compatible to 64bit twd */
-	__u32	ipoff;
-	__u32	cssel;
-	__u32	dataoff;
-	__u32	datasel;
-	struct _fpreg	_st[8];
-	unsigned short	status;
-	unsigned short	magic;		/* 0xffff = regular FPU data only */
-
-	/* FXSR FPU environment */
-	__u32	_fxsr_env[6];
-	__u32	mxcsr;
-	__u32	reserved;
-	struct _fpxreg	_fxsr_st[8];
-	struct _xmmreg	_xmm[8];	/* It's actually 16 */
-	__u32	padding[44];
-	union {
-		__u32 padding2[12];
-		struct _fpx_sw_bytes sw_reserved;
-	};
-};
-
-struct sigcontext_ia32 {
-       unsigned short gs, __gsh;
-       unsigned short fs, __fsh;
-       unsigned short es, __esh;
-       unsigned short ds, __dsh;
-       unsigned int di;
-       unsigned int si;
-       unsigned int bp;
-       unsigned int sp;
-       unsigned int bx;
-       unsigned int dx;
-       unsigned int cx;
-       unsigned int ax;
-       unsigned int trapno;
-       unsigned int err;
-       unsigned int ip;
-       unsigned short cs, __csh;
-       unsigned int flags;
-       unsigned int sp_at_signal;
-       unsigned short ss, __ssh;
-       unsigned int fpstate;		/* really (struct _fpstate_ia32 *) */
-       unsigned int oldmask;
-       unsigned int cr2;
-};
+#include <asm/sigcontext.h>
 
 #endif /* _ASM_X86_SIGCONTEXT32_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee272618f..5b15d94a33f8 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -78,6 +78,7 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -126,7 +127,8 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ded848c20e05..e75907601a41 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -976,6 +976,8 @@ static int __init acpi_parse_madt_lapic_entries(void)
 {
 	int count;
 	int x2count = 0;
+	int ret;
+	struct acpi_subtable_proc madt_proc[2];
 
 	if (!cpu_has_apic)
 		return -ENODEV;
@@ -999,10 +1001,22 @@ static int __init acpi_parse_madt_lapic_entries(void)
 				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
 	if (!count) {
-		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-					acpi_parse_x2apic, MAX_LOCAL_APIC);
-		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-					acpi_parse_lapic, MAX_LOCAL_APIC);
+		memset(madt_proc, 0, sizeof(madt_proc));
+		madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
+		madt_proc[0].handler = acpi_parse_lapic;
+		madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
+		madt_proc[1].handler = acpi_parse_x2apic;
+		ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
+				sizeof(struct acpi_table_madt),
+				madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
+		if (ret < 0) {
+			printk(KERN_ERR PREFIX
+					"Error parsing LAPIC/X2APIC entries\n");
+			return ret;
+		}
+
+		x2count = madt_proc[0].count;
+		count = madt_proc[1].count;
 	}
 	if (!count && !x2count) {
 		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24e94ce454e2..2f69e3b184f6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1431,7 +1431,7 @@ enum {
 };
 static int x2apic_state;
 
-static inline void __x2apic_disable(void)
+static void __x2apic_disable(void)
 {
 	u64 msr;
 
@@ -1447,7 +1447,7 @@ static inline void __x2apic_disable(void)
 	printk_once(KERN_INFO "x2apic disabled\n");
 }
 
-static inline void __x2apic_enable(void)
+static void __x2apic_enable(void)
 {
 	u64 msr;
 
@@ -1807,7 +1807,7 @@ int apic_version[MAX_LOCAL_APIC];
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static inline void __smp_spurious_interrupt(u8 vector)
+static void __smp_spurious_interrupt(u8 vector)
 {
 	u32 v;
 
@@ -1848,7 +1848,7 @@ __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-static inline void __smp_error_interrupt(struct pt_regs *regs)
+static void __smp_error_interrupt(struct pt_regs *regs)
 {
 	u32 v;
 	u32 i = 0;
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index b548fd3b764b..38dd5efdd04c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -11,30 +11,21 @@
  *
  */
 
-#include <linux/errno.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ctype.h>
 #include <linux/init.h>
-#include <linux/hardirq.h>
-#include <linux/delay.h>
 
 #include <asm/numachip/numachip.h>
 #include <asm/numachip/numachip_csr.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
 #include <asm/ipi.h>
 #include <asm/apic_flat_64.h>
 #include <asm/pgtable.h>
+#include <asm/pci_x86.h>
 
-static int numachip_system __read_mostly;
+u8 numachip_system __read_mostly;
+static const struct apic apic_numachip1;
+static const struct apic apic_numachip2;
+static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly;
 
-static const struct apic apic_numachip;
-
-static unsigned int get_apic_id(unsigned long x)
+static unsigned int numachip1_get_apic_id(unsigned long x)
 {
 	unsigned long value;
 	unsigned int id = (x >> 24) & 0xff;
@@ -47,7 +38,7 @@ static unsigned int get_apic_id(unsigned long x)
 	return id;
 }
 
-static unsigned long set_apic_id(unsigned int id)
+static unsigned long numachip1_set_apic_id(unsigned int id)
 {
 	unsigned long x;
 
@@ -55,9 +46,17 @@ static unsigned long set_apic_id(unsigned int id)
 	return x;
 }
 
-static unsigned int read_xapic_id(void)
+static unsigned int numachip2_get_apic_id(unsigned long x)
+{
+	u64 mcfg;
+
+	rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, mcfg);
+	return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
+}
+
+static unsigned long numachip2_set_apic_id(unsigned int id)
 {
-	return get_apic_id(apic_read(APIC_ID));
+	return id << 24;
 }
 
 static int numachip_apic_id_valid(int apicid)
@@ -68,7 +67,7 @@ static int numachip_apic_id_valid(int apicid)
 
 static int numachip_apic_id_registered(void)
 {
-	return physid_isset(read_xapic_id(), phys_cpu_present_map);
+	return 1;
 }
 
 static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
@@ -76,36 +75,48 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static void numachip1_apic_icr_write(int apicid, unsigned int val)
 {
-	union numachip_csr_g3_ext_irq_gen int_gen;
-
-	int_gen.s._destination_apic_id = phys_apicid;
-	int_gen.s._vector = 0;
-	int_gen.s._msgtype = APIC_DM_INIT >> 8;
-	int_gen.s._index = 0;
-
-	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val);
+}
 
-	int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
-	int_gen.s._vector = start_rip >> 12;
+static void numachip2_apic_icr_write(int apicid, unsigned int val)
+{
+	numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val);
+}
 
-	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+	numachip_apic_icr_write(phys_apicid, APIC_DM_INIT);
+	numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP |
+		(start_rip >> 12));
 
 	return 0;
 }
 
 static void numachip_send_IPI_one(int cpu, int vector)
 {
-	union numachip_csr_g3_ext_irq_gen int_gen;
-	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
-
-	int_gen.s._destination_apic_id = apicid;
-	int_gen.s._vector = vector;
-	int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
-	int_gen.s._index = 0;
+	int local_apicid, apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	unsigned int dmode;
+
+	preempt_disable();
+	local_apicid = __this_cpu_read(x86_cpu_to_apicid);
+
+	/* Send via local APIC where non-local part matches */
+	if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		__default_send_IPI_dest_field(apicid, vector,
+			APIC_DEST_PHYSICAL);
+		local_irq_restore(flags);
+		preempt_enable();
+		return;
+	}
+	preempt_enable();
 
-	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+	dmode = (vector == NMI_VECTOR) ? APIC_DM_NMI : APIC_DM_FIXED;
+	numachip_apic_icr_write(apicid, dmode | vector);
 }
 
 static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
@@ -149,9 +160,14 @@ static void numachip_send_IPI_self(int vector)
 	apic_write(APIC_SELF_IPI, vector);
 }
 
-static int __init numachip_probe(void)
+static int __init numachip1_probe(void)
 {
-	return apic == &apic_numachip;
+	return apic == &apic_numachip1;
+}
+
+static int __init numachip2_probe(void)
+{
+	return apic == &apic_numachip2;
 }
 
 static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
@@ -172,34 +188,118 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 
 static int __init numachip_system_init(void)
 {
-	if (!numachip_system)
+	/* Map the LCSR area and set up the apic_icr_write function */
+	switch (numachip_system) {
+	case 1:
+		init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+		numachip_apic_icr_write = numachip1_apic_icr_write;
+		x86_init.pci.arch_init = pci_numachip_init;
+		break;
+	case 2:
+		init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE);
+		numachip_apic_icr_write = numachip2_apic_icr_write;
+
+		/* Use MCFG config cycles rather than locked CF8 cycles */
+		raw_pci_ops = &pci_mmcfg;
+		break;
+	default:
 		return 0;
-
-	init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
-	init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+	}
 
 	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
-	x86_init.pci.arch_init = pci_numachip_init;
 
 	return 0;
 }
 early_initcall(numachip_system_init);
 
-static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int numachip1_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (!strncmp(oem_id, "NUMASC", 6)) {
-		numachip_system = 1;
-		return 1;
-	}
+	if ((strncmp(oem_id, "NUMASC", 6) != 0) ||
+	    (strncmp(oem_table_id, "NCONNECT", 8) != 0))
+		return 0;
 
-	return 0;
+	numachip_system = 1;
+
+	return 1;
+}
+
+static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if ((strncmp(oem_id, "NUMASC", 6) != 0) ||
+	    (strncmp(oem_table_id, "NCONECT2", 8) != 0))
+		return 0;
+
+	numachip_system = 2;
+
+	return 1;
+}
+
+/* APIC IPIs are queued */
+static void numachip_apic_wait_icr_idle(void)
+{
 }
 
-static const struct apic apic_numachip __refconst = {
+/* APIC NMI IPIs are queued */
+static u32 numachip_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
 
+static const struct apic apic_numachip1 __refconst = {
 	.name				= "NumaConnect system",
-	.probe				= numachip_probe,
-	.acpi_madt_oem_check		= numachip_acpi_madt_oem_check,
+	.probe				= numachip1_probe,
+	.acpi_madt_oem_check		= numachip1_acpi_madt_oem_check,
+	.apic_id_valid			= numachip_apic_id_valid,
+	.apic_id_registered		= numachip_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= numachip_phys_pkg_id,
+
+	.get_apic_id			= numachip1_get_apic_id,
+	.set_apic_id			= numachip1_set_apic_id,
+	.apic_id_mask			= 0xffU << 24,
+
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= numachip_send_IPI_mask,
+	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
+	.send_IPI_all			= numachip_send_IPI_all,
+	.send_IPI_self			= numachip_send_IPI_self,
+
+	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
+	.inquire_remote_apic		= NULL, /* REMRD not supported */
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
+};
+
+apic_driver(apic_numachip1);
+
+static const struct apic apic_numachip2 __refconst = {
+	.name				= "NumaConnect2 system",
+	.probe				= numachip2_probe,
+	.acpi_madt_oem_check		= numachip2_acpi_madt_oem_check,
 	.apic_id_valid			= numachip_apic_id_valid,
 	.apic_id_registered		= numachip_apic_id_registered,
 
@@ -221,8 +321,8 @@ static const struct apic apic_numachip __refconst = {
 	.check_phys_apicid_present	= default_check_phys_apicid_present,
 	.phys_pkg_id			= numachip_phys_pkg_id,
 
-	.get_apic_id			= get_apic_id,
-	.set_apic_id			= set_apic_id,
+	.get_apic_id			= numachip2_get_apic_id,
+	.set_apic_id			= numachip2_set_apic_id,
 	.apic_id_mask			= 0xffU << 24,
 
 	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
@@ -241,8 +341,8 @@ static const struct apic apic_numachip __refconst = {
 	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
-	.wait_icr_idle			= native_apic_wait_icr_idle,
-	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+	.wait_icr_idle			= numachip_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= numachip_safe_apic_wait_icr_idle,
 };
-apic_driver(apic_numachip);
 
+apic_driver(apic_numachip2);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c60bb162622..f25321894ad2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -529,7 +529,7 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector)
 	}
 }
 
-void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
+static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
 {
 	unsigned long flags;
 	struct irq_pin_list *entry;
@@ -2547,7 +2547,9 @@ void __init setup_ioapic_dest(void)
 			mask = apic->target_cpus();
 
 		chip = irq_data_get_irq_chip(idata);
-		chip->irq_set_affinity(idata, mask, false);
+		/* Might be lapic_chip for irq 0 */
+		if (chip->irq_set_affinity)
+			chip->irq_set_affinity(idata, mask, false);
 	}
 }
 #endif
@@ -2907,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 	struct irq_data *irq_data;
 	struct mp_chip_data *data;
 	struct irq_alloc_info *info = arg;
+	unsigned long flags;
 
 	if (!info || nr_irqs > 1)
 		return -EINVAL;
@@ -2939,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 
 	cfg = irqd_cfg(irq_data);
 	add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+
+	local_irq_save(flags);
 	if (info->ioapic_entry)
 		mp_setup_entry(cfg, data, info->ioapic_entry);
 	mp_register_handler(virq, data->trigger);
 	if (virq < nr_legacy_irqs())
 		legacy_pic->mask(virq);
+	local_irq_restore(flags);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8e3d22a1af94..439df975bc7a 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -43,18 +43,15 @@ void common(void) {
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 	BLANK();
-	OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax);
-	OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx);
-	OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx);
-	OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx);
-	OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si);
-	OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di);
-	OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp);
-	OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp);
-	OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip);
-
-	BLANK();
-	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+	OFFSET(IA32_SIGCONTEXT_ax, sigcontext_32, ax);
+	OFFSET(IA32_SIGCONTEXT_bx, sigcontext_32, bx);
+	OFFSET(IA32_SIGCONTEXT_cx, sigcontext_32, cx);
+	OFFSET(IA32_SIGCONTEXT_dx, sigcontext_32, dx);
+	OFFSET(IA32_SIGCONTEXT_si, sigcontext_32, si);
+	OFFSET(IA32_SIGCONTEXT_di, sigcontext_32, di);
+	OFFSET(IA32_SIGCONTEXT_bp, sigcontext_32, bp);
+	OFFSET(IA32_SIGCONTEXT_sp, sigcontext_32, sp);
+	OFFSET(IA32_SIGCONTEXT_ip, sigcontext_32, ip);
 
 	BLANK();
 	OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c6bed2..58031303e304 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o perf_event_intel_cqm.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_pt.o perf_event_intel_bts.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_cstate.o
 
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_snb.o \
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index de22ea7ff82f..4ddd780aeac9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -670,6 +670,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
+		c->x86_capability[13] = cpuid_ebx(0x80000008);
 	}
 #ifdef CONFIG_X86_32
 	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index be4febc58b94..e38d338a6447 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -157,7 +157,7 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };
 
-unsigned short			num_cache_leaves;
+static unsigned short num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user.  This makes some assumptions about the machine:
@@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
  *
  * @returns: the disabled index if used or negative value if slot free.
  */
-int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 {
 	unsigned int reg = 0;
 
@@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
  *
  * @return: 0 on success, error status on failure
  */
-int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
-			    unsigned long index)
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+			    unsigned slot, unsigned long index)
 {
 	int ret = 0;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9d014b82a124..c5b0d562dbf5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1586,6 +1586,8 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 		winchip_mcheck_init(c);
 		return 1;
 		break;
+	default:
+		return 0;
 	}
 
 	return 0;
@@ -1605,6 +1607,8 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 		mce_amd_feature_init(c);
 		mce_flags.overflow_recov = !!(ebx & BIT(0));
 		mce_flags.succor	 = !!(ebx & BIT(1));
+		mce_flags.smca		 = !!(ebx & BIT(3));
+
 		break;
 		}
 
@@ -2042,7 +2046,7 @@ int __init mcheck_init(void)
  * Disable machine checks on suspend and shutdown. We can't really handle
  * them later.
  */
-static int mce_disable_error_reporting(void)
+static void mce_disable_error_reporting(void)
 {
 	int i;
 
@@ -2052,17 +2056,32 @@ static int mce_disable_error_reporting(void)
 		if (b->init)
 			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
-	return 0;
+	return;
+}
+
+static void vendor_disable_error_reporting(void)
+{
+	/*
+	 * Don't clear on Intel CPUs. Some of these MSRs are socket-wide.
+	 * Disabling them for just a single offlined CPU is bad, since it will
+	 * inhibit reporting for all shared resources on the socket like the
+	 * last level cache (LLC), the integrated memory controller (iMC), etc.
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		return;
+
+	mce_disable_error_reporting();
 }
 
 static int mce_syscore_suspend(void)
 {
-	return mce_disable_error_reporting();
+	vendor_disable_error_reporting();
+	return 0;
 }
 
 static void mce_syscore_shutdown(void)
 {
-	mce_disable_error_reporting();
+	vendor_disable_error_reporting();
 }
 
 /*
@@ -2342,19 +2361,14 @@ static void mce_device_remove(unsigned int cpu)
 static void mce_disable_cpu(void *h)
 {
 	unsigned long action = *(unsigned long *)h;
-	int i;
 
 	if (!mce_available(raw_cpu_ptr(&cpu_info)))
 		return;
 
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
-	for (i = 0; i < mca_cfg.banks; i++) {
-		struct mce_bank *b = &mce_banks[i];
 
-		if (b->init)
-			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
-	}
+	vendor_disable_error_reporting();
 }
 
 static void mce_reenable_cpu(void *h)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 1af51b1586d7..2c5aaf8c2e2f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -503,14 +503,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 		return;
 	}
 
-	/* Check whether a vector already exists */
-	if (h & APIC_VECTOR_MASK) {
-		printk(KERN_DEBUG
-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
-		       cpu, (h & APIC_VECTOR_MASK));
-		return;
-	}
-
 	/* early Pentium M models use different method for enabling TM2 */
 	if (cpu_has(c, X86_FEATURE_TM2)) {
 		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile
index 285c85427c32..220b1a508513 100644
--- a/arch/x86/kernel/cpu/microcode/Makefile
+++ b/arch/x86/kernel/cpu/microcode/Makefile
@@ -2,6 +2,3 @@ microcode-y				:= core.o
 obj-$(CONFIG_MICROCODE)			+= microcode.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= intel.o intel_lib.o
 microcode-$(CONFIG_MICROCODE_AMD)	+= amd.o
-obj-$(CONFIG_MICROCODE_EARLY)		+= core_early.o
-obj-$(CONFIG_MICROCODE_INTEL_EARLY)	+= intel_early.o
-obj-$(CONFIG_MICROCODE_AMD_EARLY)	+= amd_early.o
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 12829c3ced3c..2233f8a76615 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -1,5 +1,9 @@
 /*
  *  AMD CPU Microcode Update Driver for Linux
+ *
+ *  This driver allows to upgrade microcode on F10h AMD
+ *  CPUs and later.
+ *
  *  Copyright (C) 2008-2011 Advanced Micro Devices Inc.
  *
  *  Author: Peter Oruba <peter.oruba@amd.com>
@@ -7,34 +11,31 @@
  *  Based on work by:
  *  Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
  *
- *  Maintainers:
- *  Andreas Herrmann <herrmann.der.user@googlemail.com>
- *  Borislav Petkov <bp@alien8.de>
+ *  early loader:
+ *  Copyright (C) 2013 Advanced Micro Devices, Inc.
  *
- *  This driver allows to upgrade microcode on F10h AMD
- *  CPUs and later.
+ *  Author: Jacob Shin <jacob.shin@amd.com>
+ *  Fixes: Borislav Petkov <bp@suse.de>
  *
  *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
  */
+#define pr_fmt(fmt) "microcode: " fmt
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
+#include <linux/earlycpio.h>
 #include <linux/firmware.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/initrd.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/pci.h>
 
+#include <asm/microcode_amd.h>
 #include <asm/microcode.h>
 #include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/cpu.h>
 #include <asm/msr.h>
-#include <asm/microcode_amd.h>
-
-MODULE_DESCRIPTION("AMD Microcode Update Driver");
-MODULE_AUTHOR("Peter Oruba");
-MODULE_LICENSE("GPL v2");
 
 static struct equiv_cpu_entry *equiv_cpu_table;
 
@@ -47,6 +48,432 @@ struct ucode_patch {
 
 static LIST_HEAD(pcache);
 
+/*
+ * This points to the current valid container of microcode patches which we will
+ * save from the initrd before jettisoning its contents.
+ */
+static u8 *container;
+static size_t container_size;
+
+static u32 ucode_new_rev;
+u8 amd_ucode_patch[PATCH_MAX_SIZE];
+static u16 this_equiv_id;
+
+static struct cpio_data ucode_cpio;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+	long offset = 0;
+	char *path;
+	void *start;
+	size_t size;
+
+#ifdef CONFIG_X86_32
+	struct boot_params *p;
+
+	/*
+	 * On 32-bit, early load occurs before paging is turned on so we need
+	 * to use physical addresses.
+	 */
+	p       = (struct boot_params *)__pa_nodebug(&boot_params);
+	path    = (char *)__pa_nodebug(ucode_path);
+	start   = (void *)p->hdr.ramdisk_image;
+	size    = p->hdr.ramdisk_size;
+#else
+	path    = ucode_path;
+	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+	size    = boot_params.hdr.ramdisk_size;
+#endif
+
+	return find_cpio_data(path, start, size, &offset);
+}
+
+static size_t compute_container_size(u8 *data, u32 total_size)
+{
+	size_t size = 0;
+	u32 *header = (u32 *)data;
+
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return size;
+
+	size = header[2] + CONTAINER_HDR_SZ;
+	total_size -= size;
+	data += size;
+
+	while (total_size) {
+		u16 patch_size;
+
+		header = (u32 *)data;
+
+		if (header[0] != UCODE_UCODE_TYPE)
+			break;
+
+		/*
+		 * Sanity-check patch size.
+		 */
+		patch_size = header[1];
+		if (patch_size > PATCH_MAX_SIZE)
+			break;
+
+		size	   += patch_size + SECTION_HDR_SIZE;
+		data	   += patch_size + SECTION_HDR_SIZE;
+		total_size -= patch_size + SECTION_HDR_SIZE;
+	}
+
+	return size;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
+{
+	struct equiv_cpu_entry *eq;
+	size_t *cont_sz;
+	u32 *header;
+	u8  *data, **cont;
+	u8 (*patch)[PATCH_MAX_SIZE];
+	u16 eq_id = 0;
+	int offset, left;
+	u32 rev, eax, ebx, ecx, edx;
+	u32 *new_rev;
+
+#ifdef CONFIG_X86_32
+	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+	cont_sz = (size_t *)__pa_nodebug(&container_size);
+	cont	= (u8 **)__pa_nodebug(&container);
+	patch	= (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
+#else
+	new_rev = &ucode_new_rev;
+	cont_sz = &container_size;
+	cont	= &container;
+	patch	= &amd_ucode_patch;
+#endif
+
+	data   = ucode;
+	left   = size;
+	header = (u32 *)data;
+
+	/* find equiv cpu table */
+	if (header[0] != UCODE_MAGIC ||
+	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+	    header[2] == 0)                            /* size */
+		return;
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	while (left > 0) {
+		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+		*cont = data;
+
+		/* Advance past the container header */
+		offset = header[2] + CONTAINER_HDR_SZ;
+		data  += offset;
+		left  -= offset;
+
+		eq_id = find_equiv_id(eq, eax);
+		if (eq_id) {
+			this_equiv_id = eq_id;
+			*cont_sz = compute_container_size(*cont, left + offset);
+
+			/*
+			 * truncate how much we need to iterate over in the
+			 * ucode update loop below
+			 */
+			left = *cont_sz - offset;
+			break;
+		}
+
+		/*
+		 * support multiple container files appended together. if this
+		 * one does not have a matching equivalent cpu entry, we fast
+		 * forward to the next container file.
+		 */
+		while (left > 0) {
+			header = (u32 *)data;
+			if (header[0] == UCODE_MAGIC &&
+			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+				break;
+
+			offset = header[1] + SECTION_HDR_SIZE;
+			data  += offset;
+			left  -= offset;
+		}
+
+		/* mark where the next microcode container file starts */
+		offset    = data - (u8 *)ucode;
+		ucode     = data;
+	}
+
+	if (!eq_id) {
+		*cont = NULL;
+		*cont_sz = 0;
+		return;
+	}
+
+	if (check_current_patch_level(&rev, true))
+		return;
+
+	while (left > 0) {
+		struct microcode_amd *mc;
+
+		header = (u32 *)data;
+		if (header[0] != UCODE_UCODE_TYPE || /* type */
+		    header[1] == 0)                  /* size */
+			break;
+
+		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+
+		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+
+			if (!__apply_microcode_amd(mc)) {
+				rev = mc->hdr.patch_id;
+				*new_rev = rev;
+
+				if (save_patch)
+					memcpy(patch, mc,
+					       min_t(u32, header[1], PATCH_MAX_SIZE));
+			}
+		}
+
+		offset  = header[1] + SECTION_HDR_SIZE;
+		data   += offset;
+		left   -= offset;
+	}
+}
+
+static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
+					      unsigned int family)
+{
+#ifdef CONFIG_X86_64
+	char fw_name[36] = "amd-ucode/microcode_amd.bin";
+
+	if (family >= 0x15)
+		snprintf(fw_name, sizeof(fw_name),
+			 "amd-ucode/microcode_amd_fam%.2xh.bin", family);
+
+	return get_builtin_firmware(cp, fw_name);
+#else
+	return false;
+#endif
+}
+
+void __init load_ucode_amd_bsp(unsigned int family)
+{
+	struct cpio_data cp;
+	void **data;
+	size_t *size;
+
+#ifdef CONFIG_X86_32
+	data =  (void **)__pa_nodebug(&ucode_cpio.data);
+	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+#else
+	data = &ucode_cpio.data;
+	size = &ucode_cpio.size;
+#endif
+
+	cp = find_ucode_in_initrd();
+	if (!cp.data) {
+		if (!load_builtin_amd_microcode(&cp, family))
+			return;
+	}
+
+	*data = cp.data;
+	*size = cp.size;
+
+	apply_ucode_in_initrd(cp.data, cp.size, true);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
+ * which is used upon resume from suspend.
+ */
+void load_ucode_amd_ap(void)
+{
+	struct microcode_amd *mc;
+	size_t *usize;
+	void **ucode;
+
+	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
+	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+		__apply_microcode_amd(mc);
+		return;
+	}
+
+	ucode = (void *)__pa_nodebug(&container);
+	usize = (size_t *)__pa_nodebug(&container_size);
+
+	if (!*ucode || !*usize)
+		return;
+
+	apply_ucode_in_initrd(*ucode, *usize, false);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+	unsigned int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+
+static void __init get_bsp_sig(void)
+{
+	unsigned int bsp = boot_cpu_data.cpu_index;
+	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+	if (!uci->cpu_sig.sig)
+		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+}
+#else
+void load_ucode_amd_ap(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct equiv_cpu_entry *eq;
+	struct microcode_amd *mc;
+	u32 rev, eax;
+	u16 eq_id;
+
+	/* Exit if called on the BSP. */
+	if (!cpu)
+		return;
+
+	if (!container)
+		return;
+
+	/*
+	 * 64-bit runs with paging enabled, thus early==false.
+	 */
+	if (check_current_patch_level(&rev, false))
+		return;
+
+	eax = cpuid_eax(0x00000001);
+	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+
+	eq_id = find_equiv_id(eq, eax);
+	if (!eq_id)
+		return;
+
+	if (eq_id == this_equiv_id) {
+		mc = (struct microcode_amd *)amd_ucode_patch;
+
+		if (mc && rev < mc->hdr.patch_id) {
+			if (!__apply_microcode_amd(mc))
+				ucode_new_rev = mc->hdr.patch_id;
+		}
+
+	} else {
+		if (!ucode_cpio.data)
+			return;
+
+		/*
+		 * AP has a different equivalence ID than BSP, looks like
+		 * mixed-steppings silicon so go through the ucode blob anew.
+		 */
+		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
+	}
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+	unsigned long cont;
+	int retval = 0;
+	enum ucode_state ret;
+	u8 *cont_va;
+	u32 eax;
+
+	if (!container)
+		return -EINVAL;
+
+#ifdef CONFIG_X86_32
+	get_bsp_sig();
+	cont	= (unsigned long)container;
+	cont_va = __va(container);
+#else
+	/*
+	 * We need the physical address of the container for both bitness since
+	 * boot_params.hdr.ramdisk_image is a physical address.
+	 */
+	cont    = __pa(container);
+	cont_va = container;
+#endif
+
+	/*
+	 * Take into account the fact that the ramdisk might get relocated and
+	 * therefore we need to recompute the container's position in virtual
+	 * memory space.
+	 */
+	if (relocated_ramdisk)
+		container = (u8 *)(__va(relocated_ramdisk) +
+			     (cont - boot_params.hdr.ramdisk_image));
+	else
+		container = cont_va;
+
+	if (ucode_new_rev)
+		pr_info("microcode: updated early to new patch_level=0x%08x\n",
+			ucode_new_rev);
+
+	eax   = cpuid_eax(0x00000001);
+	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+	ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
+	if (ret != UCODE_OK)
+		retval = -EINVAL;
+
+	/*
+	 * This will be freed any msec now, stash patches for the current
+	 * family and switch to patch cache for cpu hotplug, etc later.
+	 */
+	container = NULL;
+	container_size = 0;
+
+	return retval;
+}
+
+void reload_ucode_amd(void)
+{
+	struct microcode_amd *mc;
+	u32 rev;
+
+	/*
+	 * early==false because this is a syscore ->resume path and by
+	 * that time paging is long enabled.
+	 */
+	if (check_current_patch_level(&rev, false))
+		return;
+
+	mc = (struct microcode_amd *)amd_ucode_patch;
+
+	if (mc && rev < mc->hdr.patch_id) {
+		if (!__apply_microcode_amd(mc)) {
+			ucode_new_rev = mc->hdr.patch_id;
+			pr_info("microcode: reload patch_level=0x%08x\n",
+				ucode_new_rev);
+		}
+	}
+}
 static u16 __find_equiv_id(unsigned int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -177,6 +604,53 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	return patch_size;
 }
 
+/*
+ * Those patch levels cannot be updated to newer ones and thus should be final.
+ */
+static u32 final_levels[] = {
+	0x01000098,
+	0x0100009f,
+	0x010000af,
+	0, /* T-101 terminator */
+};
+
+/*
+ * Check the current patch level on this CPU.
+ *
+ * @rev: Use it to return the patch level. It is set to 0 in the case of
+ * error.
+ *
+ * Returns:
+ *  - true: if update should stop
+ *  - false: otherwise
+ */
+bool check_current_patch_level(u32 *rev, bool early)
+{
+	u32 lvl, dummy, i;
+	bool ret = false;
+	u32 *levels;
+
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
+
+	if (IS_ENABLED(CONFIG_X86_32) && early)
+		levels = (u32 *)__pa_nodebug(&final_levels);
+	else
+		levels = final_levels;
+
+	for (i = 0; levels[i]; i++) {
+		if (lvl == levels[i]) {
+			lvl = 0;
+			ret = true;
+			break;
+		}
+	}
+
+	if (rev)
+		*rev = lvl;
+
+	return ret;
+}
+
 int __apply_microcode_amd(struct microcode_amd *mc_amd)
 {
 	u32 rev, dummy;
@@ -197,7 +671,7 @@ int apply_microcode_amd(int cpu)
 	struct microcode_amd *mc_amd;
 	struct ucode_cpu_info *uci;
 	struct ucode_patch *p;
-	u32 rev, dummy;
+	u32 rev;
 
 	BUG_ON(raw_smp_processor_id() != cpu);
 
@@ -210,7 +684,8 @@ int apply_microcode_amd(int cpu)
 	mc_amd  = p->data;
 	uci->mc = p->data;
 
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+	if (check_current_patch_level(&rev, false))
+		return -1;
 
 	/* need to apply patch? */
 	if (rev >= mc_amd->hdr.patch_id) {
@@ -387,7 +862,7 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s
 	if (ret != UCODE_OK)
 		cleanup();
 
-#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+#ifdef CONFIG_X86_32
 	/* save BSP's matching patch for early load */
 	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
 		struct ucode_patch *p = find_patch(cpu);
@@ -475,7 +950,7 @@ static struct microcode_ops microcode_amd_ops = {
 
 struct microcode_ops * __init init_amd_microcode(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(0);
+	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
 		pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
deleted file mode 100644
index e8a215a9a345..000000000000
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- * Fixes: Borislav Petkov <bp@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-#include <asm/cpu.h>
-#include <asm/setup.h>
-#include <asm/microcode_amd.h>
-
-/*
- * This points to the current valid container of microcode patches which we will
- * save from the initrd before jettisoning its contents.
- */
-static u8 *container;
-static size_t container_size;
-
-static u32 ucode_new_rev;
-u8 amd_ucode_patch[PATCH_MAX_SIZE];
-static u16 this_equiv_id;
-
-static struct cpio_data ucode_cpio;
-
-/*
- * Microcode patch container file is prepended to the initrd in cpio format.
- * See Documentation/x86/early-microcode.txt
- */
-static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
-
-static struct cpio_data __init find_ucode_in_initrd(void)
-{
-	long offset = 0;
-	char *path;
-	void *start;
-	size_t size;
-
-#ifdef CONFIG_X86_32
-	struct boot_params *p;
-
-	/*
-	 * On 32-bit, early load occurs before paging is turned on so we need
-	 * to use physical addresses.
-	 */
-	p       = (struct boot_params *)__pa_nodebug(&boot_params);
-	path    = (char *)__pa_nodebug(ucode_path);
-	start   = (void *)p->hdr.ramdisk_image;
-	size    = p->hdr.ramdisk_size;
-#else
-	path    = ucode_path;
-	start   = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
-	size    = boot_params.hdr.ramdisk_size;
-#endif
-
-	return find_cpio_data(path, start, size, &offset);
-}
-
-static size_t compute_container_size(u8 *data, u32 total_size)
-{
-	size_t size = 0;
-	u32 *header = (u32 *)data;
-
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return size;
-
-	size = header[2] + CONTAINER_HDR_SZ;
-	total_size -= size;
-	data += size;
-
-	while (total_size) {
-		u16 patch_size;
-
-		header = (u32 *)data;
-
-		if (header[0] != UCODE_UCODE_TYPE)
-			break;
-
-		/*
-		 * Sanity-check patch size.
-		 */
-		patch_size = header[1];
-		if (patch_size > PATCH_MAX_SIZE)
-			break;
-
-		size	   += patch_size + SECTION_HDR_SIZE;
-		data	   += patch_size + SECTION_HDR_SIZE;
-		total_size -= patch_size + SECTION_HDR_SIZE;
-	}
-
-	return size;
-}
-
-/*
- * Early load occurs before we can vmalloc(). So we look for the microcode
- * patch container file in initrd, traverse equivalent cpu table, look for a
- * matching microcode patch, and update, all in initrd memory in place.
- * When vmalloc() is available for use later -- on 64-bit during first AP load,
- * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
- * load_microcode_amd() to save equivalent cpu table and microcode patches in
- * kernel heap memory.
- */
-static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
-{
-	struct equiv_cpu_entry *eq;
-	size_t *cont_sz;
-	u32 *header;
-	u8  *data, **cont;
-	u8 (*patch)[PATCH_MAX_SIZE];
-	u16 eq_id = 0;
-	int offset, left;
-	u32 rev, eax, ebx, ecx, edx;
-	u32 *new_rev;
-
-#ifdef CONFIG_X86_32
-	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-	cont_sz = (size_t *)__pa_nodebug(&container_size);
-	cont	= (u8 **)__pa_nodebug(&container);
-	patch	= (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
-#else
-	new_rev = &ucode_new_rev;
-	cont_sz = &container_size;
-	cont	= &container;
-	patch	= &amd_ucode_patch;
-#endif
-
-	data   = ucode;
-	left   = size;
-	header = (u32 *)data;
-
-	/* find equiv cpu table */
-	if (header[0] != UCODE_MAGIC ||
-	    header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
-	    header[2] == 0)                            /* size */
-		return;
-
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	while (left > 0) {
-		eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
-
-		*cont = data;
-
-		/* Advance past the container header */
-		offset = header[2] + CONTAINER_HDR_SZ;
-		data  += offset;
-		left  -= offset;
-
-		eq_id = find_equiv_id(eq, eax);
-		if (eq_id) {
-			this_equiv_id = eq_id;
-			*cont_sz = compute_container_size(*cont, left + offset);
-
-			/*
-			 * truncate how much we need to iterate over in the
-			 * ucode update loop below
-			 */
-			left = *cont_sz - offset;
-			break;
-		}
-
-		/*
-		 * support multiple container files appended together. if this
-		 * one does not have a matching equivalent cpu entry, we fast
-		 * forward to the next container file.
-		 */
-		while (left > 0) {
-			header = (u32 *)data;
-			if (header[0] == UCODE_MAGIC &&
-			    header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
-				break;
-
-			offset = header[1] + SECTION_HDR_SIZE;
-			data  += offset;
-			left  -= offset;
-		}
-
-		/* mark where the next microcode container file starts */
-		offset    = data - (u8 *)ucode;
-		ucode     = data;
-	}
-
-	if (!eq_id) {
-		*cont = NULL;
-		*cont_sz = 0;
-		return;
-	}
-
-	/* find ucode and update if needed */
-
-	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
-	while (left > 0) {
-		struct microcode_amd *mc;
-
-		header = (u32 *)data;
-		if (header[0] != UCODE_UCODE_TYPE || /* type */
-		    header[1] == 0)                  /* size */
-			break;
-
-		mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
-
-		if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
-
-			if (!__apply_microcode_amd(mc)) {
-				rev = mc->hdr.patch_id;
-				*new_rev = rev;
-
-				if (save_patch)
-					memcpy(patch, mc,
-					       min_t(u32, header[1], PATCH_MAX_SIZE));
-			}
-		}
-
-		offset  = header[1] + SECTION_HDR_SIZE;
-		data   += offset;
-		left   -= offset;
-	}
-}
-
-static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
-					      unsigned int family)
-{
-#ifdef CONFIG_X86_64
-	char fw_name[36] = "amd-ucode/microcode_amd.bin";
-
-	if (family >= 0x15)
-		snprintf(fw_name, sizeof(fw_name),
-			 "amd-ucode/microcode_amd_fam%.2xh.bin", family);
-
-	return get_builtin_firmware(cp, fw_name);
-#else
-	return false;
-#endif
-}
-
-void __init load_ucode_amd_bsp(unsigned int family)
-{
-	struct cpio_data cp;
-	void **data;
-	size_t *size;
-
-#ifdef CONFIG_X86_32
-	data =  (void **)__pa_nodebug(&ucode_cpio.data);
-	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
-#else
-	data = &ucode_cpio.data;
-	size = &ucode_cpio.size;
-#endif
-
-	cp = find_ucode_in_initrd();
-	if (!cp.data) {
-		if (!load_builtin_amd_microcode(&cp, family))
-			return;
-	}
-
-	*data = cp.data;
-	*size = cp.size;
-
-	apply_ucode_in_initrd(cp.data, cp.size, true);
-}
-
-#ifdef CONFIG_X86_32
-/*
- * On 32-bit, since AP's early load occurs before paging is turned on, we
- * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
- * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
- * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
- * which is used upon resume from suspend.
- */
-void load_ucode_amd_ap(void)
-{
-	struct microcode_amd *mc;
-	size_t *usize;
-	void **ucode;
-
-	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
-	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
-		__apply_microcode_amd(mc);
-		return;
-	}
-
-	ucode = (void *)__pa_nodebug(&container);
-	usize = (size_t *)__pa_nodebug(&container_size);
-
-	if (!*ucode || !*usize)
-		return;
-
-	apply_ucode_in_initrd(*ucode, *usize, false);
-}
-
-static void __init collect_cpu_sig_on_bsp(void *arg)
-{
-	unsigned int cpu = smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	uci->cpu_sig.sig = cpuid_eax(0x00000001);
-}
-
-static void __init get_bsp_sig(void)
-{
-	unsigned int bsp = boot_cpu_data.cpu_index;
-	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
-
-	if (!uci->cpu_sig.sig)
-		smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
-}
-#else
-void load_ucode_amd_ap(void)
-{
-	unsigned int cpu = smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct equiv_cpu_entry *eq;
-	struct microcode_amd *mc;
-	u32 rev, eax;
-	u16 eq_id;
-
-	/* Exit if called on the BSP. */
-	if (!cpu)
-		return;
-
-	if (!container)
-		return;
-
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
-	uci->cpu_sig.rev = rev;
-	uci->cpu_sig.sig = eax;
-
-	eax = cpuid_eax(0x00000001);
-	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
-
-	eq_id = find_equiv_id(eq, eax);
-	if (!eq_id)
-		return;
-
-	if (eq_id == this_equiv_id) {
-		mc = (struct microcode_amd *)amd_ucode_patch;
-
-		if (mc && rev < mc->hdr.patch_id) {
-			if (!__apply_microcode_amd(mc))
-				ucode_new_rev = mc->hdr.patch_id;
-		}
-
-	} else {
-		if (!ucode_cpio.data)
-			return;
-
-		/*
-		 * AP has a different equivalence ID than BSP, looks like
-		 * mixed-steppings silicon so go through the ucode blob anew.
-		 */
-		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
-	}
-}
-#endif
-
-int __init save_microcode_in_initrd_amd(void)
-{
-	unsigned long cont;
-	int retval = 0;
-	enum ucode_state ret;
-	u8 *cont_va;
-	u32 eax;
-
-	if (!container)
-		return -EINVAL;
-
-#ifdef CONFIG_X86_32
-	get_bsp_sig();
-	cont	= (unsigned long)container;
-	cont_va = __va(container);
-#else
-	/*
-	 * We need the physical address of the container for both bitness since
-	 * boot_params.hdr.ramdisk_image is a physical address.
-	 */
-	cont    = __pa(container);
-	cont_va = container;
-#endif
-
-	/*
-	 * Take into account the fact that the ramdisk might get relocated and
-	 * therefore we need to recompute the container's position in virtual
-	 * memory space.
-	 */
-	if (relocated_ramdisk)
-		container = (u8 *)(__va(relocated_ramdisk) +
-			     (cont - boot_params.hdr.ramdisk_image));
-	else
-		container = cont_va;
-
-	if (ucode_new_rev)
-		pr_info("microcode: updated early to new patch_level=0x%08x\n",
-			ucode_new_rev);
-
-	eax   = cpuid_eax(0x00000001);
-	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-
-	ret = load_microcode_amd(smp_processor_id(), eax, container, container_size);
-	if (ret != UCODE_OK)
-		retval = -EINVAL;
-
-	/*
-	 * This will be freed any msec now, stash patches for the current
-	 * family and switch to patch cache for cpu hotplug, etc later.
-	 */
-	container = NULL;
-	container_size = 0;
-
-	return retval;
-}
-
-void reload_ucode_amd(void)
-{
-	struct microcode_amd *mc;
-	u32 rev, eax;
-
-	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
-
-	mc = (struct microcode_amd *)amd_ucode_patch;
-
-	if (mc && rev < mc->hdr.patch_id) {
-		if (!__apply_microcode_amd(mc)) {
-			ucode_new_rev = mc->hdr.patch_id;
-			pr_info("microcode: reload patch_level=0x%08x\n",
-				ucode_new_rev);
-		}
-	}
-}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 9e3f3c7dd5d7..7fc27f1cca58 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -5,6 +5,12 @@
  *	      2006	Shaohua Li <shaohua.li@intel.com>
  *	      2013-2015	Borislav Petkov <bp@alien8.de>
  *
+ * X86 CPU microcode early update for Linux:
+ *
+ *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *			   H Peter Anvin" <hpa@zytor.com>
+ *		  (C) 2015 Borislav Petkov <bp@alien8.de>
+ *
  * This driver allows to upgrade microcode on x86 processors.
  *
  * This program is free software; you can redistribute it and/or
@@ -13,34 +19,39 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define pr_fmt(fmt) "microcode: " fmt
 
 #include <linux/platform_device.h>
+#include <linux/syscore_ops.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
+#include <linux/firmware.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
-#include <linux/syscore_ops.h>
 
-#include <asm/microcode.h>
-#include <asm/processor.h>
+#include <asm/microcode_intel.h>
 #include <asm/cpu_device_id.h>
+#include <asm/microcode_amd.h>
 #include <asm/perf_event.h>
+#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/cmdline.h>
 
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-#define MICROCODE_VERSION	"2.00"
+#define MICROCODE_VERSION	"2.01"
 
 static struct microcode_ops	*microcode_ops;
 
-bool dis_ucode_ldr;
-module_param(dis_ucode_ldr, bool, 0);
+static bool dis_ucode_ldr;
+
+static int __init disable_loader(char *str)
+{
+	dis_ucode_ldr = true;
+	return 1;
+}
+__setup("dis_ucode_ldr", disable_loader);
 
 /*
  * Synchronization.
@@ -68,6 +79,150 @@ struct cpu_info_ctx {
 	int			err;
 };
 
+static bool __init check_loader_disabled_bsp(void)
+{
+#ifdef CONFIG_X86_32
+	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
+	const char *opt	    = "dis_ucode_ldr";
+	const char *option  = (const char *)__pa_nodebug(opt);
+	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
+
+#else /* CONFIG_X86_64 */
+	const char *cmdline = boot_command_line;
+	const char *option  = "dis_ucode_ldr";
+	bool *res = &dis_ucode_ldr;
+#endif
+
+	if (cmdline_find_option_bool(cmdline, option))
+		*res = true;
+
+	return *res;
+}
+
+extern struct builtin_fw __start_builtin_fw[];
+extern struct builtin_fw __end_builtin_fw[];
+
+bool get_builtin_firmware(struct cpio_data *cd, const char *name)
+{
+#ifdef CONFIG_FW_LOADER
+	struct builtin_fw *b_fw;
+
+	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
+		if (!strcmp(name, b_fw->name)) {
+			cd->size = b_fw->size;
+			cd->data = b_fw->data;
+			return true;
+		}
+	}
+#endif
+	return false;
+}
+
+void __init load_ucode_bsp(void)
+{
+	int vendor;
+	unsigned int family;
+
+	if (check_loader_disabled_bsp())
+		return;
+
+	if (!have_cpuid_p())
+		return;
+
+	vendor = x86_vendor();
+	family = x86_family();
+
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (family >= 6)
+			load_ucode_intel_bsp();
+		break;
+	case X86_VENDOR_AMD:
+		if (family >= 0x10)
+			load_ucode_amd_bsp(family);
+		break;
+	default:
+		break;
+	}
+}
+
+static bool check_loader_disabled_ap(void)
+{
+#ifdef CONFIG_X86_32
+	return *((bool *)__pa_nodebug(&dis_ucode_ldr));
+#else
+	return dis_ucode_ldr;
+#endif
+}
+
+void load_ucode_ap(void)
+{
+	int vendor, family;
+
+	if (check_loader_disabled_ap())
+		return;
+
+	if (!have_cpuid_p())
+		return;
+
+	vendor = x86_vendor();
+	family = x86_family();
+
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (family >= 6)
+			load_ucode_intel_ap();
+		break;
+	case X86_VENDOR_AMD:
+		if (family >= 0x10)
+			load_ucode_amd_ap();
+		break;
+	default:
+		break;
+	}
+}
+
+int __init save_microcode_in_initrd(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (c->x86 >= 6)
+			save_microcode_in_initrd_intel();
+		break;
+	case X86_VENDOR_AMD:
+		if (c->x86 >= 0x10)
+			save_microcode_in_initrd_amd();
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+void reload_early_microcode(void)
+{
+	int vendor, family;
+
+	vendor = x86_vendor();
+	family = x86_family();
+
+	switch (vendor) {
+	case X86_VENDOR_INTEL:
+		if (family >= 6)
+			reload_ucode_intel();
+		break;
+	case X86_VENDOR_AMD:
+		if (family >= 0x10)
+			reload_ucode_amd();
+		break;
+	default:
+		break;
+	}
+}
+
 static void collect_cpu_info_local(void *arg)
 {
 	struct cpu_info_ctx *ctx = arg;
@@ -210,9 +365,6 @@ static void __exit microcode_dev_exit(void)
 {
 	misc_deregister(&microcode_dev);
 }
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-MODULE_ALIAS("devname:cpu/microcode");
 #else
 #define microcode_dev_init()	0
 #define microcode_dev_exit()	do { } while (0)
@@ -463,20 +615,6 @@ static struct notifier_block mc_cpu_notifier = {
 	.notifier_call	= mc_cpu_callback,
 };
 
-#ifdef MODULE
-/* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id __initconst microcode_id[] = {
-#ifdef CONFIG_MICROCODE_INTEL
-	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
-#ifdef CONFIG_MICROCODE_AMD
-	{ X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
-#endif
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, microcode_id);
-#endif
-
 static struct attribute *cpu_root_microcode_attrs[] = {
 	&dev_attr_reload.attr,
 	NULL
@@ -487,9 +625,9 @@ static struct attribute_group cpu_root_microcode_group = {
 	.attrs = cpu_root_microcode_attrs,
 };
 
-static int __init microcode_init(void)
+int __init microcode_init(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(0);
+	struct cpuinfo_x86 *c = &boot_cpu_data;
 	int error;
 
 	if (paravirt_enabled() || dis_ucode_ldr)
@@ -560,35 +698,3 @@ static int __init microcode_init(void)
 	return error;
 
 }
-module_init(microcode_init);
-
-static void __exit microcode_exit(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	microcode_dev_exit();
-
-	unregister_hotcpu_notifier(&mc_cpu_notifier);
-	unregister_syscore_ops(&mc_syscore_ops);
-
-	sysfs_remove_group(&cpu_subsys.dev_root->kobj,
-			   &cpu_root_microcode_group);
-
-	get_online_cpus();
-	mutex_lock(&microcode_mutex);
-
-	subsys_interface_unregister(&mc_cpu_interface);
-
-	mutex_unlock(&microcode_mutex);
-	put_online_cpus();
-
-	platform_device_unregister(microcode_pdev);
-
-	microcode_ops = NULL;
-
-	if (c->x86_vendor == X86_VENDOR_AMD)
-		exit_amd_microcode();
-
-	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
-}
-module_exit(microcode_exit);
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
deleted file mode 100644
index 8ebc421d6299..000000000000
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- *	X86 CPU microcode early update for Linux
- *
- *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- *			   H Peter Anvin" <hpa@zytor.com>
- *		  (C) 2015 Borislav Petkov <bp@alien8.de>
- *
- *	This driver allows to early upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- *	Software Developer's Manual.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <linux/firmware.h>
-#include <asm/microcode.h>
-#include <asm/microcode_intel.h>
-#include <asm/microcode_amd.h>
-#include <asm/processor.h>
-#include <asm/cmdline.h>
-
-static bool __init check_loader_disabled_bsp(void)
-{
-#ifdef CONFIG_X86_32
-	const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
-	const char *opt	    = "dis_ucode_ldr";
-	const char *option  = (const char *)__pa_nodebug(opt);
-	bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr);
-
-#else /* CONFIG_X86_64 */
-	const char *cmdline = boot_command_line;
-	const char *option  = "dis_ucode_ldr";
-	bool *res = &dis_ucode_ldr;
-#endif
-
-	if (cmdline_find_option_bool(cmdline, option))
-		*res = true;
-
-	return *res;
-}
-
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
-
-bool get_builtin_firmware(struct cpio_data *cd, const char *name)
-{
-#ifdef CONFIG_FW_LOADER
-	struct builtin_fw *b_fw;
-
-	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
-		if (!strcmp(name, b_fw->name)) {
-			cd->size = b_fw->size;
-			cd->data = b_fw->data;
-			return true;
-		}
-	}
-#endif
-	return false;
-}
-
-void __init load_ucode_bsp(void)
-{
-	int vendor;
-	unsigned int family;
-
-	if (check_loader_disabled_bsp())
-		return;
-
-	if (!have_cpuid_p())
-		return;
-
-	vendor = x86_vendor();
-	family = x86_family();
-
-	switch (vendor) {
-	case X86_VENDOR_INTEL:
-		if (family >= 6)
-			load_ucode_intel_bsp();
-		break;
-	case X86_VENDOR_AMD:
-		if (family >= 0x10)
-			load_ucode_amd_bsp(family);
-		break;
-	default:
-		break;
-	}
-}
-
-static bool check_loader_disabled_ap(void)
-{
-#ifdef CONFIG_X86_32
-	return *((bool *)__pa_nodebug(&dis_ucode_ldr));
-#else
-	return dis_ucode_ldr;
-#endif
-}
-
-void load_ucode_ap(void)
-{
-	int vendor, family;
-
-	if (check_loader_disabled_ap())
-		return;
-
-	if (!have_cpuid_p())
-		return;
-
-	vendor = x86_vendor();
-	family = x86_family();
-
-	switch (vendor) {
-	case X86_VENDOR_INTEL:
-		if (family >= 6)
-			load_ucode_intel_ap();
-		break;
-	case X86_VENDOR_AMD:
-		if (family >= 0x10)
-			load_ucode_amd_ap();
-		break;
-	default:
-		break;
-	}
-}
-
-int __init save_microcode_in_initrd(void)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_INTEL:
-		if (c->x86 >= 6)
-			save_microcode_in_initrd_intel();
-		break;
-	case X86_VENDOR_AMD:
-		if (c->x86 >= 0x10)
-			save_microcode_in_initrd_amd();
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-void reload_early_microcode(void)
-{
-	int vendor, family;
-
-	vendor = x86_vendor();
-	family = x86_family();
-
-	switch (vendor) {
-	case X86_VENDOR_INTEL:
-		if (family >= 6)
-			reload_ucode_intel();
-		break;
-	case X86_VENDOR_AMD:
-		if (family >= 0x10)
-			reload_ucode_amd();
-		break;
-	default:
-		break;
-	}
-}
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 969dc17eb1b4..ce47402eb2f9 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -4,27 +4,804 @@
  * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
  *		 2006 Shaohua Li <shaohua.li@intel.com>
  *
+ * Intel CPU microcode early update for Linux
+ *
+ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
+ *		      H Peter Anvin" <hpa@zytor.com>
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+/*
+ * This needs to be before all headers so that pr_debug in printk.h doesn't turn
+ * printk calls into no_printk().
+ *
+ *#define DEBUG
+ */
+#define pr_fmt(fmt) "microcode: " fmt
 
+#include <linux/earlycpio.h>
 #include <linux/firmware.h>
 #include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/initrd.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
 
 #include <asm/microcode_intel.h>
 #include <asm/processor.h>
+#include <asm/tlbflush.h>
+#include <asm/setup.h>
 #include <asm/msr.h>
 
-MODULE_DESCRIPTION("Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
+static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
+static struct mc_saved_data {
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+} mc_saved_data;
+
+static enum ucode_state
+load_microcode_early(struct microcode_intel **saved,
+		     unsigned int num_saved, struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *ucode_ptr, *new_mc = NULL;
+	struct microcode_header_intel *mc_hdr;
+	int new_rev, ret, i;
+
+	new_rev = uci->cpu_sig.rev;
+
+	for (i = 0; i < num_saved; i++) {
+		ucode_ptr = saved[i];
+		mc_hdr	  = (struct microcode_header_intel *)ucode_ptr;
+
+		ret = has_newer_microcode(ucode_ptr,
+					  uci->cpu_sig.sig,
+					  uci->cpu_sig.pf,
+					  new_rev);
+		if (!ret)
+			continue;
+
+		new_rev = mc_hdr->rev;
+		new_mc  = ucode_ptr;
+	}
+
+	if (!new_mc)
+		return UCODE_NFOUND;
+
+	uci->mc = (struct microcode_intel *)new_mc;
+	return UCODE_OK;
+}
+
+static inline void
+copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
+		  unsigned long off, int num_saved)
+{
+	int i;
+
+	for (i = 0; i < num_saved; i++)
+		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
+}
+
+#ifdef CONFIG_X86_32
+static void
+microcode_phys(struct microcode_intel **mc_saved_tmp,
+	       struct mc_saved_data *mc_saved_data)
+{
+	int i;
+	struct microcode_intel ***mc_saved;
+
+	mc_saved = (struct microcode_intel ***)
+		   __pa_nodebug(&mc_saved_data->mc_saved);
+	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
+		struct microcode_intel *p;
+
+		p = *(struct microcode_intel **)
+			__pa_nodebug(mc_saved_data->mc_saved + i);
+		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
+	}
+}
+#endif
+
+static enum ucode_state
+load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long initrd_start, struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int count = mc_saved_data->mc_saved_count;
+
+	if (!mc_saved_data->mc_saved) {
+		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
+
+		return load_microcode_early(mc_saved_tmp, count, uci);
+	} else {
+#ifdef CONFIG_X86_32
+		microcode_phys(mc_saved_tmp, mc_saved_data);
+		return load_microcode_early(mc_saved_tmp, count, uci);
+#else
+		return load_microcode_early(mc_saved_data->mc_saved,
+						    count, uci);
+#endif
+	}
+}
+
+/*
+ * Given CPU signature and a microcode patch, this function finds if the
+ * microcode patch has matching family and model with the CPU.
+ */
+static enum ucode_state
+matching_model_microcode(struct microcode_header_intel *mc_header,
+			unsigned long sig)
+{
+	unsigned int fam, model;
+	unsigned int fam_ucode, model_ucode;
+	struct extended_sigtable *ext_header;
+	unsigned long total_size = get_totalsize(mc_header);
+	unsigned long data_size = get_datasize(mc_header);
+	int ext_sigcount, i;
+	struct extended_signature *ext_sig;
+
+	fam   = __x86_family(sig);
+	model = x86_model(sig);
+
+	fam_ucode   = __x86_family(mc_header->sig);
+	model_ucode = x86_model(mc_header->sig);
+
+	if (fam == fam_ucode && model == model_ucode)
+		return UCODE_OK;
+
+	/* Look for ext. headers: */
+	if (total_size <= data_size + MC_HEADER_SIZE)
+		return UCODE_NFOUND;
+
+	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
+	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
+	ext_sigcount = ext_header->count;
+
+	for (i = 0; i < ext_sigcount; i++) {
+		fam_ucode   = __x86_family(ext_sig->sig);
+		model_ucode = x86_model(ext_sig->sig);
+
+		if (fam == fam_ucode && model == model_ucode)
+			return UCODE_OK;
+
+		ext_sig++;
+	}
+	return UCODE_NFOUND;
+}
+
+static int
+save_microcode(struct mc_saved_data *mc_saved_data,
+	       struct microcode_intel **mc_saved_src,
+	       unsigned int mc_saved_count)
+{
+	int i, j;
+	struct microcode_intel **saved_ptr;
+	int ret;
+
+	if (!mc_saved_count)
+		return -EINVAL;
+
+	/*
+	 * Copy new microcode data.
+	 */
+	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+	if (!saved_ptr)
+		return -ENOMEM;
+
+	for (i = 0; i < mc_saved_count; i++) {
+		struct microcode_header_intel *mc_hdr;
+		struct microcode_intel *mc;
+		unsigned long size;
+
+		if (!mc_saved_src[i]) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		mc     = mc_saved_src[i];
+		mc_hdr = &mc->hdr;
+		size   = get_totalsize(mc_hdr);
+
+		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+		if (!saved_ptr[i]) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		memcpy(saved_ptr[i], mc, size);
+	}
+
+	/*
+	 * Point to newly saved microcode.
+	 */
+	mc_saved_data->mc_saved = saved_ptr;
+	mc_saved_data->mc_saved_count = mc_saved_count;
+
+	return 0;
+
+err:
+	for (j = 0; j <= i; j++)
+		kfree(saved_ptr[j]);
+	kfree(saved_ptr);
+
+	return ret;
+}
+
+/*
+ * A microcode patch in ucode_ptr is saved into mc_saved
+ * - if it has matching signature and newer revision compared to an existing
+ *   patch mc_saved.
+ * - or if it is a newly discovered microcode patch.
+ *
+ * The microcode patch should have matching model with CPU.
+ *
+ * Returns: The updated number @num_saved of saved microcode patches.
+ */
+static unsigned int _save_mc(struct microcode_intel **mc_saved,
+			     u8 *ucode_ptr, unsigned int num_saved)
+{
+	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+	unsigned int sig, pf;
+	int found = 0, i;
+
+	mc_hdr = (struct microcode_header_intel *)ucode_ptr;
+
+	for (i = 0; i < num_saved; i++) {
+		mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
+		sig	     = mc_saved_hdr->sig;
+		pf	     = mc_saved_hdr->pf;
+
+		if (!find_matching_signature(ucode_ptr, sig, pf))
+			continue;
+
+		found = 1;
+
+		if (mc_hdr->rev <= mc_saved_hdr->rev)
+			continue;
+
+		/*
+		 * Found an older ucode saved earlier. Replace it with
+		 * this newer one.
+		 */
+		mc_saved[i] = (struct microcode_intel *)ucode_ptr;
+		break;
+	}
+
+	/* Newly detected microcode, save it to memory. */
+	if (i >= num_saved && !found)
+		mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
+
+	return num_saved;
+}
+
+/*
+ * Get microcode matching with BSP's model. Only CPUs with the same model as
+ * BSP can stay in the platform.
+ */
+static enum ucode_state __init
+get_matching_model_microcode(int cpu, unsigned long start,
+			     void *data, size_t size,
+			     struct mc_saved_data *mc_saved_data,
+			     unsigned long *mc_saved_in_initrd,
+			     struct ucode_cpu_info *uci)
+{
+	u8 *ucode_ptr = data;
+	unsigned int leftover = size;
+	enum ucode_state state = UCODE_OK;
+	unsigned int mc_size;
+	struct microcode_header_intel *mc_header;
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
+	int i;
+
+	while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+
+		if (leftover < sizeof(mc_header))
+			break;
+
+		mc_header = (struct microcode_header_intel *)ucode_ptr;
+
+		mc_size = get_totalsize(mc_header);
+		if (!mc_size || mc_size > leftover ||
+			microcode_sanity_check(ucode_ptr, 0) < 0)
+			break;
+
+		leftover -= mc_size;
+
+		/*
+		 * Since APs with same family and model as the BSP may boot in
+		 * the platform, we need to find and save microcode patches
+		 * with the same family and model as the BSP.
+		 */
+		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
+			 UCODE_OK) {
+			ucode_ptr += mc_size;
+			continue;
+		}
+
+		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
+
+		ucode_ptr += mc_size;
+	}
+
+	if (leftover) {
+		state = UCODE_ERROR;
+		goto out;
+	}
+
+	if (mc_saved_count == 0) {
+		state = UCODE_NFOUND;
+		goto out;
+	}
+
+	for (i = 0; i < mc_saved_count; i++)
+		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
+
+	mc_saved_data->mc_saved_count = mc_saved_count;
+out:
+	return state;
+}
+
+static int collect_cpu_info_early(struct ucode_cpu_info *uci)
+{
+	unsigned int val[2];
+	unsigned int family, model;
+	struct cpu_signature csig;
+	unsigned int eax, ebx, ecx, edx;
+
+	csig.sig = 0;
+	csig.pf = 0;
+	csig.rev = 0;
+
+	memset(uci, 0, sizeof(*uci));
+
+	eax = 0x00000001;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+	csig.sig = eax;
+
+	family = __x86_family(csig.sig);
+	model  = x86_model(csig.sig);
+
+	if ((model >= 5) || (family > 6)) {
+		/* get processor flags from MSR 0x17 */
+		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		csig.pf = 1 << ((val[1] >> 18) & 7);
+	}
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+
+	csig.rev = val[1];
+
+	uci->cpu_sig = csig;
+	uci->valid = 1;
+
+	return 0;
+}
+
+static void show_saved_mc(void)
+{
+#ifdef DEBUG
+	int i, j;
+	unsigned int sig, pf, rev, total_size, data_size, date;
+	struct ucode_cpu_info uci;
+
+	if (mc_saved_data.mc_saved_count == 0) {
+		pr_debug("no microcode data saved.\n");
+		return;
+	}
+	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
+
+	collect_cpu_info_early(&uci);
+
+	sig = uci.cpu_sig.sig;
+	pf = uci.cpu_sig.pf;
+	rev = uci.cpu_sig.rev;
+	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
+
+	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
+		struct microcode_header_intel *mc_saved_header;
+		struct extended_sigtable *ext_header;
+		int ext_sigcount;
+		struct extended_signature *ext_sig;
+
+		mc_saved_header = (struct microcode_header_intel *)
+				  mc_saved_data.mc_saved[i];
+		sig = mc_saved_header->sig;
+		pf = mc_saved_header->pf;
+		rev = mc_saved_header->rev;
+		total_size = get_totalsize(mc_saved_header);
+		data_size = get_datasize(mc_saved_header);
+		date = mc_saved_header->date;
+
+		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
+			 i, sig, pf, rev, total_size,
+			 date & 0xffff,
+			 date >> 24,
+			 (date >> 16) & 0xff);
+
+		/* Look for ext. headers: */
+		if (total_size <= data_size + MC_HEADER_SIZE)
+			continue;
+
+		ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
+		ext_sigcount = ext_header->count;
+		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
+
+		for (j = 0; j < ext_sigcount; j++) {
+			sig = ext_sig->sig;
+			pf = ext_sig->pf;
+
+			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
+				 j, sig, pf);
+
+			ext_sig++;
+		}
+
+	}
+#endif
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_MUTEX(x86_cpu_microcode_mutex);
+/*
+ * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
+ * hot added or resumes.
+ *
+ * Please make sure this mc should be a valid microcode patch before calling
+ * this function.
+ */
+int save_mc_for_early(u8 *mc)
+{
+	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
+	unsigned int mc_saved_count_init;
+	unsigned int mc_saved_count;
+	struct microcode_intel **mc_saved;
+	int ret = 0;
+	int i;
+
+	/*
+	 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
+	 * hotplug.
+	 */
+	mutex_lock(&x86_cpu_microcode_mutex);
+
+	mc_saved_count_init = mc_saved_data.mc_saved_count;
+	mc_saved_count = mc_saved_data.mc_saved_count;
+	mc_saved = mc_saved_data.mc_saved;
+
+	if (mc_saved && mc_saved_count)
+		memcpy(mc_saved_tmp, mc_saved,
+		       mc_saved_count * sizeof(struct microcode_intel *));
+	/*
+	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
+	 * version.
+	 */
+	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
+
+	/*
+	 * Save the mc_save_tmp in global mc_saved_data.
+	 */
+	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
+	if (ret) {
+		pr_err("Cannot save microcode patch.\n");
+		goto out;
+	}
+
+	show_saved_mc();
+
+	/*
+	 * Free old saved microcode data.
+	 */
+	if (mc_saved) {
+		for (i = 0; i < mc_saved_count_init; i++)
+			kfree(mc_saved[i]);
+		kfree(mc_saved);
+	}
+
+out:
+	mutex_unlock(&x86_cpu_microcode_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(save_mc_for_early);
+#endif
+
+static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
+{
+#ifdef CONFIG_X86_64
+	unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
+	unsigned int family, model, stepping;
+	char name[30];
+
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	family   = __x86_family(eax);
+	model    = x86_model(eax);
+	stepping = eax & 0xf;
+
+	sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
+
+	return get_builtin_firmware(cp, name);
+#else
+	return false;
+#endif
+}
+
+static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
+static __init enum ucode_state
+scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+	       unsigned long start, unsigned long size,
+	       struct ucode_cpu_info *uci)
+{
+	struct cpio_data cd;
+	long offset = 0;
+#ifdef CONFIG_X86_32
+	char *p = (char *)__pa_nodebug(ucode_name);
+#else
+	char *p = ucode_name;
+#endif
+
+	cd.data = NULL;
+	cd.size = 0;
+
+	cd = find_cpio_data(p, (void *)start, size, &offset);
+	if (!cd.data) {
+		if (!load_builtin_intel_microcode(&cd))
+			return UCODE_ERROR;
+	}
+
+	return get_matching_model_microcode(0, start, cd.data, cd.size,
+					    mc_saved_data, initrd, uci);
+}
+
+/*
+ * Print ucode update info.
+ */
+static void
+print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
+{
+	int cpu = smp_processor_id();
+
+	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
+		cpu,
+		uci->cpu_sig.rev,
+		date & 0xffff,
+		date >> 24,
+		(date >> 16) & 0xff);
+}
+
+#ifdef CONFIG_X86_32
+
+static int delay_ucode_info;
+static int current_mc_date;
+
+/*
+ * Print early updated ucode info after printk works. This is delayed info dump.
+ */
+void show_ucode_info_early(void)
+{
+	struct ucode_cpu_info uci;
+
+	if (delay_ucode_info) {
+		collect_cpu_info_early(&uci);
+		print_ucode_info(&uci, current_mc_date);
+		delay_ucode_info = 0;
+	}
+}
+
+/*
+ * At this point, we can not call printk() yet. Keep microcode patch number in
+ * mc_saved_data.mc_saved and delay printing microcode info in
+ * show_ucode_info_early() until printk() works.
+ */
+static void print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+	int *delay_ucode_info_p;
+	int *current_mc_date_p;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
+	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
+
+	*delay_ucode_info_p = 1;
+	*current_mc_date_p = mc_intel->hdr.date;
+}
+#else
+
+/*
+ * Flush global tlb. We only do this in x86_64 where paging has been enabled
+ * already and PGE should be enabled as well.
+ */
+static inline void flush_tlb_early(void)
+{
+	__native_flush_tlb_global_irq_disabled();
+}
+
+static inline void print_ucode(struct ucode_cpu_info *uci)
+{
+	struct microcode_intel *mc_intel;
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return;
+
+	print_ucode_info(uci, mc_intel->hdr.date);
+}
+#endif
+
+static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
+{
+	struct microcode_intel *mc_intel;
+	unsigned int val[2];
+
+	mc_intel = uci->mc;
+	if (mc_intel == NULL)
+		return 0;
+
+	/* write microcode via MSR 0x79 */
+	native_wrmsr(MSR_IA32_UCODE_WRITE,
+	      (unsigned long) mc_intel->bits,
+	      (unsigned long) mc_intel->bits >> 16 >> 16);
+	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+
+	/* As documented in the SDM: Do a CPUID 1 here */
+	sync_core();
+
+	/* get the current revision from MSR 0x8B */
+	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+	if (val[1] != mc_intel->hdr.rev)
+		return -1;
+
+#ifdef CONFIG_X86_64
+	/* Flush global tlb. This is precaution. */
+	flush_tlb_early();
+#endif
+	uci->cpu_sig.rev = val[1];
+
+	if (early)
+		print_ucode(uci);
+	else
+		print_ucode_info(uci, mc_intel->hdr.date);
+
+	return 0;
+}
+
+/*
+ * This function converts microcode patch offsets previously stored in
+ * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
+ */
+int __init save_microcode_in_initrd_intel(void)
+{
+	unsigned int count = mc_saved_data.mc_saved_count;
+	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
+	int ret = 0;
+
+	if (count == 0)
+		return ret;
+
+	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
+	ret = save_microcode(&mc_saved_data, mc_saved, count);
+	if (ret)
+		pr_err("Cannot save microcode patches from initrd.\n");
+
+	show_saved_mc();
+
+	return ret;
+}
+
+static void __init
+_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
+		      unsigned long *initrd,
+		      unsigned long start, unsigned long size)
+{
+	struct ucode_cpu_info uci;
+	enum ucode_state ret;
+
+	collect_cpu_info_early(&uci);
+
+	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+	if (ret != UCODE_OK)
+		return;
+
+	ret = load_microcode(mc_saved_data, initrd, start, &uci);
+	if (ret != UCODE_OK)
+		return;
+
+	apply_microcode_early(&uci, true);
+}
+
+void __init load_ucode_intel_bsp(void)
+{
+	u64 start, size;
+#ifdef CONFIG_X86_32
+	struct boot_params *p;
+
+	p	= (struct boot_params *)__pa_nodebug(&boot_params);
+	start	= p->hdr.ramdisk_image;
+	size	= p->hdr.ramdisk_size;
+
+	_load_ucode_intel_bsp(
+			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+			start, size);
+#else
+	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
+	size	= boot_params.hdr.ramdisk_size;
+
+	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
+#endif
+}
+
+void load_ucode_intel_ap(void)
+{
+	struct mc_saved_data *mc_saved_data_p;
+	struct ucode_cpu_info uci;
+	unsigned long *mc_saved_in_initrd_p;
+	unsigned long initrd_start_addr;
+	enum ucode_state ret;
+#ifdef CONFIG_X86_32
+	unsigned long *initrd_start_p;
+
+	mc_saved_in_initrd_p =
+		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
+#else
+	mc_saved_data_p = &mc_saved_data;
+	mc_saved_in_initrd_p = mc_saved_in_initrd;
+	initrd_start_addr = initrd_start;
+#endif
+
+	/*
+	 * If there is no valid ucode previously saved in memory, no need to
+	 * update ucode on this AP.
+	 */
+	if (mc_saved_data_p->mc_saved_count == 0)
+		return;
+
+	collect_cpu_info_early(&uci);
+	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+			     initrd_start_addr, &uci);
+
+	if (ret != UCODE_OK)
+		return;
+
+	apply_microcode_early(&uci, true);
+}
+
+void reload_ucode_intel(void)
+{
+	struct ucode_cpu_info uci;
+	enum ucode_state ret;
+
+	if (!mc_saved_data.mc_saved_count)
+		return;
+
+	collect_cpu_info_early(&uci);
+
+	ret = load_microcode_early(mc_saved_data.mc_saved,
+				   mc_saved_data.mc_saved_count, &uci);
+	if (ret != UCODE_OK)
+		return;
+
+	apply_microcode_early(&uci, false);
+}
 
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
@@ -264,7 +1041,7 @@ static struct microcode_ops microcode_intel_ops = {
 
 struct microcode_ops * __init init_intel_microcode(void)
 {
-	struct cpuinfo_x86 *c = &cpu_data(0);
+	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
 	    cpu_has(c, X86_FEATURE_IA64)) {
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
deleted file mode 100644
index 37ea89c11520..000000000000
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ /dev/null
@@ -1,808 +0,0 @@
-/*
- *	Intel CPU microcode early update for Linux
- *
- *	Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
- *			   H Peter Anvin" <hpa@zytor.com>
- *
- *	This allows to early upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture
- *	Software Developer's Manual.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-/*
- * This needs to be before all headers so that pr_debug in printk.h doesn't turn
- * printk calls into no_printk().
- *
- *#define DEBUG
- */
-
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-#include <linux/cpu.h>
-#include <asm/msr.h>
-#include <asm/microcode_intel.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/setup.h>
-
-#undef pr_fmt
-#define pr_fmt(fmt)	"microcode: " fmt
-
-static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
-static struct mc_saved_data {
-	unsigned int mc_saved_count;
-	struct microcode_intel **mc_saved;
-} mc_saved_data;
-
-static enum ucode_state
-load_microcode_early(struct microcode_intel **saved,
-		     unsigned int num_saved, struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *ucode_ptr, *new_mc = NULL;
-	struct microcode_header_intel *mc_hdr;
-	int new_rev, ret, i;
-
-	new_rev = uci->cpu_sig.rev;
-
-	for (i = 0; i < num_saved; i++) {
-		ucode_ptr = saved[i];
-		mc_hdr	  = (struct microcode_header_intel *)ucode_ptr;
-
-		ret = has_newer_microcode(ucode_ptr,
-					  uci->cpu_sig.sig,
-					  uci->cpu_sig.pf,
-					  new_rev);
-		if (!ret)
-			continue;
-
-		new_rev = mc_hdr->rev;
-		new_mc  = ucode_ptr;
-	}
-
-	if (!new_mc)
-		return UCODE_NFOUND;
-
-	uci->mc = (struct microcode_intel *)new_mc;
-	return UCODE_OK;
-}
-
-static inline void
-copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
-		  unsigned long off, int num_saved)
-{
-	int i;
-
-	for (i = 0; i < num_saved; i++)
-		mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
-}
-
-#ifdef CONFIG_X86_32
-static void
-microcode_phys(struct microcode_intel **mc_saved_tmp,
-	       struct mc_saved_data *mc_saved_data)
-{
-	int i;
-	struct microcode_intel ***mc_saved;
-
-	mc_saved = (struct microcode_intel ***)
-		   __pa_nodebug(&mc_saved_data->mc_saved);
-	for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
-		struct microcode_intel *p;
-
-		p = *(struct microcode_intel **)
-			__pa_nodebug(mc_saved_data->mc_saved + i);
-		mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
-	}
-}
-#endif
-
-static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
-	       unsigned long initrd_start, struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int count = mc_saved_data->mc_saved_count;
-
-	if (!mc_saved_data->mc_saved) {
-		copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
-
-		return load_microcode_early(mc_saved_tmp, count, uci);
-	} else {
-#ifdef CONFIG_X86_32
-		microcode_phys(mc_saved_tmp, mc_saved_data);
-		return load_microcode_early(mc_saved_tmp, count, uci);
-#else
-		return load_microcode_early(mc_saved_data->mc_saved,
-						    count, uci);
-#endif
-	}
-}
-
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- */
-static enum ucode_state
-matching_model_microcode(struct microcode_header_intel *mc_header,
-			unsigned long sig)
-{
-	unsigned int fam, model;
-	unsigned int fam_ucode, model_ucode;
-	struct extended_sigtable *ext_header;
-	unsigned long total_size = get_totalsize(mc_header);
-	unsigned long data_size = get_datasize(mc_header);
-	int ext_sigcount, i;
-	struct extended_signature *ext_sig;
-
-	fam   = __x86_family(sig);
-	model = x86_model(sig);
-
-	fam_ucode   = __x86_family(mc_header->sig);
-	model_ucode = x86_model(mc_header->sig);
-
-	if (fam == fam_ucode && model == model_ucode)
-		return UCODE_OK;
-
-	/* Look for ext. headers: */
-	if (total_size <= data_size + MC_HEADER_SIZE)
-		return UCODE_NFOUND;
-
-	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
-	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-
-	for (i = 0; i < ext_sigcount; i++) {
-		fam_ucode   = __x86_family(ext_sig->sig);
-		model_ucode = x86_model(ext_sig->sig);
-
-		if (fam == fam_ucode && model == model_ucode)
-			return UCODE_OK;
-
-		ext_sig++;
-	}
-	return UCODE_NFOUND;
-}
-
-static int
-save_microcode(struct mc_saved_data *mc_saved_data,
-	       struct microcode_intel **mc_saved_src,
-	       unsigned int mc_saved_count)
-{
-	int i, j;
-	struct microcode_intel **saved_ptr;
-	int ret;
-
-	if (!mc_saved_count)
-		return -EINVAL;
-
-	/*
-	 * Copy new microcode data.
-	 */
-	saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
-	if (!saved_ptr)
-		return -ENOMEM;
-
-	for (i = 0; i < mc_saved_count; i++) {
-		struct microcode_header_intel *mc_hdr;
-		struct microcode_intel *mc;
-		unsigned long size;
-
-		if (!mc_saved_src[i]) {
-			ret = -EINVAL;
-			goto err;
-		}
-
-		mc     = mc_saved_src[i];
-		mc_hdr = &mc->hdr;
-		size   = get_totalsize(mc_hdr);
-
-		saved_ptr[i] = kmalloc(size, GFP_KERNEL);
-		if (!saved_ptr[i]) {
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		memcpy(saved_ptr[i], mc, size);
-	}
-
-	/*
-	 * Point to newly saved microcode.
-	 */
-	mc_saved_data->mc_saved = saved_ptr;
-	mc_saved_data->mc_saved_count = mc_saved_count;
-
-	return 0;
-
-err:
-	for (j = 0; j <= i; j++)
-		kfree(saved_ptr[j]);
-	kfree(saved_ptr);
-
-	return ret;
-}
-
-/*
- * A microcode patch in ucode_ptr is saved into mc_saved
- * - if it has matching signature and newer revision compared to an existing
- *   patch mc_saved.
- * - or if it is a newly discovered microcode patch.
- *
- * The microcode patch should have matching model with CPU.
- *
- * Returns: The updated number @num_saved of saved microcode patches.
- */
-static unsigned int _save_mc(struct microcode_intel **mc_saved,
-			     u8 *ucode_ptr, unsigned int num_saved)
-{
-	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
-	unsigned int sig, pf;
-	int found = 0, i;
-
-	mc_hdr = (struct microcode_header_intel *)ucode_ptr;
-
-	for (i = 0; i < num_saved; i++) {
-		mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
-		sig	     = mc_saved_hdr->sig;
-		pf	     = mc_saved_hdr->pf;
-
-		if (!find_matching_signature(ucode_ptr, sig, pf))
-			continue;
-
-		found = 1;
-
-		if (mc_hdr->rev <= mc_saved_hdr->rev)
-			continue;
-
-		/*
-		 * Found an older ucode saved earlier. Replace it with
-		 * this newer one.
-		 */
-		mc_saved[i] = (struct microcode_intel *)ucode_ptr;
-		break;
-	}
-
-	/* Newly detected microcode, save it to memory. */
-	if (i >= num_saved && !found)
-		mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
-
-	return num_saved;
-}
-
-/*
- * Get microcode matching with BSP's model. Only CPUs with the same model as
- * BSP can stay in the platform.
- */
-static enum ucode_state __init
-get_matching_model_microcode(int cpu, unsigned long start,
-			     void *data, size_t size,
-			     struct mc_saved_data *mc_saved_data,
-			     unsigned long *mc_saved_in_initrd,
-			     struct ucode_cpu_info *uci)
-{
-	u8 *ucode_ptr = data;
-	unsigned int leftover = size;
-	enum ucode_state state = UCODE_OK;
-	unsigned int mc_size;
-	struct microcode_header_intel *mc_header;
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
-	int i;
-
-	while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
-
-		if (leftover < sizeof(mc_header))
-			break;
-
-		mc_header = (struct microcode_header_intel *)ucode_ptr;
-
-		mc_size = get_totalsize(mc_header);
-		if (!mc_size || mc_size > leftover ||
-			microcode_sanity_check(ucode_ptr, 0) < 0)
-			break;
-
-		leftover -= mc_size;
-
-		/*
-		 * Since APs with same family and model as the BSP may boot in
-		 * the platform, we need to find and save microcode patches
-		 * with the same family and model as the BSP.
-		 */
-		if (matching_model_microcode(mc_header, uci->cpu_sig.sig) !=
-			 UCODE_OK) {
-			ucode_ptr += mc_size;
-			continue;
-		}
-
-		mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
-
-		ucode_ptr += mc_size;
-	}
-
-	if (leftover) {
-		state = UCODE_ERROR;
-		goto out;
-	}
-
-	if (mc_saved_count == 0) {
-		state = UCODE_NFOUND;
-		goto out;
-	}
-
-	for (i = 0; i < mc_saved_count; i++)
-		mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start;
-
-	mc_saved_data->mc_saved_count = mc_saved_count;
-out:
-	return state;
-}
-
-static int collect_cpu_info_early(struct ucode_cpu_info *uci)
-{
-	unsigned int val[2];
-	unsigned int family, model;
-	struct cpu_signature csig;
-	unsigned int eax, ebx, ecx, edx;
-
-	csig.sig = 0;
-	csig.pf = 0;
-	csig.rev = 0;
-
-	memset(uci, 0, sizeof(*uci));
-
-	eax = 0x00000001;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	csig.sig = eax;
-
-	family = __x86_family(csig.sig);
-	model  = x86_model(csig.sig);
-
-	if ((model >= 5) || (family > 6)) {
-		/* get processor flags from MSR 0x17 */
-		native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-		csig.pf = 1 << ((val[1] >> 18) & 7);
-	}
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-
-	csig.rev = val[1];
-
-	uci->cpu_sig = csig;
-	uci->valid = 1;
-
-	return 0;
-}
-
-#ifdef DEBUG
-static void show_saved_mc(void)
-{
-	int i, j;
-	unsigned int sig, pf, rev, total_size, data_size, date;
-	struct ucode_cpu_info uci;
-
-	if (mc_saved_data.mc_saved_count == 0) {
-		pr_debug("no microcode data saved.\n");
-		return;
-	}
-	pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count);
-
-	collect_cpu_info_early(&uci);
-
-	sig = uci.cpu_sig.sig;
-	pf = uci.cpu_sig.pf;
-	rev = uci.cpu_sig.rev;
-	pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
-
-	for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
-		struct microcode_header_intel *mc_saved_header;
-		struct extended_sigtable *ext_header;
-		int ext_sigcount;
-		struct extended_signature *ext_sig;
-
-		mc_saved_header = (struct microcode_header_intel *)
-				  mc_saved_data.mc_saved[i];
-		sig = mc_saved_header->sig;
-		pf = mc_saved_header->pf;
-		rev = mc_saved_header->rev;
-		total_size = get_totalsize(mc_saved_header);
-		data_size = get_datasize(mc_saved_header);
-		date = mc_saved_header->date;
-
-		pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n",
-			 i, sig, pf, rev, total_size,
-			 date & 0xffff,
-			 date >> 24,
-			 (date >> 16) & 0xff);
-
-		/* Look for ext. headers: */
-		if (total_size <= data_size + MC_HEADER_SIZE)
-			continue;
-
-		ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
-		ext_sigcount = ext_header->count;
-		ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-		for (j = 0; j < ext_sigcount; j++) {
-			sig = ext_sig->sig;
-			pf = ext_sig->pf;
-
-			pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
-				 j, sig, pf);
-
-			ext_sig++;
-		}
-
-	}
-}
-#else
-static inline void show_saved_mc(void)
-{
-}
-#endif
-
-#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU)
-static DEFINE_MUTEX(x86_cpu_microcode_mutex);
-/*
- * Save this mc into mc_saved_data. So it will be loaded early when a CPU is
- * hot added or resumes.
- *
- * Please make sure this mc should be a valid microcode patch before calling
- * this function.
- */
-int save_mc_for_early(u8 *mc)
-{
-	struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
-	unsigned int mc_saved_count_init;
-	unsigned int mc_saved_count;
-	struct microcode_intel **mc_saved;
-	int ret = 0;
-	int i;
-
-	/*
-	 * Hold hotplug lock so mc_saved_data is not accessed by a CPU in
-	 * hotplug.
-	 */
-	mutex_lock(&x86_cpu_microcode_mutex);
-
-	mc_saved_count_init = mc_saved_data.mc_saved_count;
-	mc_saved_count = mc_saved_data.mc_saved_count;
-	mc_saved = mc_saved_data.mc_saved;
-
-	if (mc_saved && mc_saved_count)
-		memcpy(mc_saved_tmp, mc_saved,
-		       mc_saved_count * sizeof(struct microcode_intel *));
-	/*
-	 * Save the microcode patch mc in mc_save_tmp structure if it's a newer
-	 * version.
-	 */
-	mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
-
-	/*
-	 * Save the mc_save_tmp in global mc_saved_data.
-	 */
-	ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
-	if (ret) {
-		pr_err("Cannot save microcode patch.\n");
-		goto out;
-	}
-
-	show_saved_mc();
-
-	/*
-	 * Free old saved microcode data.
-	 */
-	if (mc_saved) {
-		for (i = 0; i < mc_saved_count_init; i++)
-			kfree(mc_saved[i]);
-		kfree(mc_saved);
-	}
-
-out:
-	mutex_unlock(&x86_cpu_microcode_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(save_mc_for_early);
-#endif
-
-static bool __init load_builtin_intel_microcode(struct cpio_data *cp)
-{
-#ifdef CONFIG_X86_64
-	unsigned int eax = 0x00000001, ebx, ecx = 0, edx;
-	unsigned int family, model, stepping;
-	char name[30];
-
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	family   = __x86_family(eax);
-	model    = x86_model(eax);
-	stepping = eax & 0xf;
-
-	sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping);
-
-	return get_builtin_firmware(cp, name);
-#else
-	return false;
-#endif
-}
-
-static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
-static __init enum ucode_state
-scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
-	       unsigned long start, unsigned long size,
-	       struct ucode_cpu_info *uci)
-{
-	struct cpio_data cd;
-	long offset = 0;
-#ifdef CONFIG_X86_32
-	char *p = (char *)__pa_nodebug(ucode_name);
-#else
-	char *p = ucode_name;
-#endif
-
-	cd.data = NULL;
-	cd.size = 0;
-
-	cd = find_cpio_data(p, (void *)start, size, &offset);
-	if (!cd.data) {
-		if (!load_builtin_intel_microcode(&cd))
-			return UCODE_ERROR;
-	}
-
-	return get_matching_model_microcode(0, start, cd.data, cd.size,
-					    mc_saved_data, initrd, uci);
-}
-
-/*
- * Print ucode update info.
- */
-static void
-print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
-{
-	int cpu = smp_processor_id();
-
-	pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n",
-		cpu,
-		uci->cpu_sig.rev,
-		date & 0xffff,
-		date >> 24,
-		(date >> 16) & 0xff);
-}
-
-#ifdef CONFIG_X86_32
-
-static int delay_ucode_info;
-static int current_mc_date;
-
-/*
- * Print early updated ucode info after printk works. This is delayed info dump.
- */
-void show_ucode_info_early(void)
-{
-	struct ucode_cpu_info uci;
-
-	if (delay_ucode_info) {
-		collect_cpu_info_early(&uci);
-		print_ucode_info(&uci, current_mc_date);
-		delay_ucode_info = 0;
-	}
-}
-
-/*
- * At this point, we can not call printk() yet. Keep microcode patch number in
- * mc_saved_data.mc_saved and delay printing microcode info in
- * show_ucode_info_early() until printk() works.
- */
-static void print_ucode(struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_intel;
-	int *delay_ucode_info_p;
-	int *current_mc_date_p;
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return;
-
-	delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
-	current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
-
-	*delay_ucode_info_p = 1;
-	*current_mc_date_p = mc_intel->hdr.date;
-}
-#else
-
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-	__native_flush_tlb_global_irq_disabled();
-}
-
-static inline void print_ucode(struct ucode_cpu_info *uci)
-{
-	struct microcode_intel *mc_intel;
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return;
-
-	print_ucode_info(uci, mc_intel->hdr.date);
-}
-#endif
-
-static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
-{
-	struct microcode_intel *mc_intel;
-	unsigned int val[2];
-
-	mc_intel = uci->mc;
-	if (mc_intel == NULL)
-		return 0;
-
-	/* write microcode via MSR 0x79 */
-	native_wrmsr(MSR_IA32_UCODE_WRITE,
-	      (unsigned long) mc_intel->bits,
-	      (unsigned long) mc_intel->bits >> 16 >> 16);
-	native_wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-
-	/* As documented in the SDM: Do a CPUID 1 here */
-	sync_core();
-
-	/* get the current revision from MSR 0x8B */
-	native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
-	if (val[1] != mc_intel->hdr.rev)
-		return -1;
-
-#ifdef CONFIG_X86_64
-	/* Flush global tlb. This is precaution. */
-	flush_tlb_early();
-#endif
-	uci->cpu_sig.rev = val[1];
-
-	if (early)
-		print_ucode(uci);
-	else
-		print_ucode_info(uci, mc_intel->hdr.date);
-
-	return 0;
-}
-
-/*
- * This function converts microcode patch offsets previously stored in
- * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
- */
-int __init save_microcode_in_initrd_intel(void)
-{
-	unsigned int count = mc_saved_data.mc_saved_count;
-	struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
-	int ret = 0;
-
-	if (count == 0)
-		return ret;
-
-	copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
-	ret = save_microcode(&mc_saved_data, mc_saved, count);
-	if (ret)
-		pr_err("Cannot save microcode patches from initrd.\n");
-
-	show_saved_mc();
-
-	return ret;
-}
-
-static void __init
-_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-		      unsigned long *initrd,
-		      unsigned long start, unsigned long size)
-{
-	struct ucode_cpu_info uci;
-	enum ucode_state ret;
-
-	collect_cpu_info_early(&uci);
-
-	ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
-	if (ret != UCODE_OK)
-		return;
-
-	ret = load_microcode(mc_saved_data, initrd, start, &uci);
-	if (ret != UCODE_OK)
-		return;
-
-	apply_microcode_early(&uci, true);
-}
-
-void __init load_ucode_intel_bsp(void)
-{
-	u64 start, size;
-#ifdef CONFIG_X86_32
-	struct boot_params *p;
-
-	p	= (struct boot_params *)__pa_nodebug(&boot_params);
-	start	= p->hdr.ramdisk_image;
-	size	= p->hdr.ramdisk_size;
-
-	_load_ucode_intel_bsp(
-			(struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-			(unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-			start, size);
-#else
-	start	= boot_params.hdr.ramdisk_image + PAGE_OFFSET;
-	size	= boot_params.hdr.ramdisk_size;
-
-	_load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
-#endif
-}
-
-void load_ucode_intel_ap(void)
-{
-	struct mc_saved_data *mc_saved_data_p;
-	struct ucode_cpu_info uci;
-	unsigned long *mc_saved_in_initrd_p;
-	unsigned long initrd_start_addr;
-	enum ucode_state ret;
-#ifdef CONFIG_X86_32
-	unsigned long *initrd_start_p;
-
-	mc_saved_in_initrd_p =
-		(unsigned long *)__pa_nodebug(mc_saved_in_initrd);
-	mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
-	initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
-	initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
-#else
-	mc_saved_data_p = &mc_saved_data;
-	mc_saved_in_initrd_p = mc_saved_in_initrd;
-	initrd_start_addr = initrd_start;
-#endif
-
-	/*
-	 * If there is no valid ucode previously saved in memory, no need to
-	 * update ucode on this AP.
-	 */
-	if (mc_saved_data_p->mc_saved_count == 0)
-		return;
-
-	collect_cpu_info_early(&uci);
-	ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-			     initrd_start_addr, &uci);
-
-	if (ret != UCODE_OK)
-		return;
-
-	apply_microcode_early(&uci, true);
-}
-
-void reload_ucode_intel(void)
-{
-	struct ucode_cpu_info uci;
-	enum ucode_state ret;
-
-	if (!mc_saved_data.mc_saved_count)
-		return;
-
-	collect_cpu_info_early(&uci);
-
-	ret = load_microcode_early(mc_saved_data.mc_saved,
-				   mc_saved_data.mc_saved_count, &uci);
-	if (ret != UCODE_OK)
-		return;
-
-	apply_microcode_early(&uci, false);
-}
diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c
index 1883d252ff7d..b96896bcbdaf 100644
--- a/arch/x86/kernel/cpu/microcode/intel_lib.c
+++ b/arch/x86/kernel/cpu/microcode/intel_lib.c
@@ -25,7 +25,6 @@
 #include <linux/firmware.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 
 #include <asm/microcode_intel.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 381c8b9b3a33..20e242ea1bc4 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -34,11 +34,10 @@
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
 
-static void (*hv_kexec_handler)(void);
-static void (*hv_crash_handler)(struct pt_regs *regs);
-
 #if IS_ENABLED(CONFIG_HYPERV)
 static void (*vmbus_handler)(void);
+static void (*hv_kexec_handler)(void);
+static void (*hv_crash_handler)(struct pt_regs *regs);
 
 void hyperv_vector_handler(struct pt_regs *regs)
 {
@@ -96,8 +95,8 @@ void hv_remove_crash_handler(void)
 	hv_crash_handler = NULL;
 }
 EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
-#endif
 
+#ifdef CONFIG_KEXEC_CORE
 static void hv_machine_shutdown(void)
 {
 	if (kexec_in_progress && hv_kexec_handler)
@@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
 		hv_crash_handler(regs);
 	native_machine_crash_shutdown(regs);
 }
-
+#endif /* CONFIG_KEXEC_CORE */
+#endif /* CONFIG_HYPERV */
 
 static uint32_t  __init ms_hyperv_platform(void)
 {
@@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void)
 	no_timer_check = 1;
 #endif
 
+#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
 	machine_ops.shutdown = hv_machine_shutdown;
 	machine_ops.crash_shutdown = hv_machine_crash_shutdown;
+#endif
 	mark_tsc_unstable("running on Hyper-V");
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66dd3fe99b82..4562cf070c27 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole.
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN)
+	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
 
 	ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * XXX assumes any ->del() called during a TXN will only be on
 	 * an event added during that same TXN.
 	 */
-	if (cpuc->group_flag & PERF_EVENT_TXN)
+	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		return;
 
 	/*
@@ -1748,11 +1748,22 @@ static inline void x86_pmu_read(struct perf_event *event)
  * Start group events scheduling transaction
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void x86_pmu_start_txn(struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON_ONCE(cpuc->txn_flags);		/* txn already in flight */
+
+	cpuc->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	perf_pmu_disable(pmu);
-	__this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
 	__this_cpu_write(cpu_hw_events.n_txn, 0);
 }
 
@@ -1763,7 +1774,16 @@ static void x86_pmu_start_txn(struct pmu *pmu)
  */
 static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
-	__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
+	unsigned int txn_flags;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON_ONCE(!cpuc->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuc->txn_flags;
+	cpuc->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
 	/*
 	 * Truncate collected array by the number of events added in this
 	 * transaction. See x86_pmu_add() and x86_pmu_*_txn().
@@ -1786,6 +1806,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
 	int assign[X86_PMC_IDX_MAX];
 	int n, ret;
 
+	WARN_ON_ONCE(!cpuc->txn_flags);	/* no txn in flight */
+
+	if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuc->txn_flags = 0;
+		return 0;
+	}
+
 	n = cpuc->n_events;
 
 	if (!x86_pmu_initialized())
@@ -1801,7 +1828,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
-	cpuc->group_flag &= ~PERF_EVENT_TXN;
+	cpuc->txn_flags = 0;
 	perf_pmu_enable(pmu);
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 5edf6d868fc1..499f533dd3cc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -47,6 +47,7 @@ enum extra_reg_type {
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
 	EXTRA_REG_LBR   = 2,	/* lbr_select */
 	EXTRA_REG_LDLAT = 3,	/* ld_lat_threshold */
+	EXTRA_REG_FE    = 4,    /* fe_* */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -195,7 +196,7 @@ struct cpu_hw_events {
 
 	int			n_excl; /* the number of exclusive events */
 
-	unsigned int		group_flag;
+	unsigned int		txn_flags;
 	int			is_fake;
 
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3fefebfbdf4b..f63360be2238 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
 	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
 	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+	/*
+	 * Note the low 8 bits eventsel code is not a continuous field, containing
+	 * some #GPing bits. These are masked out.
+	 */
+	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
 	EVENT_EXTRA_END
 };
 
@@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
 	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
-	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
+	INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4),	/* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
 	EVENT_CONSTRAINT_END
 };
 
@@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
 
+PMU_FORMAT_ATTR(frontend, "config1:0-23");
+
 static struct attribute *intel_arch3_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	NULL,
 };
 
+static struct attribute *skl_format_attr[] = {
+	&format_attr_frontend.attr,
+	NULL,
+};
+
 static __initconst const struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
@@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+						  skl_format_attr);
 		WARN_ON(!x86_pmu.format_attrs);
 		x86_pmu.cpu_events = hsw_events_attrs;
 		pr_cont("Skylake events, ");
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index d1c0f254afbe..2cad71d1b14c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -495,6 +495,19 @@ static int bts_event_init(struct perf_event *event)
 	if (x86_add_exclusive(x86_lbr_exclusive_bts))
 		return -EBUSY;
 
+	/*
+	 * BTS leaks kernel addresses even when CPL0 tracing is
+	 * disabled, so disallow intel_bts driver for unprivileged
+	 * users on paranoid systems since it provides trace data
+	 * to the user in a zero-copy fashion.
+	 *
+	 * Note that the default paranoia setting permits unprivileged
+	 * users to profile the kernel.
+	 */
+	if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	ret = x86_reserve_hardware();
 	if (ret) {
 		x86_del_exclusive(x86_lbr_exclusive_bts);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
new file mode 100644
index 000000000000..75a38b5a2e26
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file export cstate related free running (read-only) counters
+ * for perf. These counters may be use simultaneously by other tools,
+ * such as turbostat. However, it still make sense to implement them
+ * in perf. Because we can conveniently collect them together with
+ * other events, and allow to use them from tools without special MSR
+ * access code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ *  - 'cstate_core': The counter is available for each physical core.
+ *    The counters include CORE_C*_RESIDENCY.
+ *  - 'cstate_pkg': The counter is available for each physical package.
+ *    The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer.s Manual Vol3b.
+ *
+ * Model specific counters:
+ *	MSR_CORE_C1_RES: CORE C1 Residency Counter
+ *			 perf code: 0x00
+ *			 Available model: SLM,AMT
+ *			 Scope: Core (each processor core has a MSR)
+ *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ *			       perf code: 0x01
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Core
+ *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ *			       perf code: 0x02
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Core
+ *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ *			       perf code: 0x03
+ *			       Available model: SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Core
+ *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
+ *			       perf code: 0x00
+ *			       Available model: SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
+ *			       perf code: 0x01
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
+ *			       perf code: 0x02
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
+ *			       perf code: 0x03
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
+ *			       perf code: 0x04
+ *			       Available model: HSW ULT only
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
+ *			       perf code: 0x05
+ *			       Available model: HSW ULT only
+ *			       Scope: Package (physical package)
+ *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ *			       perf code: 0x06
+ *			       Available model: HSW ULT only
+ *			       Scope: Package (physical package)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
+static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
+				struct kobj_attribute *attr,	\
+				char *page)			\
+{								\
+	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
+	return sprintf(page, _format "\n");			\
+}								\
+static struct kobj_attribute format_attr_##_var =		\
+	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf);
+
+struct perf_cstate_msr {
+	u64	msr;
+	struct	perf_pmu_events_attr *attr;
+	bool	(*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+	/*
+	 * cstate_core events
+	 */
+	PERF_CSTATE_CORE_C1_RES = 0,
+	PERF_CSTATE_CORE_C3_RES,
+	PERF_CSTATE_CORE_C6_RES,
+	PERF_CSTATE_CORE_C7_RES,
+
+	PERF_CSTATE_CORE_EVENT_MAX,
+};
+
+bool test_core(int idx)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.x86 != 6)
+		return false;
+
+	switch (boot_cpu_data.x86_model) {
+	case 30: /* 45nm Nehalem    */
+	case 26: /* 45nm Nehalem-EP */
+	case 46: /* 45nm Nehalem-EX */
+
+	case 37: /* 32nm Westmere    */
+	case 44: /* 32nm Westmere-EP */
+	case 47: /* 32nm Westmere-EX */
+		if (idx == PERF_CSTATE_CORE_C3_RES ||
+		    idx == PERF_CSTATE_CORE_C6_RES)
+			return true;
+		break;
+	case 42: /* 32nm SandyBridge         */
+	case 45: /* 32nm SandyBridge-E/EN/EP */
+
+	case 58: /* 22nm IvyBridge       */
+	case 62: /* 22nm IvyBridge-EP/EX */
+
+	case 60: /* 22nm Haswell Core */
+	case 63: /* 22nm Haswell Server */
+	case 69: /* 22nm Haswell ULT */
+	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+	case 79: /* 14nm Broadwell Server */
+
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		if (idx == PERF_CSTATE_CORE_C3_RES ||
+		    idx == PERF_CSTATE_CORE_C6_RES ||
+		    idx == PERF_CSTATE_CORE_C7_RES)
+			return true;
+		break;
+	case 55: /* 22nm Atom "Silvermont"                */
+	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+	case 76: /* 14nm Atom "Airmont"                   */
+		if (idx == PERF_CSTATE_CORE_C1_RES ||
+		    idx == PERF_CSTATE_CORE_C6_RES)
+			return true;
+		break;
+	}
+
+	return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1,	test_core, },
+	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3, test_core, },
+	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6, test_core, },
+	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7,	test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+	NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+	.name = "events",
+	.attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+	&format_attr_core_event.attr,
+	NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+	.name = "format",
+	.attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+	.attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+	&core_events_attr_group,
+	&core_format_attr_group,
+	&cpumask_attr_group,
+	NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+	/*
+	 * cstate_pkg events
+	 */
+	PERF_CSTATE_PKG_C2_RES = 0,
+	PERF_CSTATE_PKG_C3_RES,
+	PERF_CSTATE_PKG_C6_RES,
+	PERF_CSTATE_PKG_C7_RES,
+	PERF_CSTATE_PKG_C8_RES,
+	PERF_CSTATE_PKG_C9_RES,
+	PERF_CSTATE_PKG_C10_RES,
+
+	PERF_CSTATE_PKG_EVENT_MAX,
+};
+
+bool test_pkg(int idx)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+	    boot_cpu_data.x86 != 6)
+		return false;
+
+	switch (boot_cpu_data.x86_model) {
+	case 30: /* 45nm Nehalem    */
+	case 26: /* 45nm Nehalem-EP */
+	case 46: /* 45nm Nehalem-EX */
+
+	case 37: /* 32nm Westmere    */
+	case 44: /* 32nm Westmere-EP */
+	case 47: /* 32nm Westmere-EX */
+		if (idx == PERF_CSTATE_CORE_C3_RES ||
+		    idx == PERF_CSTATE_CORE_C6_RES ||
+		    idx == PERF_CSTATE_CORE_C7_RES)
+			return true;
+		break;
+	case 42: /* 32nm SandyBridge         */
+	case 45: /* 32nm SandyBridge-E/EN/EP */
+
+	case 58: /* 22nm IvyBridge       */
+	case 62: /* 22nm IvyBridge-EP/EX */
+
+	case 60: /* 22nm Haswell Core */
+	case 63: /* 22nm Haswell Server */
+	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+	case 61: /* 14nm Broadwell Core-M */
+	case 86: /* 14nm Broadwell Xeon D */
+	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+	case 79: /* 14nm Broadwell Server */
+
+	case 78: /* 14nm Skylake Mobile */
+	case 94: /* 14nm Skylake Desktop */
+		if (idx == PERF_CSTATE_PKG_C2_RES ||
+		    idx == PERF_CSTATE_PKG_C3_RES ||
+		    idx == PERF_CSTATE_PKG_C6_RES ||
+		    idx == PERF_CSTATE_PKG_C7_RES)
+			return true;
+		break;
+	case 55: /* 22nm Atom "Silvermont"                */
+	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+	case 76: /* 14nm Atom "Airmont"                   */
+		if (idx == PERF_CSTATE_CORE_C6_RES)
+			return true;
+		break;
+	case 69: /* 22nm Haswell ULT */
+		if (idx == PERF_CSTATE_PKG_C2_RES ||
+		    idx == PERF_CSTATE_PKG_C3_RES ||
+		    idx == PERF_CSTATE_PKG_C6_RES ||
+		    idx == PERF_CSTATE_PKG_C7_RES ||
+		    idx == PERF_CSTATE_PKG_C8_RES ||
+		    idx == PERF_CSTATE_PKG_C9_RES ||
+		    idx == PERF_CSTATE_PKG_C10_RES)
+			return true;
+		break;
+	}
+
+	return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2,	test_pkg, },
+	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3,	test_pkg, },
+	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6,	test_pkg, },
+	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7,	test_pkg, },
+	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8,	test_pkg, },
+	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9,	test_pkg, },
+	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10,	test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+	NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+	.name = "events",
+	.attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+	&format_attr_pkg_event.attr,
+	NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+	.name = "format",
+	.attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+	&pkg_events_attr_group,
+	&pkg_format_attr_group,
+	&cpumask_attr_group,
+	NULL,
+};
+
+/* cstate_pkg PMU end*/
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	if (pmu == &cstate_core_pmu)
+		return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+	else if (pmu == &cstate_pkg_pmu)
+		return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+	else
+		return 0;
+}
+
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+	int ret = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    event->attr.sample_period) /* no sampling */
+		return -EINVAL;
+
+	if (event->pmu == &cstate_core_pmu) {
+		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+			return -EINVAL;
+		if (!core_msr[cfg].attr)
+			return -EINVAL;
+		event->hw.event_base = core_msr[cfg].msr;
+	} else if (event->pmu == &cstate_pkg_pmu) {
+		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+			return -EINVAL;
+		if (!pkg_msr[cfg].attr)
+			return -EINVAL;
+		event->hw.event_base = pkg_msr[cfg].msr;
+	} else
+		return -ENOENT;
+
+	/* must be done before validate_group */
+	event->hw.config = cfg;
+	event->hw.idx = -1;
+
+	return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+	u64 val;
+
+	rdmsrl(event->hw.event_base, val);
+	return val;
+}
+
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = cstate_pmu_read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		goto again;
+
+	local64_add(new_raw_count - prev_raw_count, &event->count);
+}
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+	local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+	cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+	cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+	if (mode & PERF_EF_START)
+		cstate_pmu_event_start(event, mode);
+
+	return 0;
+}
+
+static void cstate_cpu_exit(int cpu)
+{
+	int i, id, target;
+
+	/* cpu exit for cstate core */
+	if (has_cstate_core) {
+		id = topology_core_id(cpu);
+		target = -1;
+
+		for_each_online_cpu(i) {
+			if (i == cpu)
+				continue;
+			if (id == topology_core_id(i)) {
+				target = i;
+				break;
+			}
+		}
+		if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+			cpumask_set_cpu(target, &cstate_core_cpu_mask);
+		WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+		if (target >= 0)
+			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+	}
+
+	/* cpu exit for cstate pkg */
+	if (has_cstate_pkg) {
+		id = topology_physical_package_id(cpu);
+		target = -1;
+
+		for_each_online_cpu(i) {
+			if (i == cpu)
+				continue;
+			if (id == topology_physical_package_id(i)) {
+				target = i;
+				break;
+			}
+		}
+		if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+		WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+		if (target >= 0)
+			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+	}
+}
+
+static void cstate_cpu_init(int cpu)
+{
+	int i, id;
+
+	/* cpu init for cstate core */
+	if (has_cstate_core) {
+		id = topology_core_id(cpu);
+		for_each_cpu(i, &cstate_core_cpu_mask) {
+			if (id == topology_core_id(i))
+				break;
+		}
+		if (i >= nr_cpu_ids)
+			cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+	}
+
+	/* cpu init for cstate pkg */
+	if (has_cstate_pkg) {
+		id = topology_physical_package_id(cpu);
+		for_each_cpu(i, &cstate_pkg_cpu_mask) {
+			if (id == topology_physical_package_id(i))
+				break;
+		}
+		if (i >= nr_cpu_ids)
+			cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+	}
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+				  unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		break;
+	case CPU_STARTING:
+		cstate_cpu_init(cpu);
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_DYING:
+		break;
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		break;
+	case CPU_DOWN_PREPARE:
+		cstate_cpu_exit(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available one into sysfs attrs
+ * Return false if there is no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+			     struct attribute	**events_attrs,
+			     int max_event_nr)
+{
+	int i, j = 0;
+	u64 val;
+
+	/* Probe the cstate events. */
+	for (i = 0; i < max_event_nr; i++) {
+		if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+			msr[i].attr = NULL;
+	}
+
+	/* List remaining events in the sysfs attrs. */
+	for (i = 0; i < max_event_nr; i++) {
+		if (msr[i].attr)
+			events_attrs[j++] = &msr[i].attr->attr.attr;
+	}
+	events_attrs[j] = NULL;
+
+	return (j > 0) ? true : false;
+}
+
+static int __init cstate_init(void)
+{
+	/* SLM has different MSR for PKG C6 */
+	switch (boot_cpu_data.x86_model) {
+	case 55:
+	case 76:
+	case 77:
+		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+	}
+
+	if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+		has_cstate_core = true;
+
+	if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+		has_cstate_pkg = true;
+
+	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+	int cpu;
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu)
+		cstate_cpu_init(cpu);
+
+	__perf_cpu_notifier(cstate_cpu_notifier);
+
+	cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+	.attr_groups	= core_attr_groups,
+	.name		= "cstate_core",
+	.task_ctx_nr	= perf_invalid_context,
+	.event_init	= cstate_pmu_event_init,
+	.add		= cstate_pmu_event_add, /* must have */
+	.del		= cstate_pmu_event_del, /* must have */
+	.start		= cstate_pmu_event_start,
+	.stop		= cstate_pmu_event_stop,
+	.read		= cstate_pmu_event_update,
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+	.attr_groups	= pkg_attr_groups,
+	.name		= "cstate_pkg",
+	.task_ctx_nr	= perf_invalid_context,
+	.event_init	= cstate_pmu_event_init,
+	.add		= cstate_pmu_event_add, /* must have */
+	.del		= cstate_pmu_event_del, /* must have */
+	.start		= cstate_pmu_event_start,
+	.stop		= cstate_pmu_event_stop,
+	.read		= cstate_pmu_event_update,
+	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+	int err;
+
+	if (has_cstate_core) {
+		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+		if (WARN_ON(err))
+			pr_info("Failed to register PMU %s error %d\n",
+				cstate_core_pmu.name, err);
+	}
+
+	if (has_cstate_pkg) {
+		err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+		if (WARN_ON(err))
+			pr_info("Failed to register PMU %s error %d\n",
+				cstate_pkg_pmu.name, err);
+	}
+}
+
+static int __init cstate_pmu_init(void)
+{
+	int err;
+
+	if (cpu_has_hypervisor)
+		return -ENODEV;
+
+	err = cstate_init();
+	if (err)
+		return err;
+
+	cstate_cpumask_init();
+
+	cstate_pmus_register();
+
+	return 0;
+}
+
+device_initcall(cstate_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 84f236ab96b0..5db1c7755548 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -510,10 +510,11 @@ int intel_pmu_drain_bts_buffer(void)
 		u64	flags;
 	};
 	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-	struct bts_record *at, *top;
+	struct bts_record *at, *base, *top;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
 	struct perf_sample_data data;
+	unsigned long skip = 0;
 	struct pt_regs regs;
 
 	if (!event)
@@ -522,10 +523,10 @@ int intel_pmu_drain_bts_buffer(void)
 	if (!x86_pmu.bts_active)
 		return 0;
 
-	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-	top = (struct bts_record *)(unsigned long)ds->bts_index;
+	base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top  = (struct bts_record *)(unsigned long)ds->bts_index;
 
-	if (top <= at)
+	if (top <= base)
 		return 0;
 
 	memset(&regs, 0, sizeof(regs));
@@ -535,16 +536,43 @@ int intel_pmu_drain_bts_buffer(void)
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
 	/*
+	 * BTS leaks kernel addresses in branches across the cpl boundary,
+	 * such as traps or system calls, so unless the user is asking for
+	 * kernel tracing (and right now it's not possible), we'd need to
+	 * filter them out. But first we need to count how many of those we
+	 * have in the current batch. This is an extra O(n) pass, however,
+	 * it's much faster than the other one especially considering that
+	 * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+	 * alloc_bts_buffer()).
+	 */
+	for (at = base; at < top; at++) {
+		/*
+		 * Note that right now *this* BTS code only works if
+		 * attr::exclude_kernel is set, but let's keep this extra
+		 * check here in case that changes.
+		 */
+		if (event->attr.exclude_kernel &&
+		    (kernel_ip(at->from) || kernel_ip(at->to)))
+			skip++;
+	}
+
+	/*
 	 * Prepare a generic sample, i.e. fill in the invariant fields.
 	 * We will overwrite the from and to address before we output
 	 * the sample.
 	 */
 	perf_prepare_sample(&header, &data, event, &regs);
 
-	if (perf_output_begin(&handle, event, header.size * (top - at)))
+	if (perf_output_begin(&handle, event, header.size *
+			      (top - base - skip)))
 		return 1;
 
-	for (; at < top; at++) {
+	for (at = base; at < top; at++) {
+		/* Filter out any records that contain kernel addresses. */
+		if (event->attr.exclude_kernel &&
+		    (kernel_ip(at->from) || kernel_ip(at->to)))
+			continue;
+
 		data.ip		= at->from;
 		data.addr	= at->to;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index b2c9475b7ff2..bfd0b717e944 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -151,10 +151,10 @@ static void __intel_pmu_lbr_enable(bool pmi)
 	 * No need to reprogram LBR_SELECT in a PMI, as it
 	 * did not change.
 	 */
-	if (cpuc->lbr_sel && !pmi) {
+	if (cpuc->lbr_sel)
 		lbr_select = cpuc->lbr_sel->config;
+	if (!pmi)
 		wrmsrl(MSR_LBR_SELECT, lbr_select);
-	}
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	orig_debugctl = debugctl;
@@ -555,6 +555,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
 		mask |= X86_BR_IND_JMP;
 
+	if (br_type & PERF_SAMPLE_BRANCH_CALL)
+		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -890,6 +892,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
 };
 
 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -905,6 +908,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
 						| LBR_RETURN | LBR_CALL_STACK,
 	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
 };
 
 /* core */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 42169283448b..868e1194337f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -139,9 +139,6 @@ static int __init pt_pmu_hw_init(void)
 	long i;
 
 	attrs = NULL;
-	ret = -ENODEV;
-	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
-		goto fail;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
 		cpuid_count(20, i,
@@ -1130,6 +1127,10 @@ static __init int pt_init(void)
 	int ret, cpu, prior_warn = 0;
 
 	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+		return -ENODEV;
+
 	get_online_cpus();
 	for_each_online_cpu(cpu) {
 		u64 ctl;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 560e5255b15e..61215a69b03d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
 static bool pcidrv_registered;
 struct pci_driver *uncore_pci_driver;
 /* pci bus to socket mapping */
-int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 
 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
@@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed =
 struct event_constraint uncore_constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
+int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+	struct pci2phy_map *map;
+	int phys_id = -1;
+
+	raw_spin_lock(&pci2phy_map_lock);
+	list_for_each_entry(map, &pci2phy_map_head, list) {
+		if (map->segment == pci_domain_nr(bus)) {
+			phys_id = map->pbus_to_physid[bus->number];
+			break;
+		}
+	}
+	raw_spin_unlock(&pci2phy_map_lock);
+
+	return phys_id;
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+	struct pci2phy_map *map, *alloc = NULL;
+	int i;
+
+	lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+	list_for_each_entry(map, &pci2phy_map_head, list) {
+		if (map->segment == segment)
+			goto end;
+	}
+
+	if (!alloc) {
+		raw_spin_unlock(&pci2phy_map_lock);
+		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+		raw_spin_lock(&pci2phy_map_lock);
+
+		if (!alloc)
+			return NULL;
+
+		goto lookup;
+	}
+
+	map = alloc;
+	alloc = NULL;
+	map->segment = segment;
+	for (i = 0; i < 256; i++)
+		map->pbus_to_physid[i] = -1;
+	list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+	kfree(alloc);
+	return map;
+}
+
 ssize_t uncore_event_show(struct kobject *kobj,
 			  struct kobj_attribute *attr, char *buf)
 {
@@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	int phys_id;
 	bool first_box = false;
 
-	phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+	phys_id = uncore_pcibus_to_physid(pdev->bus);
 	if (phys_id < 0)
 		return -ENODEV;
 
@@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
 	struct intel_uncore_pmu *pmu;
-	int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
+	int i, cpu, phys_id;
 	bool last_box = false;
 
+	phys_id = uncore_pcibus_to_physid(pdev->bus);
 	box = pci_get_drvdata(pdev);
 	if (!box) {
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 72c54c2e5b1a..2f0a4a98e16b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -117,6 +117,15 @@ struct uncore_event_desc {
 	const char *config;
 };
 
+struct pci2phy_map {
+	struct list_head list;
+	int segment;
+	int pbus_to_physid[256];
+};
+
+int uncore_pcibus_to_physid(struct pci_bus *bus);
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
 ssize_t uncore_event_show(struct kobject *kobj,
 			  struct kobj_attribute *attr, char *buf);
 
@@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
 extern struct intel_uncore_type **uncore_msr_uncores;
 extern struct intel_uncore_type **uncore_pci_uncores;
 extern struct pci_driver *uncore_pci_driver;
-extern int uncore_pcibus_to_physid[256];
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
 extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
 extern struct event_constraint uncore_constraint_empty;
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index f78574b3cb55..845256158a10 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 static int snb_pci2phy_map_init(int devid)
 {
 	struct pci_dev *dev = NULL;
-	int bus;
+	struct pci2phy_map *map;
+	int bus, segment;
 
 	dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
 	if (!dev)
 		return -ENOTTY;
 
 	bus = dev->bus->number;
-
-	uncore_pcibus_to_physid[bus] = 0;
+	segment = pci_domain_nr(dev->bus);
+
+	raw_spin_lock(&pci2phy_map_lock);
+	map = __find_pci2phy_map(segment);
+	if (!map) {
+		raw_spin_unlock(&pci2phy_map_lock);
+		pci_dev_put(dev);
+		return -ENOMEM;
+	}
+	map->pbus_to_physid[bus] = 0;
+	raw_spin_unlock(&pci2phy_map_lock);
 
 	pci_dev_put(dev);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 694510a887dc..f0f4fcba252e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
 static int snbep_pci2phy_map_init(int devid)
 {
 	struct pci_dev *ubox_dev = NULL;
-	int i, bus, nodeid;
+	int i, bus, nodeid, segment;
+	struct pci2phy_map *map;
 	int err = 0;
 	u32 config = 0;
 
@@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid)
 		err = pci_read_config_dword(ubox_dev, 0x54, &config);
 		if (err)
 			break;
+
+		segment = pci_domain_nr(ubox_dev->bus);
+		raw_spin_lock(&pci2phy_map_lock);
+		map = __find_pci2phy_map(segment);
+		if (!map) {
+			raw_spin_unlock(&pci2phy_map_lock);
+			err = -ENOMEM;
+			break;
+		}
+
 		/*
 		 * every three bits in the Node ID mapping register maps
 		 * to a particular node.
 		 */
 		for (i = 0; i < 8; i++) {
 			if (nodeid == ((config >> (3 * i)) & 0x7)) {
-				uncore_pcibus_to_physid[bus] = i;
+				map->pbus_to_physid[bus] = i;
 				break;
 			}
 		}
+		raw_spin_unlock(&pci2phy_map_lock);
 	}
 
 	if (!err) {
@@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid)
 		 * For PCI bus with no UBOX device, find the next bus
 		 * that has UBOX device and use its mapping.
 		 */
-		i = -1;
-		for (bus = 255; bus >= 0; bus--) {
-			if (uncore_pcibus_to_physid[bus] >= 0)
-				i = uncore_pcibus_to_physid[bus];
-			else
-				uncore_pcibus_to_physid[bus] = i;
+		raw_spin_lock(&pci2phy_map_lock);
+		list_for_each_entry(map, &pci2phy_map_head, list) {
+			i = -1;
+			for (bus = 255; bus >= 0; bus--) {
+				if (map->pbus_to_physid[bus] >= 0)
+					i = map->pbus_to_physid[bus];
+				else
+					map->pbus_to_physid[bus] = i;
+			}
 		}
+		raw_spin_unlock(&pci2phy_map_lock);
 	}
 
 	pci_dev_put(ubox_dev);
@@ -2444,7 +2460,7 @@ static struct intel_uncore_type *bdx_pci_uncores[] = {
 	NULL,
 };
 
-static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = {
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
 	{ /* Home Agent 0 */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
 		.driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index 086b12eae794..f32ac13934f2 100644
--- a/arch/x86/kernel/cpu/perf_event_msr.c
+++ b/arch/x86/kernel/cpu/perf_event_msr.c
@@ -10,12 +10,12 @@ enum perf_msr_id {
 	PERF_MSR_EVENT_MAX,
 };
 
-bool test_aperfmperf(int idx)
+static bool test_aperfmperf(int idx)
 {
 	return boot_cpu_has(X86_FEATURE_APERFMPERF);
 }
 
-bool test_intel(int idx)
+static bool test_intel(int idx)
 {
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	    boot_cpu_data.x86 != 6)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 3d423a101fae..608fb26c7254 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 		{ X86_FEATURE_PLN,		CR_EAX, 4, 0x00000006, 0 },
 		{ X86_FEATURE_PTS,		CR_EAX, 6, 0x00000006, 0 },
 		{ X86_FEATURE_HWP,		CR_EAX, 7, 0x00000006, 0 },
-		{ X86_FEATURE_HWP_NOITFY,	CR_EAX, 8, 0x00000006, 0 },
+		{ X86_FEATURE_HWP_NOTIFY,	CR_EAX, 8, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_ACT_WINDOW,	CR_EAX, 9, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_EPP,		CR_EAX,10, 0x00000006, 0 },
 		{ X86_FEATURE_HWP_PKG_REQ,	CR_EAX,11, 0x00000006, 0 },
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d6683dba..2c1910f6717e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -75,8 +75,6 @@ struct crash_memmap_data {
 	unsigned int type;
 };
 
-int in_crash_kexec;
-
 /*
  * This is used to VMCLEAR all VMCSs loaded on the
  * processor. And when loading kvm_intel module, the
@@ -132,7 +130,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 
 static void kdump_nmi_shootdown_cpus(void)
 {
-	in_crash_kexec = 1;
 	nmi_shootdown_cpus(kdump_nmi_callback);
 
 	disable_local_APIC();
@@ -185,10 +182,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(unsigned long start_pfn,
-				unsigned long nr_pfn, void *arg)
+static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
 {
-	int *nr_ranges = arg;
+	unsigned int *nr_ranges = arg;
 
 	(*nr_ranges)++;
 	return 0;
@@ -214,7 +210,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
 
 	ced->image = image;
 
-	walk_system_ram_range(0, -1, &nr_ranges,
+	walk_system_ram_res(0, -1, &nr_ranges,
 				get_nr_ram_ranges_callback);
 
 	ced->max_nr_ranges = nr_ranges;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a102564d08eb..569c1e4f96fe 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -911,7 +911,7 @@ void __init finish_e820_parsing(void)
 	}
 }
 
-static inline const char *e820_type_to_string(int e820_type)
+static const char *e820_type_to_string(int e820_type)
 {
 	switch (e820_type) {
 	case E820_RESERVED_KERN:
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f9cc682e561..db9a675e751b 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -584,7 +584,7 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
 static void __init force_disable_hpet(int num, int slot, int func)
 {
 #ifdef CONFIG_HPET_TIMER
-	boot_hpet_disable = 1;
+	boot_hpet_disable = true;
 	pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
 #endif
 }
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index eec40f595ab9..21bf92490a7b 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -195,14 +195,14 @@ static __init void early_serial_init(char *s)
 #ifdef CONFIG_PCI
 static void mem32_serial_out(unsigned long addr, int offset, int value)
 {
-	u32 *vaddr = (u32 *)addr;
+	u32 __iomem *vaddr = (u32 __iomem *)addr;
 	/* shift implied by pointer type */
 	writel(value, vaddr + offset);
 }
 
 static unsigned int mem32_serial_in(unsigned long addr, int offset)
 {
-	u32 *vaddr = (u32 *)addr;
+	u32 __iomem *vaddr = (u32 __iomem *)addr;
 	/* shift implied by pointer type */
 	return readl(vaddr + offset);
 }
@@ -316,7 +316,7 @@ static struct console early_serial_console = {
 	.index =	-1,
 };
 
-static inline void early_console_register(struct console *con, int keep_early)
+static void early_console_register(struct console *con, int keep_early)
 {
 	if (con->index != -1) {
 		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d14e9ac3235a..be39b5fde4b9 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -290,11 +290,11 @@ static void __init fpu__init_system_ctx_switch(void)
 	if (cpu_has_xsaveopt && eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
 
-	if (xfeatures_mask & XSTATE_EAGER) {
+	if (xfeatures_mask & XFEATURE_MASK_EAGER) {
 		if (eagerfpu == DISABLE) {
 			pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
-			       xfeatures_mask & XSTATE_EAGER);
-			xfeatures_mask &= ~XSTATE_EAGER;
+			       xfeatures_mask & XFEATURE_MASK_EAGER);
+			xfeatures_mask &= ~XFEATURE_MASK_EAGER;
 		} else {
 			eagerfpu = ENABLE;
 		}
@@ -354,17 +354,7 @@ static int __init x86_noxsave_setup(char *s)
 	if (strlen(s))
 		return 0;
 
-	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-	setup_clear_cpu_cap(X86_FEATURE_AVX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX2);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
+	fpu__xstate_clear_all_cpu_caps();
 
 	return 1;
 }
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index dc60810c1c74..0bc3490420c5 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -66,7 +66,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	 * presence of FP and SSE state.
 	 */
 	if (cpu_has_xsave)
-		fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE;
+		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 
 	return ret;
 }
@@ -326,7 +326,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	 * presence of FP.
 	 */
 	if (cpu_has_xsave)
-		fpu->state.xsave.header.xfeatures |= XSTATE_FP;
+		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
 	return ret;
 }
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 50ec9af1bd51..ef29b742cea7 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -56,7 +56,7 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
 	if (use_fxsr()) {
 		struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
 		struct user_i387_ia32_struct env;
-		struct _fpstate_ia32 __user *fp = buf;
+		struct _fpstate_32 __user *fp = buf;
 
 		convert_from_fxsr(&env, tsk);
 
@@ -107,7 +107,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	 * header as well as change any contents in the memory layout.
 	 * xrestore as part of sigreturn will capture all the changes.
 	 */
-	xfeatures |= XSTATE_FPSSE;
+	xfeatures |= XFEATURE_MASK_FPSSE;
 
 	err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
 
@@ -165,7 +165,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(current, NULL, 0,
 			sizeof(struct user_i387_ia32_struct), NULL,
-			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
+			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
 	if (fpregs_active()) {
 		/* Save the live register state to the user directly. */
@@ -207,7 +207,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
 		 * layout and not enabled by the OS.
 		 */
 		if (fx_only)
-			header->xfeatures = XSTATE_FPSSE;
+			header->xfeatures = XFEATURE_MASK_FPSSE;
 		else
 			header->xfeatures &= (xfeatures_mask & xfeatures);
 	}
@@ -230,7 +230,7 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_
 {
 	if (use_xsave()) {
 		if ((unsigned long)buf % 64 || fx_only) {
-			u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE;
+			u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
 			copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
 			return copy_user_to_fxregs(buf);
 		} else {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 62fc001c7846..6454f2731b56 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -31,12 +31,28 @@ static const char *xfeature_names[] =
  */
 u64 xfeatures_mask __read_mostly;
 
-static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
-static unsigned int xstate_sizes[XFEATURES_NR_MAX]   = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
+static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
+static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] = -1};
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
-/* The number of supported xfeatures in xfeatures_mask: */
-static unsigned int xfeatures_nr;
+/*
+ * Clear all of the X86_FEATURE_* bits that are unavailable
+ * when the CPU has no XSAVE support.
+ */
+void fpu__xstate_clear_all_cpu_caps(void)
+{
+	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+	setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
+	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+	setup_clear_cpu_cap(X86_FEATURE_AVX);
+	setup_clear_cpu_cap(X86_FEATURE_AVX2);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
+	setup_clear_cpu_cap(X86_FEATURE_MPX);
+}
 
 /*
  * Return whether the system supports a given xfeature.
@@ -53,7 +69,7 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
 		/*
 		 * So we use FLS here to be able to print the most advanced
 		 * feature that was requested but is missing. So if a driver
-		 * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
+		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
 		 * missing AVX feature - this is the most informative message
 		 * to users:
 		 */
@@ -112,7 +128,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	/*
 	 * FP is in init state
 	 */
-	if (!(xfeatures & XSTATE_FP)) {
+	if (!(xfeatures & XFEATURE_MASK_FP)) {
 		fx->cwd = 0x37f;
 		fx->swd = 0;
 		fx->twd = 0;
@@ -125,7 +141,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	/*
 	 * SSE is in init state
 	 */
-	if (!(xfeatures & XSTATE_SSE))
+	if (!(xfeatures & XFEATURE_MASK_SSE))
 		memset(&fx->xmm_space[0], 0, 256);
 
 	/*
@@ -169,25 +185,43 @@ void fpu__init_cpu_xstate(void)
 }
 
 /*
+ * Note that in the future we will likely need a pair of
+ * functions here: one for user xstates and the other for
+ * system xstates.  For now, they are the same.
+ */
+static int xfeature_enabled(enum xfeature xfeature)
+{
+	return !!(xfeatures_mask & (1UL << xfeature));
+}
+
+/*
  * Record the offsets and sizes of various xstates contained
  * in the XSAVE state memory layout.
- *
- * ( Note that certain features might be non-present, for them
- *   we'll have 0 offset and 0 size. )
  */
 static void __init setup_xstate_features(void)
 {
-	u32 eax, ebx, ecx, edx, leaf;
-
-	xfeatures_nr = fls64(xfeatures_mask);
-
-	for (leaf = 2; leaf < xfeatures_nr; leaf++) {
-		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
-
-		xstate_offsets[leaf] = ebx;
-		xstate_sizes[leaf] = eax;
+	u32 eax, ebx, ecx, edx, i;
+	/* start at the beginnning of the "extended state" */
+	unsigned int last_good_offset = offsetof(struct xregs_state,
+						 extended_state_area);
+
+	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		if (!xfeature_enabled(i))
+			continue;
+
+		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+		xstate_offsets[i] = ebx;
+		xstate_sizes[i] = eax;
+		/*
+		 * In our xstate size checks, we assume that the
+		 * highest-numbered xstate feature has the
+		 * highest offset in the buffer.  Ensure it does.
+		 */
+		WARN_ONCE(last_good_offset > xstate_offsets[i],
+			"x86/fpu: misordered xstate at %d\n", last_good_offset);
+		last_good_offset = xstate_offsets[i];
 
-		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
+		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax);
 	}
 }
 
@@ -204,14 +238,14 @@ static void __init print_xstate_feature(u64 xstate_mask)
  */
 static void __init print_xstate_features(void)
 {
-	print_xstate_feature(XSTATE_FP);
-	print_xstate_feature(XSTATE_SSE);
-	print_xstate_feature(XSTATE_YMM);
-	print_xstate_feature(XSTATE_BNDREGS);
-	print_xstate_feature(XSTATE_BNDCSR);
-	print_xstate_feature(XSTATE_OPMASK);
-	print_xstate_feature(XSTATE_ZMM_Hi256);
-	print_xstate_feature(XSTATE_Hi16_ZMM);
+	print_xstate_feature(XFEATURE_MASK_FP);
+	print_xstate_feature(XFEATURE_MASK_SSE);
+	print_xstate_feature(XFEATURE_MASK_YMM);
+	print_xstate_feature(XFEATURE_MASK_BNDREGS);
+	print_xstate_feature(XFEATURE_MASK_BNDCSR);
+	print_xstate_feature(XFEATURE_MASK_OPMASK);
+	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
+	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 }
 
 /*
@@ -233,8 +267,8 @@ static void __init setup_xstate_comp(void)
 	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 
 	if (!cpu_has_xsaves) {
-		for (i = 2; i < xfeatures_nr; i++) {
-			if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
+		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+			if (xfeature_enabled(i)) {
 				xstate_comp_offsets[i] = xstate_offsets[i];
 				xstate_comp_sizes[i] = xstate_sizes[i];
 			}
@@ -242,15 +276,16 @@ static void __init setup_xstate_comp(void)
 		return;
 	}
 
-	xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
+		FXSAVE_SIZE + XSAVE_HDR_SIZE;
 
-	for (i = 2; i < xfeatures_nr; i++) {
-		if (test_bit(i, (unsigned long *)&xfeatures_mask))
+	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		if (xfeature_enabled(i))
 			xstate_comp_sizes[i] = xstate_sizes[i];
 		else
 			xstate_comp_sizes[i] = 0;
 
-		if (i > 2)
+		if (i > FIRST_EXTENDED_XFEATURE)
 			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
 					+ xstate_comp_sizes[i-1];
 
@@ -290,27 +325,280 @@ static void __init setup_init_fpu_buf(void)
 	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
 }
 
+static int xfeature_is_supervisor(int xfeature_nr)
+{
+	/*
+	 * We currently do not support supervisor states, but if
+	 * we did, we could find out like this.
+	 *
+	 * SDM says: If state component i is a user state component,
+	 * ECX[0] return 0; if state component i is a supervisor
+	 * state component, ECX[0] returns 1.
+	u32 eax, ebx, ecx, edx;
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx;
+	return !!(ecx & 1);
+	*/
+	return 0;
+}
+/*
+static int xfeature_is_user(int xfeature_nr)
+{
+	return !xfeature_is_supervisor(xfeature_nr);
+}
+*/
+
+/*
+ * This check is important because it is easy to get XSTATE_*
+ * confused with XSTATE_BIT_*.
+ */
+#define CHECK_XFEATURE(nr) do {		\
+	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
+	WARN_ON(nr >= XFEATURE_MAX);	\
+} while (0)
+
+/*
+ * We could cache this like xstate_size[], but we only use
+ * it here, so it would be a waste of space.
+ */
+static int xfeature_is_aligned(int xfeature_nr)
+{
+	u32 eax, ebx, ecx, edx;
+
+	CHECK_XFEATURE(xfeature_nr);
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	/*
+	 * The value returned by ECX[1] indicates the alignment
+	 * of state component i when the compacted format
+	 * of the extended region of an XSAVE area is used
+	 */
+	return !!(ecx & 2);
+}
+
+static int xfeature_uncompacted_offset(int xfeature_nr)
+{
+	u32 eax, ebx, ecx, edx;
+
+	CHECK_XFEATURE(xfeature_nr);
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
+
+static int xfeature_size(int xfeature_nr)
+{
+	u32 eax, ebx, ecx, edx;
+
+	CHECK_XFEATURE(xfeature_nr);
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	return eax;
+}
+
+/*
+ * 'XSAVES' implies two different things:
+ * 1. saving of supervisor/system state
+ * 2. using the compacted format
+ *
+ * Use this function when dealing with the compacted format so
+ * that it is obvious which aspect of 'XSAVES' is being handled
+ * by the calling code.
+ */
+static int using_compacted_format(void)
+{
+	return cpu_has_xsaves;
+}
+
+static void __xstate_dump_leaves(void)
+{
+	int i;
+	u32 eax, ebx, ecx, edx;
+	static int should_dump = 1;
+
+	if (!should_dump)
+		return;
+	should_dump = 0;
+	/*
+	 * Dump out a few leaves past the ones that we support
+	 * just in case there are some goodies up there
+	 */
+	for (i = 0; i < XFEATURE_MAX + 10; i++) {
+		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
+			XSTATE_CPUID, i, eax, ebx, ecx, edx);
+	}
+}
+
+#define XSTATE_WARN_ON(x) do {							\
+	if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) {	\
+		__xstate_dump_leaves();						\
+	}									\
+} while (0)
+
+#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
+	if ((nr == nr_macro) &&						\
+	    WARN_ONCE(sz != sizeof(__struct),				\
+		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
+		__stringify(nr_macro), sizeof(__struct), sz)) {		\
+		__xstate_dump_leaves();					\
+	}								\
+} while (0)
+
+/*
+ * We have a C struct for each 'xstate'.  We need to ensure
+ * that our software representation matches what the CPU
+ * tells us about the state's size.
+ */
+static void check_xstate_against_struct(int nr)
+{
+	/*
+	 * Ask the CPU for the size of the state.
+	 */
+	int sz = xfeature_size(nr);
+	/*
+	 * Match each CPU state with the corresponding software
+	 * structure.
+	 */
+	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
+	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
+	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
+	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
+	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
+	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
+
+	/*
+	 * Make *SURE* to add any feature numbers in below if
+	 * there are "holes" in the xsave state component
+	 * numbers.
+	 */
+	if ((nr < XFEATURE_YMM) ||
+	    (nr >= XFEATURE_MAX)) {
+		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
+		XSTATE_WARN_ON(1);
+	}
+}
+
+/*
+ * This essentially double-checks what the cpu told us about
+ * how large the XSAVE buffer needs to be.  We are recalculating
+ * it to be safe.
+ */
+static void do_extra_xstate_size_checks(void)
+{
+	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	int i;
+
+	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		if (!xfeature_enabled(i))
+			continue;
+
+		check_xstate_against_struct(i);
+		/*
+		 * Supervisor state components can be managed only by
+		 * XSAVES, which is compacted-format only.
+		 */
+		if (!using_compacted_format())
+			XSTATE_WARN_ON(xfeature_is_supervisor(i));
+
+		/* Align from the end of the previous feature */
+		if (xfeature_is_aligned(i))
+			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
+		/*
+		 * The offset of a given state in the non-compacted
+		 * format is given to us in a CPUID leaf.  We check
+		 * them for being ordered (increasing offsets) in
+		 * setup_xstate_features().
+		 */
+		if (!using_compacted_format())
+			paranoid_xstate_size = xfeature_uncompacted_offset(i);
+		/*
+		 * The compacted-format offset always depends on where
+		 * the previous state ended.
+		 */
+		paranoid_xstate_size += xfeature_size(i);
+	}
+	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
+}
+
 /*
  * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ *
+ * Note the SDM's wording here.  "sub-function 0" only enumerates
+ * the size of the *user* states.  If we use it to size a buffer
+ * that we use 'XSAVES' on, we could potentially overflow the
+ * buffer because 'XSAVES' saves system states too.
+ *
+ * Note that we do not currently set any bits on IA32_XSS so
+ * 'XCR0 | IA32_XSS == XCR0' for now.
  */
-static void __init init_xstate_size(void)
+static unsigned int __init calculate_xstate_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
-	int i;
+	unsigned int calculated_xstate_size;
 
 	if (!cpu_has_xsaves) {
+		/*
+		 * - CPUID function 0DH, sub-function 0:
+		 *    EBX enumerates the size (in bytes) required by
+		 *    the XSAVE instruction for an XSAVE area
+		 *    containing all the *user* state components
+		 *    corresponding to bits currently set in XCR0.
+		 */
 		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-		xstate_size = ebx;
-		return;
+		calculated_xstate_size = ebx;
+	} else {
+		/*
+		 * - CPUID function 0DH, sub-function 1:
+		 *    EBX enumerates the size (in bytes) required by
+		 *    the XSAVES instruction for an XSAVE area
+		 *    containing all the state components
+		 *    corresponding to bits currently set in
+		 *    XCR0 | IA32_XSS.
+		 */
+		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+		calculated_xstate_size = ebx;
 	}
+	return calculated_xstate_size;
+}
 
-	xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
-	for (i = 2; i < 64; i++) {
-		if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
-			cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
-			xstate_size += eax;
-		}
-	}
+/*
+ * Will the runtime-enumerated 'xstate_size' fit in the init
+ * task's statically-allocated buffer?
+ */
+static bool is_supported_xstate_size(unsigned int test_xstate_size)
+{
+	if (test_xstate_size <= sizeof(union fpregs_state))
+		return true;
+
+	pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
+			sizeof(union fpregs_state), test_xstate_size);
+	return false;
+}
+
+static int init_xstate_size(void)
+{
+	/* Recompute the context size for enabled features: */
+	unsigned int possible_xstate_size = calculate_xstate_size();
+
+	/* Ensure we have the space to store all enabled: */
+	if (!is_supported_xstate_size(possible_xstate_size))
+		return -EINVAL;
+
+	/*
+	 * The size is OK, we are definitely going to use xsave,
+	 * make it known to the world that we need more space.
+	 */
+	xstate_size = possible_xstate_size;
+	do_extra_xstate_size_checks();
+	return 0;
+}
+
+/*
+ * We enabled the XSAVE hardware, but something went wrong and
+ * we can not use it.  Disable it.
+ */
+static void fpu__init_disable_system_xstate(void)
+{
+	xfeatures_mask = 0;
+	cr4_clear_bits(X86_CR4_OSXSAVE);
+	fpu__xstate_clear_all_cpu_caps();
 }
 
 /*
@@ -321,6 +609,7 @@ void __init fpu__init_system_xstate(void)
 {
 	unsigned int eax, ebx, ecx, edx;
 	static int on_boot_cpu = 1;
+	int err;
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
@@ -338,7 +627,7 @@ void __init fpu__init_system_xstate(void)
 	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 	xfeatures_mask = eax + ((u64)edx << 32);
 
-	if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+	if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
 		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
 		BUG();
 	}
@@ -348,16 +637,19 @@ void __init fpu__init_system_xstate(void)
 
 	/* Enable xstate instructions to be able to continue with initialization: */
 	fpu__init_cpu_xstate();
-
-	/* Recompute the context size for enabled features: */
-	init_xstate_size();
+	err = init_xstate_size();
+	if (err) {
+		/* something went wrong, boot without any XSAVE support */
+		fpu__init_disable_system_xstate();
+		return;
+	}
 
 	update_regset_xstate_info(xstate_size, xfeatures_mask);
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 	setup_xstate_comp();
 
-	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n",
+	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
 		xstate_size,
 		cpu_has_xsaves ? "compacted" : "standard");
@@ -388,7 +680,7 @@ void fpu__resume_cpu(void)
  * Inputs:
  *	xstate: the thread's storage area for all FPU data
  *	xstate_feature: state which is defined in xsave.h (e.g.
- *	XSTATE_FP, XSTATE_SSE, etc...)
+ *	XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
  * Output:
  *	address of the state in the xsave area, or NULL if the
  *	field is not present in the xsave buffer.
@@ -439,8 +731,8 @@ EXPORT_SYMBOL_GPL(get_xsave_addr);
  * Note that this only works on the current task.
  *
  * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
- *	XSTATE_SSE, etc...)
+ *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
+ *	XFEATURE_MASK_SSE, etc...)
  * Output:
  *	address of the state in the xsave area or NULL if the state
  *	is not present or is in its 'init state'.
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 0e2d96ffd158..6bc9ae24b6d2 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -152,7 +152,7 @@ ENTRY(startup_32)
 	movl %eax, pa(olpc_ofw_pgd)
 #endif
 
-#ifdef CONFIG_MICROCODE_EARLY
+#ifdef CONFIG_MICROCODE
 	/* Early load ucode on BSP. */
 	call load_ucode_bsp
 #endif
@@ -311,12 +311,11 @@ ENTRY(startup_32_smp)
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
-#ifdef CONFIG_MICROCODE_EARLY
+#ifdef CONFIG_MICROCODE
 	/* Early load ucode on AP. */
 	call load_ucode_ap
 #endif
 
-
 default_entry:
 #define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
 			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 88b4da373081..b8e6ff5cd5d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -37,10 +37,10 @@
  */
 unsigned long				hpet_address;
 u8					hpet_blockid; /* OS timer block num */
-u8					hpet_msi_disable;
+bool					hpet_msi_disable;
 
 #ifdef CONFIG_PCI_MSI
-static unsigned long			hpet_num_timers;
+static unsigned int			hpet_num_timers;
 #endif
 static void __iomem			*hpet_virt_address;
 
@@ -86,9 +86,9 @@ static inline void hpet_clear_mapping(void)
 /*
  * HPET command line enable / disable
  */
-int boot_hpet_disable;
-int hpet_force_user;
-static int hpet_verbose;
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
 
 static int __init hpet_setup(char *str)
 {
@@ -98,11 +98,11 @@ static int __init hpet_setup(char *str)
 		if (next)
 			*next++ = 0;
 		if (!strncmp("disable", str, 7))
-			boot_hpet_disable = 1;
+			boot_hpet_disable = true;
 		if (!strncmp("force", str, 5))
-			hpet_force_user = 1;
+			hpet_force_user = true;
 		if (!strncmp("verbose", str, 7))
-			hpet_verbose = 1;
+			hpet_verbose = true;
 		str = next;
 	}
 	return 1;
@@ -111,7 +111,7 @@ __setup("hpet=", hpet_setup);
 
 static int __init disable_hpet(char *str)
 {
-	boot_hpet_disable = 1;
+	boot_hpet_disable = true;
 	return 1;
 }
 __setup("nohpet", disable_hpet);
@@ -124,7 +124,7 @@ static inline int is_hpet_capable(void)
 /*
  * HPET timer interrupt enable / disable
  */
-static int hpet_legacy_int_enabled;
+static bool hpet_legacy_int_enabled;
 
 /**
  * is_hpet_enabled - check whether the hpet timer interrupt is enabled
@@ -230,7 +230,7 @@ static struct clock_event_device hpet_clockevent;
 
 static void hpet_stop_counter(void)
 {
-	unsigned long cfg = hpet_readl(HPET_CFG);
+	u32 cfg = hpet_readl(HPET_CFG);
 	cfg &= ~HPET_CFG_ENABLE;
 	hpet_writel(cfg, HPET_CFG);
 }
@@ -272,7 +272,7 @@ static void hpet_enable_legacy_int(void)
 
 	cfg |= HPET_CFG_LEGACY;
 	hpet_writel(cfg, HPET_CFG);
-	hpet_legacy_int_enabled = 1;
+	hpet_legacy_int_enabled = true;
 }
 
 static void hpet_legacy_clockevent_register(void)
@@ -983,7 +983,7 @@ void hpet_disable(void)
 			cfg = *hpet_boot_cfg;
 		else if (hpet_legacy_int_enabled) {
 			cfg &= ~HPET_CFG_LEGACY;
-			hpet_legacy_int_enabled = 0;
+			hpet_legacy_int_enabled = false;
 		}
 		cfg &= ~HPET_CFG_ENABLE;
 		hpet_writel(cfg, HPET_CFG);
@@ -1121,8 +1121,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
 
 static void hpet_disable_rtc_channel(void)
 {
-	unsigned long cfg;
-	cfg = hpet_readl(HPET_T1_CFG);
+	u32 cfg = hpet_readl(HPET_T1_CFG);
 	cfg &= ~HPET_TN_ENABLE;
 	hpet_writel(cfg, HPET_T1_CFG);
 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index c767cf2bc80a..206d0b90a3ab 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -72,7 +72,7 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
 {
 	stack_overflow_check(regs);
 
-	if (unlikely(IS_ERR_OR_NULL(desc)))
+	if (IS_ERR_OR_NULL(desc))
 		return false;
 
 	generic_handle_irq_desc(desc);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index d6178d9791db..44256a62702b 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -511,26 +511,31 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
 	return NOTIFY_STOP;
 }
 
-static int was_in_debug_nmi[NR_CPUS];
+static DECLARE_BITMAP(was_in_debug_nmi, NR_CPUS);
 
 static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	int cpu;
+
 	switch (cmd) {
 	case NMI_LOCAL:
 		if (atomic_read(&kgdb_active) != -1) {
 			/* KGDB CPU roundup */
-			kgdb_nmicallback(raw_smp_processor_id(), regs);
-			was_in_debug_nmi[raw_smp_processor_id()] = 1;
+			cpu = raw_smp_processor_id();
+			kgdb_nmicallback(cpu, regs);
+			set_bit(cpu, was_in_debug_nmi);
 			touch_nmi_watchdog();
+
 			return NMI_HANDLED;
 		}
 		break;
 
 	case NMI_UNKNOWN:
-		if (was_in_debug_nmi[raw_smp_processor_id()]) {
-			was_in_debug_nmi[raw_smp_processor_id()] = 0;
+		cpu = raw_smp_processor_id();
+
+		if (__test_and_clear_bit(cpu, was_in_debug_nmi))
 			return NMI_HANDLED;
-		}
+
 		break;
 	default:
 		/* do nothing */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2c7aafa70702..2bd81e302427 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -32,6 +32,7 @@
 static int kvmclock = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+static cycle_t kvm_sched_clock_offset;
 
 static int parse_no_kvmclock(char *arg)
 {
@@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
 	return kvm_clock_read();
 }
 
+static cycle_t kvm_sched_clock_read(void)
+{
+	return kvm_clock_read() - kvm_sched_clock_offset;
+}
+
+static inline void kvm_sched_clock_init(bool stable)
+{
+	if (!stable) {
+		pv_time_ops.sched_clock = kvm_clock_read;
+		return;
+	}
+
+	kvm_sched_clock_offset = kvm_clock_read();
+	pv_time_ops.sched_clock = kvm_sched_clock_read;
+	set_sched_clock_stable();
+
+	printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
+			kvm_sched_clock_offset);
+
+	BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
+	         sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
+}
+
 /*
  * If we don't do that, there is the possibility that the guest
  * will calibrate under heavy load - thus, getting a lower lpj -
@@ -248,7 +272,17 @@ void __init kvmclock_init(void)
 		memblock_free(mem, size);
 		return;
 	}
-	pv_time_ops.sched_clock = kvm_clock_read;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+	cpu = get_cpu();
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+
+	kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);
+	put_cpu();
+
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
@@ -265,16 +299,6 @@ void __init kvmclock_init(void)
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.name = "KVM";
-
-	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
-		pvclock_set_flags(~0);
-
-	cpu = get_cpu();
-	vcpu_time = &hv_clock[cpu].pvti;
-	flags = pvclock_read_flags(vcpu_time);
-	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
-		set_sched_clock_stable();
-	put_cpu();
 }
 
 int __init kvm_setup_vsyscall_timeinfo(void)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1b55de1267cf..cd99433b8ba1 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
 
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
 {
+	if (!*dev)
+		*dev = &x86_dma_fallback_dev;
+
 	*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
 	*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
 
-	if (!*dev)
-		*dev = &x86_dma_fallback_dev;
 	if (!is_device_dma_capable(*dev))
 		return false;
 	return true;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d0e62ae8516..9f7c21c22477 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+	dst->thread.vm86 = NULL;
+#endif
 
 	return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 }
@@ -506,3 +509,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
 
+/*
+ * Called from fs/proc with a reference on @p to find the function
+ * which called into schedule(). This needs to be done carefully
+ * because the task might wake up and we might look at a stack
+ * changing under us.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long start, bottom, top, sp, fp, ip;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	start = (unsigned long)task_stack_page(p);
+	if (!start)
+		return 0;
+
+	/*
+	 * Layout of the stack page:
+	 *
+	 * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+	 * PADDING
+	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+	 * stack
+	 * ----------- bottom = start + sizeof(thread_info)
+	 * thread_info
+	 * ----------- start
+	 *
+	 * The tasks stack pointer points at the location where the
+	 * framepointer is stored. The data on the stack is:
+	 * ... IP FP ... IP FP
+	 *
+	 * We need to read FP and IP, so we need to adjust the upper
+	 * bound by another unsigned long.
+	 */
+	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+	top -= 2 * sizeof(unsigned long);
+	bottom = start + sizeof(struct thread_info);
+
+	sp = READ_ONCE(p->thread.sp);
+	if (sp < bottom || sp > top)
+		return 0;
+
+	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+	do {
+		if (fp < bottom || fp > top)
+			return 0;
+		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
+		if (!in_sched_functions(ip))
+			return ip;
+		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
+	} while (count++ < 16 && p->state != TASK_RUNNING);
+	return 0;
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c13df2c735f8..9f950917528b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -280,14 +280,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		set_iopl_mask(next->iopl);
 
 	/*
-	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
-	 * preempt_count of all tasks was equal here and this would not be
-	 * needed.
-	 */
-	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
-	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
-
-	/*
 	 * Now maybe handle debug registers and/or IO bitmaps
 	 */
 	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
@@ -324,31 +316,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	return prev_p;
 }
-
-#define top_esp                (THREAD_SIZE - sizeof(unsigned long))
-#define top_ebp                (THREAD_SIZE - 2*sizeof(unsigned long))
-
-unsigned long get_wchan(struct task_struct *p)
-{
-	unsigned long bp, sp, ip;
-	unsigned long stack_page;
-	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-	stack_page = (unsigned long)task_stack_page(p);
-	sp = p->thread.sp;
-	if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
-		return 0;
-	/* include/asm-i386/system.h:switch_to() pushes bp last. */
-	bp = *(unsigned long *) sp;
-	do {
-		if (bp < stack_page || bp > top_ebp+stack_page)
-			return 0;
-		ip = *(unsigned long *) (bp+4);
-		if (!in_sched_functions(ip))
-			return ip;
-		bp = *(unsigned long *) bp;
-	} while (count++ < 16);
-	return 0;
-}
-
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c1bbcf12924..e835d263a33b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -332,7 +332,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch FS and GS.
 	 *
-	 * These are even more complicated than FS and GS: they have
+	 * These are even more complicated than DS and ES: they have
 	 * 64-bit bases are that controlled by arch_prctl.  Those bases
 	 * only differ from the values in the GDT or LDT if the selector
 	 * is 0.
@@ -401,14 +401,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	this_cpu_write(current_task, next_p);
 
-	/*
-	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
-	 * preempt_count of all tasks was equal here and this would not be
-	 * needed.
-	 */
-	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
-	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
-
 	/* Reload esp0 and ss1.  This changes current_thread_info(). */
 	load_sp0(tss, next);
 
@@ -499,30 +491,6 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
-unsigned long get_wchan(struct task_struct *p)
-{
-	unsigned long stack;
-	u64 fp, ip;
-	int count = 0;
-
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-	stack = (unsigned long)task_stack_page(p);
-	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
-		return 0;
-	fp = *(u64 *)(p->thread.sp);
-	do {
-		if (fp < (unsigned long)stack ||
-		    fp >= (unsigned long)stack+THREAD_SIZE)
-			return 0;
-		ip = *(u64 *)(fp+8);
-		if (!in_sched_functions(ip))
-			return ip;
-		fp = *(u64 *)fp;
-	} while (count++ < 16);
-	return 0;
-}
-
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 {
 	int ret = 0;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 176a0f99d4da..cc457ff818ad 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -524,7 +524,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
  */
 static void force_disable_hpet_msi(struct pci_dev *unused)
 {
-	hpet_msi_disable = 1;
+	hpet_msi_disable = true;
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fdb7f2a2d328..a1e4da98c8f0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -111,6 +111,7 @@
 #include <asm/mce.h>
 #include <asm/alternative.h>
 #include <asm/prom.h>
+#include <asm/microcode.h>
 
 /*
  * max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -480,34 +481,34 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 
 #ifdef CONFIG_KEXEC_CORE
 
+/* 16M alignment for crash kernel regions */
+#define CRASH_ALIGN		(16 << 20)
+
 /*
  * Keep the crash kernel below this limit.  On 32 bits earlier kernels
  * would limit the kernel to the low 512 MiB due to mapping restrictions.
  * On 64bit, old kexec-tools need to under 896MiB.
  */
 #ifdef CONFIG_X86_32
-# define CRASH_KERNEL_ADDR_LOW_MAX	(512 << 20)
-# define CRASH_KERNEL_ADDR_HIGH_MAX	(512 << 20)
+# define CRASH_ADDR_LOW_MAX	(512 << 20)
+# define CRASH_ADDR_HIGH_MAX	(512 << 20)
 #else
-# define CRASH_KERNEL_ADDR_LOW_MAX	(896UL<<20)
-# define CRASH_KERNEL_ADDR_HIGH_MAX	MAXMEM
+# define CRASH_ADDR_LOW_MAX	(896UL << 20)
+# define CRASH_ADDR_HIGH_MAX	MAXMEM
 #endif
 
-static void __init reserve_crashkernel_low(void)
+static int __init reserve_crashkernel_low(void)
 {
 #ifdef CONFIG_X86_64
-	const unsigned long long alignment = 16<<20;	/* 16M */
-	unsigned long long low_base = 0, low_size = 0;
+	unsigned long long base, low_base = 0, low_size = 0;
 	unsigned long total_low_mem;
-	unsigned long long base;
-	bool auto_set = false;
 	int ret;
 
-	total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+	total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
+
 	/* crashkernel=Y,low */
-	ret = parse_crashkernel_low(boot_command_line, total_low_mem,
-						&low_size, &base);
-	if (ret != 0) {
+	ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base);
+	if (ret) {
 		/*
 		 * two parts from lib/swiotlb.c:
 		 * -swiotlb size: user-specified with swiotlb= or default.
@@ -517,52 +518,52 @@ static void __init reserve_crashkernel_low(void)
 		 * make sure we allocate enough extra low memory so that we
 		 * don't run out of DMA buffers for 32-bit devices.
 		 */
-		low_size = max(swiotlb_size_or_default() + (8UL<<20), 256UL<<20);
-		auto_set = true;
+		low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
 	} else {
 		/* passed with crashkernel=0,low ? */
 		if (!low_size)
-			return;
+			return 0;
 	}
 
-	low_base = memblock_find_in_range(low_size, (1ULL<<32),
-					low_size, alignment);
-
+	low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN);
 	if (!low_base) {
-		if (!auto_set)
-			pr_info("crashkernel low reservation failed - No suitable area found.\n");
+		pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
+		       (unsigned long)(low_size >> 20));
+		return -ENOMEM;
+	}
 
-		return;
+	ret = memblock_reserve(low_base, low_size);
+	if (ret) {
+		pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
+		return ret;
 	}
 
-	memblock_reserve(low_base, low_size);
 	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
-			(unsigned long)(low_size >> 20),
-			(unsigned long)(low_base >> 20),
-			(unsigned long)(total_low_mem >> 20));
+		(unsigned long)(low_size >> 20),
+		(unsigned long)(low_base >> 20),
+		(unsigned long)(total_low_mem >> 20));
+
 	crashk_low_res.start = low_base;
 	crashk_low_res.end   = low_base + low_size - 1;
 	insert_resource(&iomem_resource, &crashk_low_res);
 #endif
+	return 0;
 }
 
 static void __init reserve_crashkernel(void)
 {
-	const unsigned long long alignment = 16<<20;	/* 16M */
-	unsigned long long total_mem;
-	unsigned long long crash_size, crash_base;
+	unsigned long long crash_size, crash_base, total_mem;
 	bool high = false;
 	int ret;
 
 	total_mem = memblock_phys_mem_size();
 
 	/* crashkernel=XM */
-	ret = parse_crashkernel(boot_command_line, total_mem,
-			&crash_size, &crash_base);
+	ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
 	if (ret != 0 || crash_size <= 0) {
 		/* crashkernel=X,high */
 		ret = parse_crashkernel_high(boot_command_line, total_mem,
-				&crash_size, &crash_base);
+					     &crash_size, &crash_base);
 		if (ret != 0 || crash_size <= 0)
 			return;
 		high = true;
@@ -573,11 +574,10 @@ static void __init reserve_crashkernel(void)
 		/*
 		 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
 		 */
-		crash_base = memblock_find_in_range(alignment,
-					high ? CRASH_KERNEL_ADDR_HIGH_MAX :
-					       CRASH_KERNEL_ADDR_LOW_MAX,
-					crash_size, alignment);
-
+		crash_base = memblock_find_in_range(CRASH_ALIGN,
+						    high ? CRASH_ADDR_HIGH_MAX
+							 : CRASH_ADDR_LOW_MAX,
+						    crash_size, CRASH_ALIGN);
 		if (!crash_base) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
@@ -587,26 +587,32 @@ static void __init reserve_crashkernel(void)
 		unsigned long long start;
 
 		start = memblock_find_in_range(crash_base,
-				 crash_base + crash_size, crash_size, 1<<20);
+					       crash_base + crash_size,
+					       crash_size, 1 << 20);
 		if (start != crash_base) {
 			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
-	memblock_reserve(crash_base, crash_size);
+	ret = memblock_reserve(crash_base, crash_size);
+	if (ret) {
+		pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
+		return;
+	}
 
-	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-			"for crashkernel (System RAM: %ldMB)\n",
-			(unsigned long)(crash_size >> 20),
-			(unsigned long)(crash_base >> 20),
-			(unsigned long)(total_mem >> 20));
+	if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
+		memblock_free(crash_base, crash_size);
+		return;
+	}
+
+	pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
+		(unsigned long)(crash_size >> 20),
+		(unsigned long)(crash_base >> 20),
+		(unsigned long)(total_mem >> 20));
 
 	crashk_res.start = crash_base;
 	crashk_res.end   = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);
-
-	if (crash_base >= (1ULL<<32))
-		reserve_crashkernel_low();
 }
 #else
 static void __init reserve_crashkernel(void)
@@ -1079,8 +1085,10 @@ void __init setup_arch(char **cmdline_p)
 	memblock_set_current_limit(ISA_END_ADDRESS);
 	memblock_x86_fill();
 
-	if (efi_enabled(EFI_BOOT))
+	if (efi_enabled(EFI_BOOT)) {
+		efi_fake_memmap();
 		efi_find_mirror();
+	}
 
 	/*
 	 * The EFI specification says that boot service code won't be called
@@ -1173,6 +1181,14 @@ void __init setup_arch(char **cmdline_p)
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
 			KERNEL_PGD_PTRS);
+
+	/*
+	 * sync back low identity map too.  It is used for example
+	 * in the 32-bit EFI stub.
+	 */
+	clone_pgd_range(initial_page_table,
+			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
 #endif
 
 	tboot_probe();
@@ -1234,6 +1250,8 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled(EFI_BOOT))
 		efi_apply_memmap_quirks();
 #endif
+
+	microcode_init();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index da52e6bb5c7f..b7ffb7c00075 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -63,6 +63,7 @@
 
 int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
+	unsigned long buf_val;
 	void __user *buf;
 	unsigned int tmpflags;
 	unsigned int err = 0;
@@ -107,7 +108,8 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_ax = -1;		/* disable syscall checks */
 
-		get_user_ex(buf, &sc->fpstate);
+		get_user_ex(buf_val, &sc->fpstate);
+		buf = (void __user *)buf_val;
 	} get_user_catch(err);
 
 	err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
@@ -196,7 +198,7 @@ static unsigned long align_sigframe(unsigned long sp)
 	return sp;
 }
 
-static inline void __user *
+static void __user *
 get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
 	     void __user **fpstate)
 {
@@ -299,7 +301,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set,
 
 	if (current->mm->context.vdso)
 		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_sigreturn;
+			vdso_image_32.sym___kernel_sigreturn;
 	else
 		restorer = &frame->retcode;
 	if (ksig->ka.sa.sa_flags & SA_RESTORER)
@@ -363,7 +365,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
 		/* Set up to return from userspace.  */
 		restorer = current->mm->context.vdso +
-			selected_vdso32->sym___kernel_rt_sigreturn;
+			vdso_image_32.sym___kernel_rt_sigreturn;
 		if (ksig->ka.sa.sa_flags & SA_RESTORER)
 			restorer = ksig->ka.sa.sa_restorer;
 		put_user_ex(restorer, &frame->pretcode);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e5f920..892ee2e5ecbc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
  */
 #define UDELAY_10MS_DEFAULT 10000
 
-static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+static unsigned int init_udelay = INT_MAX;
 
 static int __init cpu_init_udelay(char *str)
 {
@@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay);
 static void __init smp_quirk_init_udelay(void)
 {
 	/* if cmdline changed it from default, leave it alone */
-	if (init_udelay != UDELAY_10MS_DEFAULT)
+	if (init_udelay != INT_MAX)
 		return;
 
 	/* if modern processor, use no delay */
 	if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
 	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
 		init_udelay = 0;
+
+	/* else, use legacy delay */
+	init_udelay = UDELAY_10MS_DEFAULT;
 }
 
 /*
@@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		/*
 		 * Give the other CPU some time to accept the IPI.
 		 */
-		if (init_udelay)
+		if (init_udelay == 0)
+			udelay(10);
+		else
 			udelay(300);
 
 		pr_debug("Startup point 1\n");
@@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		/*
 		 * Give the other CPU some time to accept the IPI.
 		 */
-		if (init_udelay)
+		if (init_udelay == 0)
+			udelay(10);
+		else
 			udelay(200);
 
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 346eec73f7db..ade185a46b1d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
 dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 {
-	const struct bndcsr *bndcsr;
+	const struct mpx_bndcsr *bndcsr;
 	siginfo_t *info;
 
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
@@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	 * which is all zeros which indicates MPX was not
 	 * responsible for the exception.
 	 */
-	bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
 	if (!bndcsr)
 		goto exit_trap;
 
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c3f7602cd038..c7c4d9c51e99 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -168,21 +168,20 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
  *              ns = cycles * cyc2ns_scale / SC
  *
  *      And since SC is a constant power of two, we can convert the div
- *  into a shift.
+ *  into a shift. The larger SC is, the more accurate the conversion, but
+ *  cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
+ *  (64-bit result) can be used.
  *
- *  We can use khz divisor instead of mhz to keep a better precision, since
- *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ *  We can use khz divisor instead of mhz to keep a better precision.
  *  (mathieu.desnoyers@polymtl.ca)
  *
  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
 static void cyc2ns_data_init(struct cyc2ns_data *data)
 {
 	data->cyc2ns_mul = 0;
-	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+	data->cyc2ns_shift = 0;
 	data->cyc2ns_offset = 0;
 	data->__count = 0;
 }
@@ -216,14 +215,14 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 
 	if (likely(data == tail)) {
 		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
 	} else {
 		data->__count++;
 
 		barrier();
 
 		ns = data->cyc2ns_offset;
-		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
 
 		barrier();
 
@@ -257,12 +256,22 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	 * time function is continuous; see the comment near struct
 	 * cyc2ns_data.
 	 */
-	data->cyc2ns_mul =
-		DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
-				  cpu_khz);
-	data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz,
+			       NSEC_PER_MSEC, 0);
+
+	/*
+	 * cyc2ns_shift is exported via arch_perf_update_userpage() where it is
+	 * not expected to be greater than 31 due to the original published
+	 * conversion algorithm shifting a 32-bit value (now specifies a 64-bit
+	 * value) - refer perf_event_mmap_page documentation in perf_event.h.
+	 */
+	if (data->cyc2ns_shift == 32) {
+		data->cyc2ns_shift = 31;
+		data->cyc2ns_mul >>= 1;
+	}
+
 	data->cyc2ns_offset = ns_now -
-		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);
 
 	cyc2ns_write_end(cpu, data);
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d8a1d56276e1..639a6e34500c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,8 @@ config KVM
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index d090ecf08809..9dc091acd5fb 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include "irq.h"
 #include "assigned-dev.h"
+#include "trace/events/kvm.h"
 
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
@@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-#ifdef __KVM_HAVE_MSI
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure,
+ * user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
+				int level)
+{
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	struct kvm_kernel_irq_routing_entry *e;
+	int ret = -EINVAL;
+	int idx;
+
+	trace_kvm_set_irq(irq, level, irq_source_id);
+
+	/*
+	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
+	 * which would need to be retried from thread context;  when same GSI
+	 * is connected to both PIC and IOAPIC, we'd have to report a
+	 * partial failure here.
+	 * Since there's no easy way to do this, we only support injecting MSI
+	 * which is limited to 1:1 GSI mapping.
+	 */
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+		e = &entries[0];
+		ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
+						irq, level);
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
+
 static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
-#endif
 
-#ifdef __KVM_HAVE_MSIX
 static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
 {
 	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
@@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
-#endif
 
 /* Ack the irq line for an assigned device */
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
@@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
 	return 0;
 }
 
-#ifdef __KVM_HAVE_MSI
 static int assigned_device_enable_host_msi(struct kvm *kvm,
 					   struct kvm_assigned_dev_kernel *dev)
 {
@@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 
 	return 0;
 }
-#endif
 
-#ifdef __KVM_HAVE_MSIX
 static int assigned_device_enable_host_msix(struct kvm *kvm,
 					    struct kvm_assigned_dev_kernel *dev)
 {
@@ -443,8 +473,6 @@ err:
 	return r;
 }
 
-#endif
-
 static int assigned_device_enable_guest_intx(struct kvm *kvm,
 				struct kvm_assigned_dev_kernel *dev,
 				struct kvm_assigned_irq *irq)
@@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm,
 	return 0;
 }
 
-#ifdef __KVM_HAVE_MSI
 static int assigned_device_enable_guest_msi(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_irq *irq)
@@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 	dev->ack_notifier.gsi = -1;
 	return 0;
 }
-#endif
 
-#ifdef __KVM_HAVE_MSIX
 static int assigned_device_enable_guest_msix(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *dev,
 			struct kvm_assigned_irq *irq)
@@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 	dev->ack_notifier.gsi = -1;
 	return 0;
 }
-#endif
 
 static int assign_host_irq(struct kvm *kvm,
 			   struct kvm_assigned_dev_kernel *dev,
@@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm,
 	case KVM_DEV_IRQ_HOST_INTX:
 		r = assigned_device_enable_host_intx(kvm, dev);
 		break;
-#ifdef __KVM_HAVE_MSI
 	case KVM_DEV_IRQ_HOST_MSI:
 		r = assigned_device_enable_host_msi(kvm, dev);
 		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
 	case KVM_DEV_IRQ_HOST_MSIX:
 		r = assigned_device_enable_host_msix(kvm, dev);
 		break;
-#endif
 	default:
 		r = -EINVAL;
 	}
@@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm,
 	case KVM_DEV_IRQ_GUEST_INTX:
 		r = assigned_device_enable_guest_intx(kvm, dev, irq);
 		break;
-#ifdef __KVM_HAVE_MSI
 	case KVM_DEV_IRQ_GUEST_MSI:
 		r = assigned_device_enable_guest_msi(kvm, dev, irq);
 		break;
-#endif
-#ifdef __KVM_HAVE_MSIX
 	case KVM_DEV_IRQ_GUEST_MSIX:
 		r = assigned_device_enable_guest_msix(kvm, dev, irq);
 		break;
-#endif
 	default:
 		r = -EINVAL;
 	}
@@ -826,7 +842,6 @@ out:
 }
 
 
-#ifdef __KVM_HAVE_MSIX
 static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 				    struct kvm_assigned_msix_nr *entry_nr)
 {
@@ -906,7 +921,6 @@ msix_entry_out:
 
 	return r;
 }
-#endif
 
 static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
 		struct kvm_assigned_pci_dev *assigned_dev)
@@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
 		r = -EFAULT;
@@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#endif
 	case KVM_ASSIGN_SET_INTX_MASK: {
 		struct kvm_assigned_pci_dev assigned_dev;
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2fbea2544f24..6525e926f566 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -30,7 +30,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
-	xstate_bv &= XSTATE_EXTEND_MASK;
+	xstate_bv &= XFEATURE_MASK_EXTEND;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
 		        u32 eax, ebx, ecx, edx, offset;
@@ -51,7 +51,7 @@ u64 kvm_supported_xcr0(void)
 	u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
 
 	if (!kvm_x86_ops->mpx_supported())
-		xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+		xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
 
 	return xcr0;
 }
@@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
 		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD);
+		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
 
 	/* cpuid 0xD.1.eax */
 	const u32 kvm_supported_word10_x86_features =
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index dd05b9cef6ae..06332cb7e7d1 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
 	return best && (best->ebx & bit(X86_FEATURE_MPX));
 }
+
+static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
+}
+
+static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+	return best && (best->edx & bit(X86_FEATURE_RDTSCP));
+}
+
+/*
+ * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
+ */
+#define BIT_NRIPS	3
+
+static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0);
+
+	/*
+	 * NRIPS is a scattered cpuid feature, so we can't use
+	 * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit
+	 * position 8, not 3).
+	 */
+	return best && (best->edx & bit(BIT_NRIPS));
+}
+#undef BIT_NRIPS
+
 #endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a7557c16..1505587d06e9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
 #define GET_SMSTATE(type, smbase, offset)				  \
 	({								  \
 	 type __val;							  \
-	 int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,       \
-				     sizeof(__val), NULL);		  \
+	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,      \
+				      sizeof(__val));			  \
 	 if (r != X86EMUL_CONTINUE)					  \
 		 return X86EMUL_UNHANDLEABLE;				  \
 	 __val;								  \
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	u64 val, cr0, cr4;
 	u32 base3;
 	u16 selector;
-	int i;
+	int i, r;
 
 	for (i = 0; i < 16; i++)
 		*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 	dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
 	ctxt->ops->set_gdt(ctxt, &dt);
 
+	r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+	if (r != X86EMUL_CONTINUE)
+		return r;
+
 	for (i = 0; i < 6; i++) {
-		int r = rsm_load_seg_64(ctxt, smbase, i);
+		r = rsm_load_seg_64(ctxt, smbase, i);
 		if (r != X86EMUL_CONTINUE)
 			return r;
 	}
 
-	return rsm_enter_protected_mode(ctxt, cr0, cr4);
+	return X86EMUL_CONTINUE;
 }
 
 static int em_rsm(struct x86_emulate_ctxt *ctxt)
@@ -2480,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
 	/*
 	 * Get back to real mode, to prepare a safe state in which to load
-	 * CR0/CR3/CR4/EFER.  Also this will ensure that addresses passed
-	 * to read_std/write_std are not virtual.
-	 *
-	 * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
+	 * supports long mode.
 	 */
+	cr4 = ctxt->ops->get_cr(ctxt, 4);
+	if (emulator_has_longmode(ctxt)) {
+		struct desc_struct cs_desc;
+
+		/* Zero CR4.PCIDE before CR0.PG.  */
+		if (cr4 & X86_CR4_PCIDE) {
+			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+			cr4 &= ~X86_CR4_PCIDE;
+		}
+
+		/* A 32-bit code segment is required to clear EFER.LMA.  */
+		memset(&cs_desc, 0, sizeof(cs_desc));
+		cs_desc.type = 0xb;
+		cs_desc.s = cs_desc.g = cs_desc.p = 1;
+		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+	}
+
+	/* For the 64-bit case, this will clear EFER.LMA.  */
 	cr0 = ctxt->ops->get_cr(ctxt, 0);
 	if (cr0 & X86_CR0_PE)
 		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-	cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+	/* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
 	if (cr4 & X86_CR4_PAE)
 		ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+	/* And finally go back to 32-bit mode.  */
 	efer = 0;
 	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
 
@@ -4451,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
 	/* 0xA8 - 0xAF */
 	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
-	II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
+	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a8160d2ae362..62cf8c915e95 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	case HV_X64_MSR_TIME_REF_COUNT:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_RESET:
 		r = true;
 		break;
 	}
@@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 						 data);
 	case HV_X64_MSR_CRASH_CTL:
 		return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
+	case HV_X64_MSR_RESET:
+		if (data == 1) {
+			vcpu_debug(vcpu, "hyper-v reset requested\n");
+			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
+		}
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
@@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 	return 0;
 }
 
-static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+/* Calculate cpu time spent by current task in 100ns units */
+static u64 current_task_runtime_100ns(void)
+{
+	cputime_t utime, stime;
+
+	task_cputime_adjusted(current, &utime, &stime);
+	return div_u64(cputime_to_nsecs(utime + stime), 100);
+}
+
+static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 {
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
@@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
 	case HV_X64_MSR_TPR:
 		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	case HV_X64_MSR_VP_RUNTIME:
+		if (!host)
+			return 1;
+		hv->runtime_offset = data - current_task_runtime_100ns();
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
@@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 						 pdata);
 	case HV_X64_MSR_CRASH_CTL:
 		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
+	case HV_X64_MSR_RESET:
+		data = 0;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_APIC_ASSIST_PAGE:
 		data = hv->hv_vapic;
 		break;
+	case HV_X64_MSR_VP_RUNTIME:
+		data = current_task_runtime_100ns() + hv->runtime_offset;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		mutex_unlock(&vcpu->kvm->lock);
 		return r;
 	} else
-		return kvm_hv_set_msr(vcpu, msr, data);
+		return kvm_hv_set_msr(vcpu, msr, data, host);
 }
 
 int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f90952f64e79..08116ff227cc 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -35,6 +35,7 @@
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
 
+#include "ioapic.h"
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
@@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 	s64 interval;
 
-	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
+	if (!ioapic_in_kernel(kvm) ||
+	    ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
 		return;
 
 	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 856f79105bb5..88d0a92d3f94 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
 }
 
 
-static void update_handled_vectors(struct kvm_ioapic *ioapic)
-{
-	DECLARE_BITMAP(handled_vectors, 256);
-	int i;
-
-	memset(handled_vectors, 0, sizeof(handled_vectors));
-	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
-		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
-	memcpy(ioapic->handled_vectors, handled_vectors,
-	       sizeof(handled_vectors));
-	smp_wmb();
-}
-
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-			u32 *tmr)
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 	union kvm_ioapic_redirect_entry *e;
@@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 		    kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
 		    index == RTC_GSI) {
 			if (kvm_apic_match_dest(vcpu, NULL, 0,
-				e->fields.dest_id, e->fields.dest_mode)) {
+			             e->fields.dest_id, e->fields.dest_mode) ||
+			    (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
+			     kvm_apic_pending_eoi(vcpu, e->fields.vector)))
 				__set_bit(e->fields.vector,
 					(unsigned long *)eoi_exit_bitmap);
-				if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-					__set_bit(e->fields.vector,
-						(unsigned long *)tmr);
-			}
 		}
 	}
 	spin_unlock(&ioapic->lock);
@@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 			e->bits |= (u32) val;
 			e->fields.remote_irr = 0;
 		}
-		update_handled_vectors(ioapic);
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->id = 0;
 	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	rtc_irq_eoi_tracking_reset(ioapic);
-	update_handled_vectors(ioapic);
 }
 
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm)
 	if (ret < 0) {
 		kvm->arch.vioapic = NULL;
 		kfree(ioapic);
+		return ret;
 	}
 
+	kvm_vcpu_request_scan_ioapic(kvm);
 	return ret;
 }
 
@@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
 	ioapic->irr = 0;
 	ioapic->irr_delivered = 0;
-	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
 	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4e6256..084617d37c74 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@ struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -73,7 +74,6 @@ struct kvm_ioapic {
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
 	spinlock_t lock;
-	DECLARE_BITMAP(handled_vectors, 256);
 	struct rtc_status rtc_status;
 	struct delayed_work eoi_inject;
 	u32 irq_eoi[IOAPIC_NUM_PINS];
@@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
 	return kvm->arch.vioapic;
 }
 
-static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+static inline int ioapic_in_kernel(struct kvm *kvm)
 {
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	smp_rmb();
-	return test_bit(vector, ioapic->handled_vectors);
+	int ret;
+
+	ret = (ioapic_irqchip(kvm) != NULL);
+	return ret;
 }
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-			u32 *tmr);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
 #endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index a1ec6a50a05a..097060e33bd6 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+	return v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-	if (kvm_apic_accept_pic_intr(v))
-		return pic_irqchip(v->kvm)->output;	/* PIC */
-	else
+	u8 accept = kvm_apic_accept_pic_intr(v);
+
+	if (accept) {
+		if (irqchip_split(v->kvm))
+			return pending_userspace_extint(v);
+		else
+			return pic_irqchip(v->kvm)->output;
+	} else
 		return 0;
 }
 
@@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
 		return 1;
 
-	if (kvm_apic_vid_enabled(v->kvm))
+	if (kvm_vcpu_apic_vid_enabled(v))
 		return 0;
 
 	return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
@@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.pending;
 
 	if (kvm_cpu_has_extint(v))
@@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-	if (kvm_cpu_has_extint(v))
-		return kvm_pic_read_irq(v->kvm); /* PIC */
-	return -1;
+	if (kvm_cpu_has_extint(v)) {
+		if (irqchip_split(v->kvm)) {
+			int vector = v->arch.pending_external_vector;
+
+			v->arch.pending_external_vector = -1;
+			return vector;
+		} else
+			return kvm_pic_read_irq(v->kvm); /* PIC */
+	} else
+		return -1;
 }
 
 /*
@@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
 	int vector;
 
-	if (!irqchip_in_kernel(v->kvm))
+	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.nr;
 
 	vector = kvm_cpu_get_extint(v);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 3d782a2c336a..ae5c78f2337d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 	return kvm->arch.vpic;
 }
 
+static inline int pic_in_kernel(struct kvm *kvm)
+{
+	int ret;
+
+	ret = (pic_irqchip(kvm) != NULL);
+	return ret;
+}
+
+static inline int irqchip_split(struct kvm *kvm)
+{
+	return kvm->arch.irqchip_split;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	struct kvm_pic *vpic = pic_irqchip(kvm);
+	bool ret;
+
+	ret = (vpic != NULL);
+	ret |= irqchip_split(kvm);
 
 	/* Read vpic before kvm->irq_routing.  */
 	smp_rmb();
-	return vpic != NULL;
+	return ret;
+}
+
+static inline int lapic_in_kernel(struct kvm_vcpu *vcpu)
+{
+	/* Same as irqchip_in_kernel(vcpu->kvm), but with less
+	 * pointer chasing and no unnecessary memory barriers.
+	 */
+	return vcpu->arch.apic != NULL;
 }
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e5b58c..84b96d319909 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	return r;
 }
 
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-				   struct kvm_lapic_irq *irq)
+void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+		     struct kvm_lapic_irq *irq)
 {
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
@@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 	irq->level = 1;
 	irq->shorthand = 0;
 }
+EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 		struct kvm *kvm, int irq_source_id, int level, bool line_status)
@@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 }
 
 
-static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
-			 struct kvm *kvm)
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			      struct kvm *kvm, int irq_source_id, int level,
+			      bool line_status)
 {
 	struct kvm_lapic_irq irq;
 	int r;
 
+	if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
+		return -EWOULDBLOCK;
+
 	kvm_set_msi_irq(e, &irq);
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
@@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
 		return -EWOULDBLOCK;
 }
 
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- *  Other values - No need to retry.
- */
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
-	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-	struct kvm_kernel_irq_routing_entry *e;
-	int ret = -EINVAL;
-	int idx;
-
-	trace_kvm_set_irq(irq, level, irq_source_id);
-
-	/*
-	 * Injection into either PIC or IOAPIC might need to scan all CPUs,
-	 * which would need to be retried from thread context;  when same GSI
-	 * is connected to both PIC and IOAPIC, we'd have to report a
-	 * partial failure here.
-	 * Since there's no easy way to do this, we only support injecting MSI
-	 * which is limited to 1:1 GSI mapping.
-	 */
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
-		e = &entries[0];
-		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-			ret = kvm_set_msi_inatomic(e, kvm);
-		else
-			ret = -EWOULDBLOCK;
-	}
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-	return ret;
-}
-
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
@@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		goto unlock;
 	}
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-	if (!irqchip_in_kernel(kvm))
+	if (!ioapic_in_kernel(kvm))
 		goto unlock;
 
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
@@ -297,6 +266,33 @@ out:
 	return r;
 }
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu)
+{
+	int i, r = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+		return true;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		if (++r == 2)
+			return false;
+
+		*dest_vcpu = vcpu;
+	}
+
+	return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
@@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, default_routing,
 				   ARRAY_SIZE(default_routing), 0);
 }
+
+static const struct kvm_irq_routing_entry empty_routing[] = {};
+
+int kvm_setup_empty_irq_routing(struct kvm *kvm)
+{
+	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
+}
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+		return;
+	kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_kernel_irq_routing_entry *entry;
+	struct kvm_irq_routing_table *table;
+	u32 i, nr_ioapic_pins;
+	int idx;
+
+	/* kvm->irq_routing must be read after clearing
+	 * KVM_SCAN_IOAPIC. */
+	smp_mb();
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+			       kvm->arch.nr_reserved_ioapic_pins);
+	for (i = 0; i < nr_ioapic_pins; ++i) {
+		hlist_for_each_entry(entry, &table->map[i], link) {
+			u32 dest_id, dest_mode;
+			bool level;
+
+			if (entry->type != KVM_IRQ_ROUTING_MSI)
+				continue;
+			dest_id = (entry->msi.address_lo >> 12) & 0xff;
+			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+			level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
+			if (level && kvm_apic_match_dest(vcpu, NULL, 0,
+						dest_id, dest_mode)) {
+				u32 vector = entry->msi.data & 0xff;
+
+				__set_bit(vector,
+					  (unsigned long *) eoi_exit_bitmap);
+			}
+		}
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8d9013c5e1ee..ecd4ea1d28a8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,7 +209,7 @@ out:
 	if (old)
 		kfree_rcu(old, rcu);
 
-	kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	__kvm_apic_update_irr(pir, apic->regs);
+
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
@@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
 	vcpu = apic->vcpu;
 
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
+	if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
 		/* try to update RVI */
 		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 }
 
-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-	int i;
-
-	for (i = 0; i < 8; i++)
-		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
-}
-
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -764,6 +757,65 @@ out:
 	return ret;
 }
 
+bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			struct kvm_vcpu **dest_vcpu)
+{
+	struct kvm_apic_map *map;
+	bool ret = false;
+	struct kvm_lapic *dst = NULL;
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+		if (irq->dest_id == 0xFF)
+			goto out;
+
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
+			goto out;
+
+		dst = map->phys_map[irq->dest_id];
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	} else {
+		u16 cid;
+		unsigned long bitmap = 1;
+		int i, r = 0;
+
+		if (!kvm_apic_logical_map_valid(map))
+			goto out;
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
+
+		for_each_set_bit(i, &bitmap, 16) {
+			dst = map->logical_map[cid][i];
+			if (++r == 2)
+				goto out;
+		}
+
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
@@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_LOWEST:
 		vcpu->arch.apic_arb_prio++;
 	case APIC_DM_FIXED:
+		if (unlikely(trig_mode && !level))
+			break;
+
 		/* FIXME add logic for vcpu on reset */
 		if (unlikely(!apic_enabled(apic)))
 			break;
@@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (dest_map)
 			__set_bit(vcpu->vcpu_id, dest_map);
 
+		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
+			if (trig_mode)
+				apic_set_vector(vector, apic->regs + APIC_TMR);
+			else
+				apic_clear_vector(vector, apic->regs + APIC_TMR);
+		}
+
 		if (kvm_x86_ops->deliver_posted_interrupt)
 			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
 		else {
@@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
 }
 
+static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
+{
+	return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap);
+}
+
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
-	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
-		int trigger_mode;
-		if (apic_test_vector(vector, apic->regs + APIC_TMR))
-			trigger_mode = IOAPIC_LEVEL_TRIG;
-		else
-			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
+	int trigger_mode;
+
+	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
+	if (!kvm_ioapic_handles_vector(apic, vector))
+		return;
+
+	/* Request a KVM exit to inform the userspace IOAPIC. */
+	if (irqchip_split(apic->vcpu->kvm)) {
+		apic->vcpu->arch.pending_ioapic_eoi = vector;
+		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+		return;
 	}
+
+	if (apic_test_vector(vector, apic->regs + APIC_TMR))
+		trigger_mode = IOAPIC_LEVEL_TRIG;
+	else
+		trigger_mode = IOAPIC_EDGE_TRIG;
+
+	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 }
 
 static int apic_set_eoi(struct kvm_lapic *apic)
@@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 		apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
 		apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
 	}
-	apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
+	apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
 	apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
 	apic->highest_isr_cache = -1;
 	update_divide_count(apic);
@@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 		kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
 				apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	kvm_rtc_eoi_tracking_restore_one(vcpu);
+	if (ioapic_in_kernel(vcpu->kvm))
+		kvm_rtc_eoi_tracking_restore_one(vcpu);
+
+	vcpu->arch.apic_arb_prio = 0;
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
 	    /* Cache not set: could be safe but we don't bother. */
 	    apic->highest_isr_cache == -1 ||
 	    /* Need EOI to update ioapic. */
-	    kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
 		/*
 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
 		 * so we need not do anything here.
@@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 
 	if (reg == APIC_ICR2)
@@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
+	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 
 	if (reg == APIC_DFR || reg == APIC_ICR2) {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 764037991d26..fde8e35d5850 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
-void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void __kvm_apic_update_irr(u32 *pir, void *regs);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
@@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
+static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
 {
-	return kvm_x86_ops->vm_has_apicv(kvm);
+	return kvm_x86_ops->cpu_uses_apicv(vcpu);
 }
 
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
@@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
 
+bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			struct kvm_vcpu **dest_vcpu);
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff606f507913..7d85bcae3332 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm->arch.indirect_shadow_pages--;
 }
 
-static int has_wrprotected_page(struct kvm_vcpu *vcpu,
-				gfn_t gfn,
-				int level)
+static int __has_wrprotected_page(gfn_t gfn, int level,
+				  struct kvm_memory_slot *slot)
 {
-	struct kvm_memory_slot *slot;
 	struct kvm_lpage_info *linfo;
 
-	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 	if (slot) {
 		linfo = lpage_info_slot(gfn, slot, level);
 		return linfo->write_count;
@@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu,
 	return 1;
 }
 
+static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
+	return __has_wrprotected_page(gfn, level, slot);
+}
+
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned long page_size;
@@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 	return ret;
 }
 
+static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot,
+					  bool no_dirty_log)
+{
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
+		return false;
+	if (no_dirty_log && slot->dirty_bitmap)
+		return false;
+
+	return true;
+}
+
 static struct kvm_memory_slot *
 gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 			    bool no_dirty_log)
@@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 	struct kvm_memory_slot *slot;
 
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
-	      (no_dirty_log && slot->dirty_bitmap))
+	if (!memslot_valid_for_gpte(slot, no_dirty_log))
 		slot = NULL;
 
 	return slot;
 }
 
-static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
-{
-	return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
-}
-
-static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
+			 bool *force_pt_level)
 {
 	int host_level, level, max_level;
+	struct kvm_memory_slot *slot;
+
+	if (unlikely(*force_pt_level))
+		return PT_PAGE_TABLE_LEVEL;
+
+	slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn);
+	*force_pt_level = !memslot_valid_for_gpte(slot, true);
+	if (unlikely(*force_pt_level))
+		return PT_PAGE_TABLE_LEVEL;
 
 	host_level = host_mapping_level(vcpu->kvm, large_gfn);
 
@@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 	max_level = min(kvm_x86_ops->get_lpage_level(), host_level);
 
 	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
-		if (has_wrprotected_page(vcpu, large_gfn, level))
+		if (__has_wrprotected_page(large_gfn, level, slot))
 			break;
 
 	return level - 1;
@@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 {
 	int r;
 	int level;
-	int force_pt_level;
+	bool force_pt_level = false;
 	pfn_t pfn;
 	unsigned long mmu_seq;
 	bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
-	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+	level = mapping_level(vcpu, gfn, &force_pt_level);
 	if (likely(!force_pt_level)) {
-		level = mapping_level(vcpu, gfn);
 		/*
 		 * This path builds a PAE pagetable - so we can map
 		 * 2mb pages at maximum. Therefore check if the level
@@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 			level = PT_DIRECTORY_LEVEL;
 
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	} else
-		level = PT_PAGE_TABLE_LEVEL;
+	}
 
 	if (fast_page_fault(vcpu, v, level, error_code))
 		return 0;
@@ -3427,7 +3445,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 {
-	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+	if (unlikely(!lapic_in_kernel(vcpu) ||
 		     kvm_event_needs_reinjection(vcpu)))
 		return false;
 
@@ -3476,7 +3494,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	pfn_t pfn;
 	int r;
 	int level;
-	int force_pt_level;
+	bool force_pt_level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
 	int write = error_code & PFERR_WRITE_MASK;
@@ -3495,20 +3513,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (r)
 		return r;
 
-	if (mapping_level_dirty_bitmap(vcpu, gfn) ||
-	    !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL))
-		force_pt_level = 1;
-	else
-		force_pt_level = 0;
-
+	force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
+							   PT_DIRECTORY_LEVEL);
+	level = mapping_level(vcpu, gfn, &force_pt_level);
 	if (likely(!force_pt_level)) {
-		level = mapping_level(vcpu, gfn);
 		if (level > PT_DIRECTORY_LEVEL &&
 		    !check_hugepage_cache_consistency(vcpu, gfn, level))
 			level = PT_DIRECTORY_LEVEL;
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	} else
-		level = PT_PAGE_TABLE_LEVEL;
+	}
 
 	if (fast_page_fault(vcpu, gpa, level, error_code))
 		return 0;
@@ -3706,7 +3719,7 @@ static void
 __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 			    int maxphyaddr, bool execonly)
 {
-	int pte;
+	u64 bad_mt_xwr;
 
 	rsvd_check->rsvd_bits_mask[0][3] =
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
@@ -3724,14 +3737,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
 	rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
-	for (pte = 0; pte < 64; pte++) {
-		int rwx_bits = pte & 7;
-		int mt = pte >> 3;
-		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
-				rwx_bits == 0x2 || rwx_bits == 0x6 ||
-				(rwx_bits == 0x4 && !execonly))
-			rsvd_check->bad_mt_xwr |= (1ull << pte);
+	bad_mt_xwr = 0xFFull << (2 * 8);	/* bits 3..5 must not be 2 */
+	bad_mt_xwr |= 0xFFull << (3 * 8);	/* bits 3..5 must not be 3 */
+	bad_mt_xwr |= 0xFFull << (7 * 8);	/* bits 3..5 must not be 7 */
+	bad_mt_xwr |= REPEAT_BYTE(1ull << 2);	/* bits 0..2 must not be 010 */
+	bad_mt_xwr |= REPEAT_BYTE(1ull << 6);	/* bits 0..2 must not be 110 */
+	if (!execonly) {
+		/* bits 0..2 must not be 100 unless VMX capabilities allow it */
+		bad_mt_xwr |= REPEAT_BYTE(1ull << 4);
 	}
+	rsvd_check->bad_mt_xwr = bad_mt_xwr;
 }
 
 static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 736e6ab8784d..b41faa91a6f9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -698,7 +698,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	int r;
 	pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
-	int force_pt_level;
+	bool force_pt_level = false;
 	unsigned long mmu_seq;
 	bool map_writable, is_self_change_mapping;
 
@@ -743,15 +743,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
 	      &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);
 
-	if (walker.level >= PT_DIRECTORY_LEVEL)
-		force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn)
-		   || is_self_change_mapping;
-	else
-		force_pt_level = 1;
-	if (!force_pt_level) {
-		level = min(walker.level, mapping_level(vcpu, walker.gfn));
-		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
-	}
+	if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) {
+		level = mapping_level(vcpu, walker.gfn, &force_pt_level);
+		if (likely(!force_pt_level)) {
+			level = min(walker.level, level);
+			walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
+		}
+	} else
+		force_pt_level = true;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94b7d15db3fc..f2c8e4917688 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -159,6 +159,9 @@ struct vcpu_svm {
 	u32 apf_reason;
 
 	u64  tsc_ratio;
+
+	/* cached guest cpuid flags for faster access */
+	bool nrips_enabled	: 1;
 };
 
 static DEFINE_PER_CPU(u64, current_tsc_ratio);
@@ -514,7 +517,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (svm->vmcb->control.next_rip != 0) {
-		WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
 		svm->next_rip = svm->vmcb->control.next_rip;
 	}
 
@@ -866,64 +869,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
-#define MTRR_TYPE_UC_MINUS	7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
-	/*
-	 * WT and WP aren't always available in the host PAT.  Treat
-	 * them as UC and UC- respectively.  Everything else should be
-	 * there.
-	 */
-	switch (mtrr)
-	{
-	case MTRR_TYPE_WRTHROUGH:
-		return MTRR_TYPE_UNCACHABLE;
-	case MTRR_TYPE_WRPROT:
-		return MTRR_TYPE_UC_MINUS;
-	default:
-		BUG();
-	}
-}
-
-static void build_mtrr2protval(void)
-{
-	int i;
-	u64 pat;
-
-	for (i = 0; i < 8; i++)
-		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
-	/* Ignore the invalid MTRR types.  */
-	mtrr2protval[2] = 0;
-	mtrr2protval[3] = 0;
-
-	/*
-	 * Use host PAT value to figure out the mapping from guest MTRR
-	 * values to nested page table PAT/PCD/PWT values.  We do not
-	 * want to change the host PAT value every time we enter the
-	 * guest.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	for (i = 0; i < 8; i++) {
-		u8 mtrr = pat >> (8 * i);
-
-		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
-			mtrr2protval[mtrr] = __cm_idx2pte(i);
-	}
-
-	for (i = 0; i < 8; i++) {
-		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
-			u8 fallback = fallback_mtrr_type(i);
-			mtrr2protval[i] = mtrr2protval[fallback];
-			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
-		}
-	}
-}
-
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -990,7 +935,6 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
-	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1145,44 +1089,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
-	struct kvm_vcpu *vcpu = &svm->vcpu;
-
-	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
-	 *
-	 * AMD does not have IPAT.  To emulate it for the case of guests
-	 * with no assigned devices, just set everything to WB.  If guests
-	 * have assigned devices, however, we cannot force WB for RAM
-	 * pages only, so use the guest PAT directly.
-	 */
-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
-		*g_pat = 0x0606060606060606;
-	else
-		*g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	u8 mtrr;
-
-	/*
-	 * 1. MMIO: trust guest MTRR, so same as item 3.
-	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
-	 * 3. Passthrough: can't guarantee the result, try to trust guest.
-	 */
-	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
-		return 0;
-
-	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
-	    kvm_read_cr0(vcpu) & X86_CR0_CD)
-		return _PAGE_NOCACHE;
-
-	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
-	return mtrr2protval[mtrr];
-}
-
-static void init_vmcb(struct vcpu_svm *svm, bool init_event)
+static void init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1253,8 +1160,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-	if (!init_event)
-		svm_set_efer(&svm->vcpu, 0);
+	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
@@ -1278,7 +1184,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
-		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -1309,7 +1214,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 			svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	}
-	init_vmcb(svm, init_event);
+	init_vmcb(svm);
 
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1365,7 +1270,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 	svm_init_osvw(&svm->vcpu);
 
@@ -1673,10 +1578,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-
-	/* These are emulated via page tables.  */
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
@@ -1984,7 +1892,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
 	 * so reinitialize it.
 	 */
 	clear_page(svm->vmcb);
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
@@ -2459,7 +2367,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
 	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
 	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
-	nested_vmcb->control.next_rip          = vmcb->control.next_rip;
+
+	if (svm->nrips_enabled)
+		nested_vmcb->control.next_rip  = vmcb->control.next_rip;
 
 	/*
 	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
@@ -3154,7 +3064,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	r = cr_interception(svm);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (lapic_in_kernel(&svm->vcpu))
 		return r;
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return r;
@@ -3351,16 +3261,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
-	case MSR_IA32_CR_PAT:
-		if (npt_enabled) {
-			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
-				return 1;
-			vcpu->arch.pat = data;
-			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
-			mark_dirty(svm->vmcb, VMCB_NPT);
-			break;
-		}
-		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
@@ -3398,24 +3298,11 @@ static int msr_interception(struct vcpu_svm *svm)
 
 static int interrupt_window_interception(struct vcpu_svm *svm)
 {
-	struct kvm_run *kvm_run = svm->vcpu.run;
-
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	mark_dirty(svm->vmcb, VMCB_INTR);
 	++svm->vcpu.stat.irq_window_exits;
-	/*
-	 * If the user space waits to inject interrupts, exit as soon as
-	 * possible
-	 */
-	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
-	    kvm_run->request_interrupt_window &&
-	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
-		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		return 0;
-	}
-
 	return 1;
 }
 
@@ -3763,12 +3650,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	return;
 }
 
-static int svm_vm_has_apicv(struct kvm *kvm)
+static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
 	return 0;
 }
 
-static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 {
 	return;
 }
@@ -4195,8 +4082,17 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	return 0;
+}
+
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	/* Update nrips enabled cache */
+	svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
 }
 
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -4524,7 +4420,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.enable_irq_window = enable_irq_window,
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-	.vm_has_apicv = svm_vm_has_apicv,
+	.cpu_uses_apicv = svm_cpu_uses_apicv,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
 	.sync_pir_to_irr = svm_sync_pir_to_irr,
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4eae7c35ddf5..120302511802 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -129,6 +129,24 @@ TRACE_EVENT(kvm_pio,
 );
 
 /*
+ * Tracepoint for fast mmio.
+ */
+TRACE_EVENT(kvm_fast_mmio,
+	TP_PROTO(u64 gpa),
+	TP_ARGS(gpa),
+
+	TP_STRUCT__entry(
+		__field(u64,	gpa)
+	),
+
+	TP_fast_assign(
+		__entry->gpa		= gpa;
+	),
+
+	TP_printk("fast mmio at gpa 0x%llx", __entry->gpa)
+);
+
+/*
  * Tracepoint for cpuid.
  */
 TRACE_EVENT(kvm_cpuid,
@@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm,
 		  __entry->smbase)
 );
 
+/*
+ * Tracepoint for VT-d posted-interrupts.
+ */
+TRACE_EVENT(kvm_pi_irte_update,
+	TP_PROTO(unsigned int vcpu_id, unsigned int gsi,
+		 unsigned int gvec, u64 pi_desc_addr, bool set),
+	TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id		)
+		__field(	unsigned int,	gsi		)
+		__field(	unsigned int,	gvec		)
+		__field(	u64,		pi_desc_addr	)
+		__field(	bool,		set		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->gsi		= gsi;
+		__entry->gvec		= gvec;
+		__entry->pi_desc_addr	= pi_desc_addr;
+		__entry->set		= set;
+	),
+
+	TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, "
+		  "gvec: 0x%x, pi_desc_addr: 0x%llx",
+		  __entry->set ? "enabled and being updated" : "disabled",
+		  __entry->vcpu_id,
+		  __entry->gsi,
+		  __entry->gvec,
+		  __entry->pi_desc_addr)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64076740251e..5eb56ed77c1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -35,6 +35,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
+#include <asm/cpu.h>
 #include <asm/io.h>
 #include <asm/desc.h>
 #include <asm/vmx.h>
@@ -45,6 +46,7 @@
 #include <asm/debugreg.h>
 #include <asm/kexec.h>
 #include <asm/apic.h>
+#include <asm/irq_remapping.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -424,6 +426,9 @@ struct nested_vmx {
 	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
 	u64 vmcs01_debugctl;
 
+	u16 vpid02;
+	u16 last_vpid;
+
 	u32 nested_vmx_procbased_ctls_low;
 	u32 nested_vmx_procbased_ctls_high;
 	u32 nested_vmx_true_procbased_ctls_low;
@@ -440,14 +445,33 @@ struct nested_vmx {
 	u32 nested_vmx_misc_low;
 	u32 nested_vmx_misc_high;
 	u32 nested_vmx_ept_caps;
+	u32 nested_vmx_vpid_caps;
 };
 
 #define POSTED_INTR_ON  0
+#define POSTED_INTR_SN  1
+
 /* Posted-Interrupt Descriptor */
 struct pi_desc {
 	u32 pir[8];     /* Posted interrupt requested */
-	u32 control;	/* bit 0 of control is outstanding notification bit */
-	u32 rsvd[7];
+	union {
+		struct {
+				/* bit 256 - Outstanding Notification */
+			u16	on	: 1,
+				/* bit 257 - Suppress Notification */
+				sn	: 1,
+				/* bit 271:258 - Reserved */
+				rsvd_1	: 14;
+				/* bit 279:272 - Notification Vector */
+			u8	nv;
+				/* bit 287:280 - Reserved */
+			u8	rsvd_2;
+				/* bit 319:288 - Notification Destination */
+			u32	ndst;
+		};
+		u64 control;
+	};
+	u32 rsvd[6];
 } __aligned(64);
 
 static bool pi_test_and_set_on(struct pi_desc *pi_desc)
@@ -467,6 +491,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+	return clear_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+	return set_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_on(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_ON,
+			(unsigned long *)&pi_desc->control);
+}
+
+static inline int pi_test_sn(struct pi_desc *pi_desc)
+{
+	return test_bit(POSTED_INTR_SN,
+			(unsigned long *)&pi_desc->control);
+}
+
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
 	unsigned long         host_rsp;
@@ -532,8 +580,6 @@ struct vcpu_vmx {
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
 
-	bool rdtscp_enabled;
-
 	/* Posted interrupt descriptor */
 	struct pi_desc pi_desc;
 
@@ -563,6 +609,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
+static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+{
+	return &(to_vmx(vcpu)->pi_desc);
+}
+
 #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
 #define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
@@ -809,7 +860,7 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
-static int vmx_vm_has_apicv(struct kvm *kvm);
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -831,6 +882,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
+/*
+ * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
+ * can find which vCPU should be waken up.
+ */
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
+
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
@@ -946,9 +1004,9 @@ static inline bool cpu_has_vmx_tpr_shadow(void)
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 }
 
-static inline bool vm_need_tpr_shadow(struct kvm *kvm)
+static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
 {
-	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
+	return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
 }
 
 static inline bool cpu_has_secondary_exec_ctrls(void)
@@ -983,7 +1041,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 
 static inline bool cpu_has_vmx_posted_intr(void)
 {
-	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
+		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
 }
 
 static inline bool cpu_has_vmx_apicv(void)
@@ -1062,9 +1121,9 @@ static inline bool cpu_has_vmx_ple(void)
 		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 }
 
-static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
+static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 {
-	return flexpriority_enabled && irqchip_in_kernel(kvm);
+	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
 static inline bool cpu_has_vmx_vpid(void)
@@ -1157,6 +1216,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
 }
 
+static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
+}
+
 static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
@@ -1337,13 +1401,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 			 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_single(int vpid)
 {
-	if (vmx->vpid == 0)
+	if (vpid == 0)
 		return;
 
 	if (cpu_has_vmx_invvpid_single())
-		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0);
 }
 
 static inline void vpid_sync_vcpu_global(void)
@@ -1352,10 +1416,10 @@ static inline void vpid_sync_vcpu_global(void)
 		__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
 }
 
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
+static inline void vpid_sync_context(int vpid)
 {
 	if (cpu_has_vmx_invvpid_single())
-		vpid_sync_vcpu_single(vmx);
+		vpid_sync_vcpu_single(vpid);
 	else
 		vpid_sync_vcpu_global();
 }
@@ -1895,6 +1959,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 	preempt_enable();
 }
 
+static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		/*
+		 * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
+		 * are two possible cases:
+		 * 1. After running 'pre_block', context switch
+		 *    happened. For this case, 'sn' was set in
+		 *    vmx_vcpu_put(), so we need to clear it here.
+		 * 2. After running 'pre_block', we were blocked,
+		 *    and woken up by some other guy. For this case,
+		 *    we don't need to do anything, 'pi_post_block'
+		 *    will do everything for us. However, we cannot
+		 *    check whether it is case #1 or case #2 here
+		 *    (maybe, not needed), so we also clear sn here,
+		 *    I think it is not a big deal.
+		 */
+		if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
+			if (vcpu->cpu != cpu) {
+				dest = cpu_physical_id(cpu);
+
+				if (x2apic_enabled())
+					new.ndst = dest;
+				else
+					new.ndst = (dest << 8) & 0xFF00;
+			}
+
+			/* set 'NV' to 'notification vector' */
+			new.nv = POSTED_INTR_VECTOR;
+		}
+
+		/* Allow posting non-urgent interrupts */
+		new.sn = 0;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+}
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -1945,10 +2055,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
 		vmx->loaded_vmcs->cpu = cpu;
 	}
+
+	vmx_vcpu_pi_load(vcpu, cpu);
+}
+
+static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	/* Set SN when the vCPU is preempted */
+	if (vcpu->preempted)
+		pi_set_sn(pi_desc);
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	vmx_vcpu_pi_put(vcpu);
+
 	__vmx_load_host_state(to_vmx(vcpu));
 	if (!vmm_exclusive) {
 		__loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
@@ -2207,7 +2334,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		if (index >= 0)
 			move_msr_up(vmx, index, save_nmsrs++);
 		index = __find_msr_index(vmx, MSR_TSC_AUX);
-		if (index >= 0 && vmx->rdtscp_enabled)
+		if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu))
 			move_msr_up(vmx, index, save_nmsrs++);
 		/*
 		 * MSR_STAR is only needed on long mode guests, and only
@@ -2377,7 +2504,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	vmx->nested.nested_vmx_pinbased_ctls_high |=
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (vmx_cpu_uses_apicv(&vmx->vcpu))
 		vmx->nested.nested_vmx_pinbased_ctls_high |=
 			PIN_BASED_POSTED_INTR;
 
@@ -2471,10 +2598,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_RDTSCP |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_ENABLE_VPID |
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING |
-		SECONDARY_EXEC_XSAVES;
+		SECONDARY_EXEC_XSAVES |
+		SECONDARY_EXEC_PCOMMIT;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
@@ -2493,6 +2622,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
+	if (enable_vpid)
+		vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+				VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+	else
+		vmx->nested.nested_vmx_vpid_caps = 0;
+
 	if (enable_unrestricted_guest)
 		vmx->nested.nested_vmx_secondary_ctls_high |=
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -2608,7 +2743,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 		/* Currently, no nested vpid support */
-		*pdata = vmx->nested.nested_vmx_ept_caps;
+		*pdata = vmx->nested.nested_vmx_ept_caps |
+			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
 	default:
 		return 1;
@@ -2673,7 +2809,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.ia32_xss;
 		break;
 	case MSR_TSC_AUX:
-		if (!to_vmx(vcpu)->rdtscp_enabled)
+		if (!guest_cpuid_has_rdtscp(vcpu))
 			return 1;
 		/* Otherwise falls through */
 	default:
@@ -2779,7 +2915,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
 		break;
 	case MSR_TSC_AUX:
-		if (!vmx->rdtscp_enabled)
+		if (!guest_cpuid_has_rdtscp(vcpu))
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
@@ -2874,6 +3010,8 @@ static int hardware_enable(void)
 		return -EBUSY;
 
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
+	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 
 	/*
 	 * Now we can enable the vmclear operation in kdump
@@ -3015,7 +3153,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 			SECONDARY_EXEC_SHADOW_VMCS |
 			SECONDARY_EXEC_XSAVES |
-			SECONDARY_EXEC_ENABLE_PML;
+			SECONDARY_EXEC_ENABLE_PML |
+			SECONDARY_EXEC_PCOMMIT;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3441,9 +3580,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
 {
-	vpid_sync_context(to_vmx(vcpu));
+	vpid_sync_context(vpid);
 	if (enable_ept) {
 		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 			return;
@@ -3451,6 +3590,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -3644,20 +3788,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		if (!is_paging(vcpu)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 |= X86_CR4_PSE;
-			/*
-			 * SMEP/SMAP is disabled if CPU is in non-paging mode
-			 * in hardware. However KVM always uses paging mode to
-			 * emulate guest non-paging mode with TDP.
-			 * To emulate this behavior, SMEP/SMAP needs to be
-			 * manually disabled when guest switches to non-paging
-			 * mode.
-			 */
-			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
 		} else if (!(cr4 & X86_CR4_PAE)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 		}
 	}
 
+	if (!enable_unrestricted_guest && !is_paging(vcpu))
+		/*
+		 * SMEP/SMAP is disabled if CPU is in non-paging mode in
+		 * hardware.  However KVM always uses paging mode without
+		 * unrestricted guest.
+		 * To emulate this behavior, SMEP/SMAP needs to be manually
+		 * disabled when guest switches to non-paging mode.
+		 */
+		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
+
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
 	return 0;
@@ -4105,17 +4250,13 @@ static void seg_setup(int seg)
 static int alloc_apic_access_page(struct kvm *kvm)
 {
 	struct page *page;
-	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
 	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page_done)
 		goto out;
-	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
-	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+	r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				    APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
 	if (r)
 		goto out;
 
@@ -4140,44 +4281,38 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 {
 	/* Called with kvm->slots_lock held. */
 
-	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
 	BUG_ON(kvm->arch.ept_identity_pagetable_done);
 
-	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-	kvm_userspace_mem.flags = 0;
-	kvm_userspace_mem.guest_phys_addr =
-		kvm->arch.ept_identity_map_addr;
-	kvm_userspace_mem.memory_size = PAGE_SIZE;
-	r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+				    kvm->arch.ept_identity_map_addr, PAGE_SIZE);
 
 	return r;
 }
 
-static void allocate_vpid(struct vcpu_vmx *vmx)
+static int allocate_vpid(void)
 {
 	int vpid;
 
-	vmx->vpid = 0;
 	if (!enable_vpid)
-		return;
+		return 0;
 	spin_lock(&vmx_vpid_lock);
 	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
-	if (vpid < VMX_NR_VPIDS) {
-		vmx->vpid = vpid;
+	if (vpid < VMX_NR_VPIDS)
 		__set_bit(vpid, vmx_vpid_bitmap);
-	}
+	else
+		vpid = 0;
 	spin_unlock(&vmx_vpid_lock);
+	return vpid;
 }
 
-static void free_vpid(struct vcpu_vmx *vmx)
+static void free_vpid(int vpid)
 {
-	if (!enable_vpid)
+	if (!enable_vpid || vpid == 0)
 		return;
 	spin_lock(&vmx_vpid_lock);
-	if (vmx->vpid != 0)
-		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
+	__clear_bit(vpid, vmx_vpid_bitmap);
 	spin_unlock(&vmx_vpid_lock);
 }
 
@@ -4332,9 +4467,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
 			msr, MSR_TYPE_W);
 }
 
-static int vmx_vm_has_apicv(struct kvm *kvm)
+static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
 {
-	return enable_apicv && irqchip_in_kernel(kvm);
+	return enable_apicv && lapic_in_kernel(vcpu);
 }
 
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4378,6 +4513,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_SMP
 	if (vcpu->mode == IN_GUEST_MODE) {
+		struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+		/*
+		 * Currently, we don't support urgent interrupt,
+		 * all interrupts are recognized as non-urgent
+		 * interrupt, so we cannot post interrupts when
+		 * 'SN' is set.
+		 *
+		 * If the vcpu is in guest mode, it means it is
+		 * running instead of being scheduled out and
+		 * waiting in the run queue, and that's the only
+		 * case when 'SN' is set currently, warning if
+		 * 'SN' is set.
+		 */
+		WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
+
 		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
 				POSTED_INTR_VECTOR);
 		return true;
@@ -4514,7 +4665,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 {
 	u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
-	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
 	return pin_based_exec_ctrl;
 }
@@ -4526,7 +4677,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 	if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
 		exec_control &= ~CPU_BASED_MOV_DR_EXITING;
 
-	if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
+	if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
 		exec_control &= ~CPU_BASED_TPR_SHADOW;
 #ifdef CONFIG_X86_64
 		exec_control |= CPU_BASED_CR8_STORE_EXITING |
@@ -4543,7 +4694,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 {
 	u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
-	if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+	if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu))
 		exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 	if (vmx->vpid == 0)
 		exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
@@ -4557,7 +4708,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
 	if (!ple_gap)
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-	if (!vmx_vm_has_apicv(vmx->vcpu.kvm))
+	if (!vmx_cpu_uses_apicv(&vmx->vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 	exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -4567,8 +4718,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
 	   a current VMCS12
 	*/
 	exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-	/* PML is enabled/disabled in creating/destorying vcpu */
-	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+	if (!enable_pml)
+		exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
+
+	/* Currently, we allow L1 guest to directly run pcommit instruction. */
+	exec_control &= ~SECONDARY_EXEC_PCOMMIT;
 
 	return exec_control;
 }
@@ -4613,12 +4768,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
-	if (cpu_has_secondary_exec_ctrls()) {
+	if (cpu_has_secondary_exec_ctrls())
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 				vmx_secondary_exec_control(vmx));
-	}
 
-	if (vmx_vm_has_apicv(vmx->vcpu.kvm)) {
+	if (vmx_cpu_uses_apicv(&vmx->vcpu)) {
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
@@ -4762,7 +4916,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	if (cpu_has_vmx_tpr_shadow() && !init_event) {
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
-		if (vm_need_tpr_shadow(vcpu->kvm))
+		if (cpu_need_tpr_shadow(vcpu))
 			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 				     __pa(vcpu->arch.apic->regs));
 		vmcs_write32(TPR_THRESHOLD, 0);
@@ -4770,7 +4924,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-	if (vmx_vm_has_apicv(vcpu->kvm))
+	if (vmx_cpu_uses_apicv(vcpu))
 		memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 
 	if (vmx->vpid != 0)
@@ -4780,12 +4934,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx->vcpu.arch.cr0 = cr0;
 	vmx_set_cr4(vcpu, 0);
-	if (!init_event)
-		vmx_set_efer(vcpu, 0);
+	vmx_set_efer(vcpu, 0);
 	vmx_fpu_activate(vcpu);
 	update_exception_bitmap(vcpu);
 
-	vpid_sync_context(vmx);
+	vpid_sync_context(vmx->vpid);
 }
 
 /*
@@ -4949,14 +5102,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
-	struct kvm_userspace_memory_region tss_mem = {
-		.slot = TSS_PRIVATE_MEMSLOT,
-		.guest_phys_addr = addr,
-		.memory_size = PAGE_SIZE * 3,
-		.flags = 0,
-	};
 
-	ret = x86_set_memory_region(kvm, &tss_mem);
+	ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+				    PAGE_SIZE * 3);
 	if (ret)
 		return ret;
 	kvm->arch.tss_addr = addr;
@@ -5310,7 +5458,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 				u8 cr8 = (u8)val;
 				err = kvm_set_cr8(vcpu, cr8);
 				kvm_complete_insn_gp(vcpu, err);
-				if (irqchip_in_kernel(vcpu->kvm))
+				if (lapic_in_kernel(vcpu))
 					return 1;
 				if (cr8_prev <= cr8)
 					return 1;
@@ -5524,17 +5672,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	++vcpu->stat.irq_window_exits;
-
-	/*
-	 * If the user space waits to inject interrupts, exit as soon as
-	 * possible
-	 */
-	if (!irqchip_in_kernel(vcpu->kvm) &&
-	    vcpu->run->request_interrupt_window &&
-	    !kvm_cpu_has_interrupt(vcpu)) {
-		vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-		return 0;
-	}
 	return 1;
 }
 
@@ -5767,6 +5904,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
 		skip_emulated_instruction(vcpu);
+		trace_kvm_fast_mmio(gpa);
 		return 1;
 	}
 
@@ -5924,6 +6062,25 @@ static void update_ple_window_actual_max(void)
 			                    ple_window_grow, INT_MIN);
 }
 
+/*
+ * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
+ */
+static void wakeup_handler(void)
+{
+	struct kvm_vcpu *vcpu;
+	int cpu = smp_processor_id();
+
+	spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+	list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
+			blocked_vcpu_list) {
+		struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+		if (pi_test_on(pi_desc) == 1)
+			kvm_vcpu_kick(vcpu);
+	}
+	spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
+}
+
 static __init int hardware_setup(void)
 {
 	int r = -ENOMEM, i, msr;
@@ -6110,6 +6267,8 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 	}
 
+	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+
 	return alloc_kvm_area();
 
 out8:
@@ -6641,7 +6800,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 
 static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 {
-	u32 exec_control;
 	if (vmx->nested.current_vmptr == -1ull)
 		return;
 
@@ -6654,9 +6812,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
 		   they were modified */
 		copy_shadow_to_vmcs12(vmx);
 		vmx->nested.sync_shadow_vmcs = false;
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
-		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+		vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+				SECONDARY_EXEC_SHADOW_VMCS);
 		vmcs_write64(VMCS_LINK_POINTER, -1ull);
 	}
 	vmx->nested.posted_intr_nv = -1;
@@ -6676,6 +6833,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		return;
 
 	vmx->nested.vmxon = false;
+	free_vpid(vmx->nested.vpid02);
 	nested_release_vmcs12(vmx);
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
@@ -7052,7 +7210,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	gpa_t vmptr;
-	u32 exec_control;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -7084,9 +7241,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		vmx->nested.current_vmcs12 = new_vmcs12;
 		vmx->nested.current_vmcs12_page = page;
 		if (enable_shadow_vmcs) {
-			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-			exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
-			vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+				      SECONDARY_EXEC_SHADOW_VMCS);
 			vmcs_write64(VMCS_LINK_POINTER,
 				     __pa(vmx->nested.current_shadow_vmcs));
 			vmx->nested.sync_shadow_vmcs = true;
@@ -7192,7 +7348,63 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 static int handle_invvpid(struct kvm_vcpu *vcpu)
 {
-	kvm_queue_exception(vcpu, UD_VECTOR);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 vmx_instruction_info;
+	unsigned long type, types;
+	gva_t gva;
+	struct x86_exception e;
+	int vpid;
+
+	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	      SECONDARY_EXEC_ENABLE_VPID) ||
+			!(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+
+	if (!(types & (1UL << type))) {
+		nested_vmx_failValid(vcpu,
+			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		return 1;
+	}
+
+	/* according to the intel vmx instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, false, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid,
+				sizeof(u32), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	switch (type) {
+	case VMX_VPID_EXTENT_ALL_CONTEXT:
+		if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+			return 1;
+		}
+		__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+		nested_vmx_succeed(vcpu);
+		break;
+	default:
+		/* Trap single context invalidation invvpid calls */
+		BUG_ON(1);
+		break;
+	}
+
+	skip_emulated_instruction(vcpu);
 	return 1;
 }
 
@@ -7221,6 +7433,13 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_pcommit(struct kvm_vcpu *vcpu)
+{
+	/* we never catch pcommit instruct for L1 guest. */
+	WARN_ON(1);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -7271,6 +7490,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_PCOMMIT]                 = handle_pcommit,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7572,6 +7792,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * the XSS exit bitmap in vmcs12.
 		 */
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+	case EXIT_REASON_PCOMMIT:
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
 	default:
 		return true;
 	}
@@ -7583,10 +7805,9 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_enable_pml(struct vcpu_vmx *vmx)
+static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
 {
 	struct page *pml_pg;
-	u32 exec_control;
 
 	pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!pml_pg)
@@ -7597,24 +7818,15 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx)
 	vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 	vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 
-	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-	exec_control |= SECONDARY_EXEC_ENABLE_PML;
-	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
-
 	return 0;
 }
 
-static void vmx_disable_pml(struct vcpu_vmx *vmx)
+static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
-	u32 exec_control;
-
-	ASSERT(vmx->pml_pg);
-	__free_page(vmx->pml_pg);
-	vmx->pml_pg = NULL;
-
-	exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-	exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
-	vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+	if (vmx->pml_pg) {
+		__free_page(vmx->pml_pg);
+		vmx->pml_pg = NULL;
+	}
 }
 
 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
@@ -7938,10 +8150,10 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	 * apicv
 	 */
 	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-				!vmx_vm_has_apicv(vcpu->kvm))
+				!vmx_cpu_uses_apicv(vcpu))
 		return;
 
-	if (!vm_need_tpr_shadow(vcpu->kvm))
+	if (!cpu_need_tpr_shadow(vcpu))
 		return;
 
 	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -8043,9 +8255,10 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 	}
 }
 
-static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 {
-	if (!vmx_vm_has_apicv(vcpu->kvm))
+	u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap;
+	if (!vmx_cpu_uses_apicv(vcpu))
 		return;
 
 	vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
@@ -8491,8 +8704,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (enable_pml)
-		vmx_disable_pml(vmx);
-	free_vpid(vmx);
+		vmx_destroy_pml_buffer(vmx);
+	free_vpid(vmx->vpid);
 	leave_guest_mode(vcpu);
 	vmx_load_vmcs01(vcpu);
 	free_nested(vmx);
@@ -8511,7 +8724,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 
-	allocate_vpid(vmx);
+	vmx->vpid = allocate_vpid();
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
@@ -8544,7 +8757,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	put_cpu();
 	if (err)
 		goto free_vmcs;
-	if (vm_need_virtualize_apic_accesses(kvm)) {
+	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
 			goto free_vmcs;
@@ -8559,8 +8772,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 	}
 
-	if (nested)
+	if (nested) {
 		nested_vmx_setup_ctls_msrs(vmx);
+		vmx->nested.vpid02 = allocate_vpid();
+	}
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
@@ -8573,7 +8788,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	 * for the guest, etc.
 	 */
 	if (enable_pml) {
-		err = vmx_enable_pml(vmx);
+		err = vmx_create_pml_buffer(vmx);
 		if (err)
 			goto free_vmcs;
 	}
@@ -8581,13 +8796,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	return &vmx->vcpu;
 
 free_vmcs:
+	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
 	kfree(vmx->guest_msrs);
 uninit_vcpu:
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
-	free_vpid(vmx);
+	free_vpid(vmx->vpid);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
 }
@@ -8617,17 +8833,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: guest may want to apply WC, trust it.
+	 * 1. MMIO: always map as UC
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.  So the same as item 1.
+	 *	result, try to trust guest.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (is_mmio) {
+		cache = MTRR_TYPE_UNCACHABLE;
+		goto exit;
+	}
+
+	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
@@ -8657,49 +8878,67 @@ static int vmx_get_lpage_level(void)
 		return PT_PDPE_LEVEL;
 }
 
+static void vmcs_set_secondary_exec_control(u32 new_ctl)
+{
+	/*
+	 * These bits in the secondary execution controls field
+	 * are dynamic, the others are mostly based on the hypervisor
+	 * architecture and the guest's CPUID.  Do not touch the
+	 * dynamic bits.
+	 */
+	u32 mask =
+		SECONDARY_EXEC_SHADOW_VMCS |
+		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
+	u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+
+	vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+		     (new_ctl & ~mask) | (cur_ctl & mask));
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 exec_control;
+	u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx);
 
-	vmx->rdtscp_enabled = false;
 	if (vmx_rdtscp_supported()) {
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		if (exec_control & SECONDARY_EXEC_RDTSCP) {
-			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-			if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
-				vmx->rdtscp_enabled = true;
-			else {
-				exec_control &= ~SECONDARY_EXEC_RDTSCP;
-				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-						exec_control);
-			}
+		bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu);
+		if (!rdtscp_enabled)
+			secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP;
+
+		if (nested) {
+			if (rdtscp_enabled)
+				vmx->nested.nested_vmx_secondary_ctls_high |=
+					SECONDARY_EXEC_RDTSCP;
+			else
+				vmx->nested.nested_vmx_secondary_ctls_high &=
+					~SECONDARY_EXEC_RDTSCP;
 		}
-		if (nested && !vmx->rdtscp_enabled)
-			vmx->nested.nested_vmx_secondary_ctls_high &=
-				~SECONDARY_EXEC_RDTSCP;
 	}
 
 	/* Exposing INVPCID only when PCID is exposed */
 	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
 	if (vmx_invpcid_supported() &&
-	    best && (best->ebx & bit(X86_FEATURE_INVPCID)) &&
-	    guest_cpuid_has_pcid(vcpu)) {
-		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-		exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
-		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-			     exec_control);
-	} else {
-		if (cpu_has_secondary_exec_ctrls()) {
-			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
-			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
-			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-				     exec_control);
-		}
+	    (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) ||
+	    !guest_cpuid_has_pcid(vcpu))) {
+		secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+
 		if (best)
 			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 	}
+
+	vmcs_set_secondary_exec_control(secondary_exec_ctl);
+
+	if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
+		if (guest_cpuid_has_pcommit(vcpu))
+			vmx->nested.nested_vmx_secondary_ctls_high |=
+				SECONDARY_EXEC_PCOMMIT;
+		else
+			vmx->nested.nested_vmx_secondary_ctls_high &=
+				~SECONDARY_EXEC_PCOMMIT;
+	}
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9307,13 +9546,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	if (cpu_has_secondary_exec_ctrls()) {
 		exec_control = vmx_secondary_exec_control(vmx);
-		if (!vmx->rdtscp_enabled)
-			exec_control &= ~SECONDARY_EXEC_RDTSCP;
+
 		/* Take the following fields only from vmcs12 */
 		exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 				  SECONDARY_EXEC_RDTSCP |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-				  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				  SECONDARY_EXEC_PCOMMIT);
 		if (nested_cpu_has(vmcs12,
 				CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9332,7 +9571,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmcs_write64(APIC_ACCESS_ADDR,
 				  page_to_phys(vmx->nested.apic_access_page));
 		} else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
-			    (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) {
+			    cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 			exec_control |=
 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 			kvm_vcpu_reload_apic_access_page(vcpu);
@@ -9442,12 +9681,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	if (enable_vpid) {
 		/*
-		 * Trivially support vpid by letting L2s share their parent
-		 * L1's vpid. TODO: move to a more elaborate solution, giving
-		 * each L2 its own vpid and exposing the vpid feature to L1.
+		 * There is no direct mapping between vpid02 and vpid12, the
+		 * vpid02 is per-vCPU for L0 and reused while the value of
+		 * vpid12 is changed w/ one invvpid during nested vmentry.
+		 * The vpid12 is allocated by L1 for L2, so it will not
+		 * influence global bitmap(for vpid01 and vpid02 allocation)
+		 * even if spawn a lot of nested vCPUs.
 		 */
-		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
-		vmx_flush_tlb(vcpu);
+		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
+			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+			if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+				vmx->nested.last_vpid = vmcs12->virtual_processor_id;
+				__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+			}
+		} else {
+			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+			vmx_flush_tlb(vcpu);
+		}
+
 	}
 
 	if (nested_cpu_has_ept(vmcs12)) {
@@ -10287,6 +10538,201 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+/*
+ * This routine does the following things for vCPU which is going
+ * to be blocked if VT-d PI is enabled.
+ * - Store the vCPU to the wakeup list, so when interrupts happen
+ *   we can find the right vCPU to wake up.
+ * - Change the Posted-interrupt descriptor as below:
+ *      'NDST' <-- vcpu->pre_pcpu
+ *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
+ * - If 'ON' is set during this process, which means at least one
+ *   interrupt is posted for this vCPU, we cannot block it, in
+ *   this case, return 1, otherwise, return 0.
+ *
+ */
+static int vmx_pre_block(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags;
+	unsigned int dest;
+	struct pi_desc old, new;
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return 0;
+
+	vcpu->pre_pcpu = vcpu->cpu;
+	spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+			  vcpu->pre_pcpu), flags);
+	list_add_tail(&vcpu->blocked_vcpu_list,
+		      &per_cpu(blocked_vcpu_on_cpu,
+		      vcpu->pre_pcpu));
+	spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
+			       vcpu->pre_pcpu), flags);
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		/*
+		 * We should not block the vCPU if
+		 * an interrupt is posted for it.
+		 */
+		if (pi_test_on(pi_desc) == 1) {
+			spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
+					  vcpu->pre_pcpu), flags);
+			list_del(&vcpu->blocked_vcpu_list);
+			spin_unlock_irqrestore(
+					&per_cpu(blocked_vcpu_on_cpu_lock,
+					vcpu->pre_pcpu), flags);
+			vcpu->pre_pcpu = -1;
+
+			return 1;
+		}
+
+		WARN((pi_desc->sn == 1),
+		     "Warning: SN field of posted-interrupts "
+		     "is set before blocking\n");
+
+		/*
+		 * Since vCPU can be preempted during this process,
+		 * vcpu->cpu could be different with pre_pcpu, we
+		 * need to set pre_pcpu as the destination of wakeup
+		 * notification event, then we can find the right vCPU
+		 * to wakeup in wakeup handler if interrupts happen
+		 * when the vCPU is in blocked state.
+		 */
+		dest = cpu_physical_id(vcpu->pre_pcpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* set 'NV' to 'wakeup vector' */
+		new.nv = POSTED_INTR_WAKEUP_VECTOR;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+
+	return 0;
+}
+
+static void vmx_post_block(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+	struct pi_desc old, new;
+	unsigned int dest;
+	unsigned long flags;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	do {
+		old.control = new.control = pi_desc->control;
+
+		dest = cpu_physical_id(vcpu->cpu);
+
+		if (x2apic_enabled())
+			new.ndst = dest;
+		else
+			new.ndst = (dest << 8) & 0xFF00;
+
+		/* Allow posting non-urgent interrupts */
+		new.sn = 0;
+
+		/* set 'NV' to 'notification vector' */
+		new.nv = POSTED_INTR_VECTOR;
+	} while (cmpxchg(&pi_desc->control, old.control,
+			new.control) != old.control);
+
+	if(vcpu->pre_pcpu != -1) {
+		spin_lock_irqsave(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		list_del(&vcpu->blocked_vcpu_list);
+		spin_unlock_irqrestore(
+			&per_cpu(blocked_vcpu_on_cpu_lock,
+			vcpu->pre_pcpu), flags);
+		vcpu->pre_pcpu = -1;
+	}
+}
+
+/*
+ * vmx_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * @set: set or unset PI
+ * returns 0 on success, < 0 on failure
+ */
+static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+			      uint32_t guest_irq, bool set)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_irq_routing_table *irq_rt;
+	struct kvm_lapic_irq irq;
+	struct kvm_vcpu *vcpu;
+	struct vcpu_data vcpu_info;
+	int idx, ret = -EINVAL;
+
+	if (!kvm_arch_has_assigned_device(kvm) ||
+		!irq_remapping_cap(IRQ_POSTING_CAP))
+		return 0;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+
+	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+		if (e->type != KVM_IRQ_ROUTING_MSI)
+			continue;
+		/*
+		 * VT-d PI cannot support posting multicast/broadcast
+		 * interrupts to a vCPU, we still use interrupt remapping
+		 * for these kind of interrupts.
+		 *
+		 * For lowest-priority interrupts, we only support
+		 * those with single CPU as the destination, e.g. user
+		 * configures the interrupts via /proc/irq or uses
+		 * irqbalance to make the interrupts single-CPU.
+		 *
+		 * We will support full lowest-priority interrupt later.
+		 */
+
+		kvm_set_msi_irq(e, &irq);
+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
+			continue;
+
+		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
+		vcpu_info.vector = irq.vector;
+
+		trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi,
+				vcpu_info.vector, vcpu_info.pi_desc_addr, set);
+
+		if (set)
+			ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
+		else {
+			/* suppress notification event before unposting */
+			pi_set_sn(vcpu_to_pi_desc(vcpu));
+			ret = irq_set_vcpu_affinity(host_irq, NULL);
+			pi_clear_sn(vcpu_to_pi_desc(vcpu));
+		}
+
+		if (ret < 0) {
+			printk(KERN_INFO "%s: failed to update PI IRTE\n",
+					__func__);
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+	return ret;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -10356,7 +10802,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
-	.vm_has_apicv = vmx_vm_has_apicv,
+	.cpu_uses_apicv = vmx_cpu_uses_apicv,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
@@ -10403,7 +10849,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.flush_log_dirty = vmx_flush_log_dirty,
 	.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
 
+	.pre_block = vmx_pre_block,
+	.post_block = vmx_post_block,
+
 	.pmu_ops = &intel_pmu_ops,
+
+	.update_pi_irte = vmx_update_pi_irte,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466bf8dee..4a6eff166fc6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -51,6 +51,8 @@
 #include <linux/pci.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/pvclock_gtod.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -64,6 +66,7 @@
 #include <asm/fpu/internal.h> /* Ugh! */
 #include <asm/pvclock.h>
 #include <asm/div64.h>
+#include <asm/irq_remapping.h>
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
@@ -622,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if ((cr0 ^ old_cr0) & update_bits)
 		kvm_mmu_reset_context(vcpu);
 
-	if ((cr0 ^ old_cr0) & X86_CR0_CD)
+	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
 		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
 
 	return 0;
@@ -663,9 +668,9 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
 	if (index != XCR_XFEATURE_ENABLED_MASK)
 		return 1;
-	if (!(xcr0 & XSTATE_FP))
+	if (!(xcr0 & XFEATURE_MASK_FP))
 		return 1;
-	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
+	if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
 		return 1;
 
 	/*
@@ -673,23 +678,24 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 	 * saving.  However, xcr0 bit 0 is always set, even if the
 	 * emulated CPU does not support XSAVE (see fx_init).
 	 */
-	valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
+	valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
 	if (xcr0 & ~valid_bits)
 		return 1;
 
-	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
+	if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
+	    (!(xcr0 & XFEATURE_MASK_BNDCSR)))
 		return 1;
 
-	if (xcr0 & XSTATE_AVX512) {
-		if (!(xcr0 & XSTATE_YMM))
+	if (xcr0 & XFEATURE_MASK_AVX512) {
+		if (!(xcr0 & XFEATURE_MASK_YMM))
 			return 1;
-		if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
 			return 1;
 	}
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
-	if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
+	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
 		kvm_update_cpuid(vcpu);
 	return 0;
 }
@@ -788,7 +794,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	if (cr8 & CR8_RESERVED_BITS)
 		return 1;
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu))
 		kvm_lapic_set_tpr(vcpu, cr8);
 	else
 		vcpu->arch.cr8 = cr8;
@@ -798,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8);
 
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 {
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (lapic_in_kernel(vcpu))
 		return kvm_lapic_get_cr8(vcpu);
 	else
 		return vcpu->arch.cr8;
@@ -952,6 +958,9 @@ static u32 emulated_msrs[] = {
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
 	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
 	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
+	HV_X64_MSR_RESET,
+	HV_X64_MSR_VP_INDEX,
+	HV_X64_MSR_VP_RUNTIME,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 
@@ -1708,8 +1717,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->pvclock_set_guest_stopped_request = false;
 	}
 
-	pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
 	/* If the host uses TSC clocksource, then it is stable */
 	if (use_master_clock)
 		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -1899,6 +1906,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu)
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
+	accumulate_steal_time(vcpu);
+
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
@@ -2007,8 +2016,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					&vcpu->requests);
 
 			ka->boot_vcpu_runs_old_kvmclock = tmp;
-
-			ka->kvmclock_offset = -get_kernel_ns();
 		}
 
 		vcpu->arch.time = data;
@@ -2051,12 +2058,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (!(data & KVM_MSR_ENABLED))
 			break;
 
-		vcpu->arch.st.last_steal = current->sched_info.run_delay;
-
-		preempt_disable();
-		accumulate_steal_time(vcpu);
-		preempt_enable();
-
 		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 
 		break;
@@ -2452,6 +2453,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_DISABLE_QUIRKS:
 	case KVM_CAP_SET_BOOT_CPU_ID:
+ 	case KVM_CAP_SPLIT_IRQCHIP:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -2631,7 +2633,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu->cpu = cpu;
 	}
 
-	accumulate_steal_time(vcpu);
 	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 }
 
@@ -2665,12 +2666,24 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 {
 	if (irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
-	if (irqchip_in_kernel(vcpu->kvm))
+
+	if (!irqchip_in_kernel(vcpu->kvm)) {
+		kvm_queue_interrupt(vcpu, irq->irq, false);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		return 0;
+	}
+
+	/*
+	 * With in-kernel LAPIC, we only use this to inject EXTINT, so
+	 * fail for in-kernel 8259.
+	 */
+	if (pic_in_kernel(vcpu->kvm))
 		return -ENXIO;
 
-	kvm_queue_interrupt(vcpu, irq->irq, false);
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	if (vcpu->arch.pending_external_vector != -1)
+		return -EEXIST;
 
+	vcpu->arch.pending_external_vector = irq->irq;
 	return 0;
 }
 
@@ -2909,7 +2922,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 	 * Copy each region from the possibly compacted offset to the
 	 * non-compacted offset.
 	 */
-	valid = xstate_bv & ~XSTATE_FPSSE;
+	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
 		u64 feature = valid & -valid;
 		int index = fls64(feature) - 1;
@@ -2947,7 +2960,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 	 * Copy each region from the non-compacted offset to the
 	 * possibly compacted offset.
 	 */
-	valid = xstate_bv & ~XSTATE_FPSSE;
+	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
 		u64 feature = valid & -valid;
 		int index = fls64(feature) - 1;
@@ -2975,7 +2988,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 			&vcpu->arch.guest_fpu.state.fxsave,
 			sizeof(struct fxregs_state));
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
-			XSTATE_FPSSE;
+			XFEATURE_MASK_FPSSE;
 	}
 }
 
@@ -2995,7 +3008,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 			return -EINVAL;
 		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
-		if (xstate_bv & ~XSTATE_FPSSE)
+		if (xstate_bv & ~XFEATURE_MASK_FPSSE)
 			return -EINVAL;
 		memcpy(&vcpu->arch.guest_fpu.state.fxsave,
 			guest_xsave->region, sizeof(struct fxregs_state));
@@ -3179,7 +3192,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_vapic_addr va;
 
 		r = -EINVAL;
-		if (!irqchip_in_kernel(vcpu->kvm))
+		if (!lapic_in_kernel(vcpu))
 			goto out;
 		r = -EFAULT;
 		if (copy_from_user(&va, argp, sizeof va))
@@ -3428,41 +3441,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
 	kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 
 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
-	int r = 0;
-
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
 		sizeof(ps->channels));
 	ps->flags = kvm->arch.vpit->pit_state.flags;
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	memset(&ps->reserved, 0, sizeof(ps->reserved));
-	return r;
+	return 0;
 }
 
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
-	int r = 0, start = 0;
+	int start = 0;
 	u32 prev_legacy, cur_legacy;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
 	prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3474,7 +3481,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 	kvm->arch.vpit->pit_state.flags = ps->flags;
 	kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	return r;
+	return 0;
 }
 
 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
@@ -3559,6 +3566,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.disabled_quirks = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_SPLIT_IRQCHIP: {
+		mutex_lock(&kvm->lock);
+		r = -EINVAL;
+		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+			goto split_irqchip_unlock;
+		r = -EEXIST;
+		if (irqchip_in_kernel(kvm))
+			goto split_irqchip_unlock;
+		if (atomic_read(&kvm->online_vcpus))
+			goto split_irqchip_unlock;
+		r = kvm_setup_empty_irq_routing(kvm);
+		if (r)
+			goto split_irqchip_unlock;
+		/* Pairs with irqchip_in_kernel. */
+		smp_wmb();
+		kvm->arch.irqchip_split = true;
+		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
+		r = 0;
+split_irqchip_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 	default:
 		r = -EINVAL;
 		break;
@@ -3672,7 +3701,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 			goto get_irqchip_out;
 		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
 		if (r)
@@ -3696,7 +3725,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		}
 
 		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
+		if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 			goto set_irqchip_out;
 		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 		if (r)
@@ -4063,6 +4092,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
+static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+		unsigned long addr, void *val, unsigned int bytes)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
+
+	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
+}
+
 int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				       gva_t addr, void *val,
 				       unsigned int bytes,
@@ -4798,6 +4836,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
+	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
@@ -5670,7 +5709,7 @@ void kvm_arch_exit(void)
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (lapic_in_kernel(vcpu)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
 		return 1;
 	} else {
@@ -5777,9 +5816,15 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
  */
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
-	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
-		vcpu->run->request_interrupt_window &&
-		kvm_arch_interrupt_allowed(vcpu));
+	if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
+		return false;
+
+	if (kvm_cpu_has_interrupt(vcpu))
+		return false;
+
+	return (irqchip_split(vcpu->kvm)
+		? kvm_apic_accept_pic_intr(vcpu)
+		: kvm_arch_interrupt_allowed(vcpu));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
@@ -5790,13 +5835,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
-		kvm_run->ready_for_interrupt_injection = 1;
-	else
+	if (!irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection =
 			kvm_arch_interrupt_allowed(vcpu) &&
 			!kvm_cpu_has_interrupt(vcpu) &&
 			!kvm_event_needs_reinjection(vcpu);
+	else if (!pic_in_kernel(vcpu->kvm))
+		kvm_run->ready_for_interrupt_injection =
+			kvm_apic_accept_pic_intr(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu);
+	else
+		kvm_run->ready_for_interrupt_injection = 1;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6147,18 +6196,18 @@ static void process_smi(struct kvm_vcpu *vcpu)
 
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
-	u64 eoi_exit_bitmap[4];
-	u32 tmr[8];
-
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 
-	memset(eoi_exit_bitmap, 0, 32);
-	memset(tmr, 0, 32);
+	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
 
-	kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
-	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
-	kvm_apic_update_tmr(vcpu, tmr);
+	if (irqchip_split(vcpu->kvm))
+		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
+	else {
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
+		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
+	}
+	kvm_x86_ops->load_eoi_exitmap(vcpu);
 }
 
 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -6171,7 +6220,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	struct page *page = NULL;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu))
 		return;
 
 	if (!kvm_x86_ops->set_apic_access_page_addr)
@@ -6209,7 +6258,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+	bool req_int_win = !lapic_in_kernel(vcpu) &&
 		vcpu->run->request_interrupt_window;
 	bool req_immediate_exit = false;
 
@@ -6261,6 +6310,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_pmu_handle_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_pmu_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+			if (test_bit(vcpu->arch.pending_ioapic_eoi,
+				     (void *) vcpu->arch.eoi_exit_bitmap)) {
+				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+				vcpu->run->eoi.vector =
+						vcpu->arch.pending_ioapic_eoi;
+				r = 0;
+				goto out;
+			}
+		}
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
@@ -6271,6 +6331,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			r = 0;
 			goto out;
 		}
+		if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
+			vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+			vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+			r = 0;
+			goto out;
+		}
+	}
+
+	/*
+	 * KVM_REQ_EVENT is not set when posted interrupts are set by
+	 * VT-d hardware, so we have to update RVI unconditionally.
+	 */
+	if (kvm_lapic_enabled(vcpu)) {
+		/*
+		 * Update architecture specific hints for APIC
+		 * virtual interrupt delivery.
+		 */
+		if (kvm_x86_ops->hwapic_irr_update)
+			kvm_x86_ops->hwapic_irr_update(vcpu,
+				kvm_lapic_find_highest_irr(vcpu));
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6289,13 +6369,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->enable_irq_window(vcpu);
 
 		if (kvm_lapic_enabled(vcpu)) {
-			/*
-			 * Update architecture specific hints for APIC
-			 * virtual interrupt delivery.
-			 */
-			if (kvm_x86_ops->hwapic_irr_update)
-				kvm_x86_ops->hwapic_irr_update(vcpu,
-					kvm_lapic_find_highest_irr(vcpu));
 			update_cr8_intercept(vcpu);
 			kvm_lapic_sync_to_vapic(vcpu);
 		}
@@ -6431,10 +6504,15 @@ out:
 
 static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 {
-	if (!kvm_arch_vcpu_runnable(vcpu)) {
+	if (!kvm_arch_vcpu_runnable(vcpu) &&
+	    (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
 		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 		kvm_vcpu_block(vcpu);
 		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+
+		if (kvm_x86_ops->post_block)
+			kvm_x86_ops->post_block(vcpu);
+
 		if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
 			return 1;
 	}
@@ -6457,6 +6535,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		!vcpu->arch.apf.halted);
+}
+
 static int vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -6465,11 +6549,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
 	for (;;) {
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
-		    !vcpu->arch.apf.halted)
+		if (kvm_vcpu_running(vcpu)) {
 			r = vcpu_enter_guest(vcpu);
-		else
+		} else {
 			r = vcpu_block(kvm, vcpu);
+		}
+
 		if (r <= 0)
 			break;
 
@@ -6478,8 +6563,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 			kvm_inject_pending_timer_irqs(vcpu);
 
 		if (dm_request_for_irq_injection(vcpu)) {
-			r = -EINTR;
-			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			r = 0;
+			vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 			++vcpu->stat.request_irq_exits;
 			break;
 		}
@@ -6606,7 +6691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	/* re-sync apic's tpr */
-	if (!irqchip_in_kernel(vcpu->kvm)) {
+	if (!lapic_in_kernel(vcpu)) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
 			r = -EINVAL;
 			goto out;
@@ -7004,7 +7089,7 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	/*
 	 * Ensure guest xcr0 is valid for loading
 	 */
-	vcpu->arch.xcr0 = XSTATE_FP;
+	vcpu->arch.xcr0 = XFEATURE_MASK_FP;
 
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
@@ -7306,7 +7391,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+	return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
 }
 
 struct static_key kvm_no_apic_vcpu __read_mostly;
@@ -7375,6 +7460,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
+	vcpu->arch.pending_external_vector = -1;
+
 	return 0;
 
 fail_free_mce_banks:
@@ -7400,7 +7487,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!lapic_in_kernel(vcpu))
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
@@ -7478,34 +7565,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
 	kvm_free_pit(kvm);
 }
 
-int __x86_set_memory_region(struct kvm *kvm,
-			    const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 {
 	int i, r;
+	unsigned long hva;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *slot, old;
 
 	/* Called with kvm->slots_lock held.  */
-	BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+		return -EINVAL;
+
+	slot = id_to_memslot(slots, id);
+	if (size) {
+		if (WARN_ON(slot->npages))
+			return -EEXIST;
 
+		/*
+		 * MAP_SHARED to prevent internal slot pages from being moved
+		 * by fork()/COW.
+		 */
+		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+			      MAP_SHARED | MAP_ANONYMOUS, 0);
+		if (IS_ERR((void *)hva))
+			return PTR_ERR((void *)hva);
+	} else {
+		if (!slot->npages)
+			return 0;
+
+		hva = 0;
+	}
+
+	old = *slot;
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		struct kvm_userspace_memory_region m = *mem;
+		struct kvm_userspace_memory_region m;
 
-		m.slot |= i << 16;
+		m.slot = id | (i << 16);
+		m.flags = 0;
+		m.guest_phys_addr = gpa;
+		m.userspace_addr = hva;
+		m.memory_size = size;
 		r = __kvm_set_memory_region(kvm, &m);
 		if (r < 0)
 			return r;
 	}
 
+	if (!size) {
+		r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+		WARN_ON(r < 0);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
 
-int x86_set_memory_region(struct kvm *kvm,
-			  const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 {
 	int r;
 
 	mutex_lock(&kvm->slots_lock);
-	r = __x86_set_memory_region(kvm, mem);
+	r = __x86_set_memory_region(kvm, id, gpa, size);
 	mutex_unlock(&kvm->slots_lock);
 
 	return r;
@@ -7520,16 +7639,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		 * unless the the memory map has changed due to process exit
 		 * or fd copying.
 		 */
-		struct kvm_userspace_memory_region mem;
-		memset(&mem, 0, sizeof(mem));
-		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
-
-		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
-
-		mem.slot = TSS_PRIVATE_MEMSLOT;
-		x86_set_memory_region(kvm, &mem);
+		x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+		x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+		x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 	}
 	kvm_iommu_unmap_guest(kvm);
 	kfree(kvm->arch.vpic);
@@ -7632,27 +7744,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
 				enum kvm_mr_change change)
 {
-	/*
-	 * Only private memory slots need to be mapped here since
-	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
-	 */
-	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
-		unsigned long userspace_addr;
-
-		/*
-		 * MAP_SHARED to prevent internal slot pages from being moved
-		 * by fork()/COW.
-		 */
-		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
-					 PROT_READ | PROT_WRITE,
-					 MAP_SHARED | MAP_ANONYMOUS, 0);
-
-		if (IS_ERR((void *)userspace_addr))
-			return PTR_ERR((void *)userspace_addr);
-
-		memslot->userspace_addr = userspace_addr;
-	}
-
 	return 0;
 }
 
@@ -7714,17 +7805,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 {
 	int nr_mmu_pages = 0;
 
-	if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
-		int ret;
-
-		ret = vm_munmap(old->userspace_addr,
-				old->npages * PAGE_SIZE);
-		if (ret < 0)
-			printk(KERN_WARNING
-			       "kvm_vm_ioctl_set_memory_region: "
-			       "failed to munmap memory\n");
-	}
-
 	if (!kvm->arch.n_requested_mmu_pages)
 		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
 
@@ -7773,19 +7853,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 	kvm_mmu_invalidate_zap_all_pages(kvm);
 }
 
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+	if (!list_empty_careful(&vcpu->async_pf.done))
+		return true;
+
+	if (kvm_apic_has_events(vcpu))
+		return true;
+
+	if (vcpu->arch.pv.pv_unhalted)
+		return true;
+
+	if (atomic_read(&vcpu->arch.nmi_queued))
+		return true;
+
+	if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+		return true;
+
+	if (kvm_arch_interrupt_allowed(vcpu) &&
+	    kvm_cpu_has_interrupt(vcpu))
+		return true;
+
+	return false;
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
 		kvm_x86_ops->check_nested_events(vcpu, false);
 
-	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
-		!vcpu->arch.apf.halted)
-		|| !list_empty_careful(&vcpu->async_pf.done)
-		|| kvm_apic_has_events(vcpu)
-		|| vcpu->arch.pv.pv_unhalted
-		|| atomic_read(&vcpu->arch.nmi_queued) ||
-		(kvm_arch_interrupt_allowed(vcpu) &&
-		 kvm_cpu_has_interrupt(vcpu));
+	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -8017,7 +8114,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	if (kvm_x86_ops->update_pi_irte) {
+		irqfd->producer = prod;
+		return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+				prod->irq, irqfd->gsi, 1);
+	}
+
+	return -EINVAL;
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	int ret;
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	if (!kvm_x86_ops->update_pi_irte) {
+		WARN_ON(irqfd->producer != NULL);
+		return;
+	}
+
+	WARN_ON(irqfd->producer != prod);
+	irqfd->producer = NULL;
+
+	/*
+	 * When producer of consumer is unregistered, we change back to
+	 * remapped mode, so we can re-use the current implementation
+	 * when the irq is masked/disabed or the consumer side (KVM
+	 * int this case doesn't want to receive the interrupts.
+	*/
+	ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+	if (ret)
+		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
+		       " fails: %d\n", irqfd->consumer.token, ret);
+}
+
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+				   uint32_t guest_irq, bool set)
+{
+	if (!kvm_x86_ops->update_pi_irte)
+		return -EINVAL;
+
+	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
@@ -8032,3 +8181,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2f822cd886c2..f2afa5fe48a6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -180,9 +180,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 					  int page_num);
 
-#define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-				| XSTATE_BNDREGS | XSTATE_BNDCSR \
-				| XSTATE_AVX512)
+#define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
+				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
+				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 816488c0b97e..d388de72eaca 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -353,8 +353,12 @@ AVXcode: 1
 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
 18: Grp16 (1A)
 19:
-1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv
-1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
 1c:
 1d:
 1e:
@@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
 be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
 bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
 # 0x0f 0x38 0xc0-0xff
+c8: sha1nexte Vdq,Wdq
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq
+cb: sha256rnds2 Vdq,Wdq
+cc: sha256msg1 Vdq,Wdq
+cd: sha256msg2 Vdq,Wdq
 db: VAESIMC Vdq,Wdq (66),(v1)
 dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -790,6 +800,7 @@ AVXcode: 3
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+cc: sha1rnds4 Vdq,Wdq,Ib
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v)
 EndTable
@@ -874,7 +885,7 @@ GrpTable: Grp7
 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
-5:
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
 6: LMSW Ew
 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
 EndTable
@@ -888,6 +899,9 @@ EndTable
 
 GrpTable: Grp9
 1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
@@ -932,8 +946,8 @@ GrpTable: Grp15
 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
 4: XSAVE
 5: XRSTOR | lfence (11B)
-6: XSAVEOPT | mfence (11B)
-7: clflush | sfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
 EndTable
 
 GrpTable: Grp16
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index dd76a05729b0..024f6e971174 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -169,6 +169,76 @@ void fxch_i(void)
 	fpu_tag_word = tag_word;
 }
 
+static void fcmovCC(void)
+{
+	/* fcmovCC st(i) */
+	int i = FPU_rm;
+	FPU_REG *st0_ptr = &st(0);
+	FPU_REG *sti_ptr = &st(i);
+	long tag_word = fpu_tag_word;
+	int regnr = top & 7;
+	int regnri = (top + i) & 7;
+	u_char sti_tag = (tag_word >> (regnri * 2)) & 3;
+
+	if (sti_tag == TAG_Empty) {
+		FPU_stack_underflow();
+		clear_C1();
+		return;
+	}
+	reg_copy(sti_ptr, st0_ptr);
+	tag_word &= ~(3 << (regnr * 2));
+	tag_word |= (sti_tag << (regnr * 2));
+	fpu_tag_word = tag_word;
+}
+
+void fcmovb(void)
+{
+	if (FPU_EFLAGS & X86_EFLAGS_CF)
+		fcmovCC();
+}
+
+void fcmove(void)
+{
+	if (FPU_EFLAGS & X86_EFLAGS_ZF)
+		fcmovCC();
+}
+
+void fcmovbe(void)
+{
+	if (FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF))
+		fcmovCC();
+}
+
+void fcmovu(void)
+{
+	if (FPU_EFLAGS & X86_EFLAGS_PF)
+		fcmovCC();
+}
+
+void fcmovnb(void)
+{
+	if (!(FPU_EFLAGS & X86_EFLAGS_CF))
+		fcmovCC();
+}
+
+void fcmovne(void)
+{
+	if (!(FPU_EFLAGS & X86_EFLAGS_ZF))
+		fcmovCC();
+}
+
+void fcmovnbe(void)
+{
+	if (!(FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF)))
+		fcmovCC();
+}
+
+void fcmovnu(void)
+{
+	if (!(FPU_EFLAGS & X86_EFLAGS_PF))
+		fcmovCC();
+}
+
 void ffree_(void)
 {
 	/* ffree st(i) */
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index 4dae511c85ad..afbc4d805d66 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -71,7 +71,7 @@
 
 #include "fpu_system.h"
 
-#include <asm/sigcontext.h>	/* for struct _fpstate */
+#include <uapi/asm/sigcontext.h>	/* for struct _fpstate */
 #include <asm/math_emu.h>
 #include <linux/linkage.h>
 
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 3d8f2e421466..e945fedf1de2 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -40,49 +40,33 @@
 
 #define __BAD__ FPU_illegal	/* Illegal on an 80486, causes SIGILL */
 
-#ifndef NO_UNDOC_CODE		/* Un-documented FPU op-codes supported by default. */
+/* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */
 
-/* WARNING: These codes are not documented by Intel in their 80486 manual
-   and may not work on FPU clones or later Intel FPUs. */
-
-/* Changes to support the un-doc codes provided by Linus Torvalds. */
-
-#define _d9_d8_ fstp_i		/* unofficial code (19) */
-#define _dc_d0_ fcom_st		/* unofficial code (14) */
-#define _dc_d8_ fcompst		/* unofficial code (1c) */
-#define _dd_c8_ fxch_i		/* unofficial code (0d) */
-#define _de_d0_ fcompst		/* unofficial code (16) */
-#define _df_c0_ ffreep		/* unofficial code (07) ffree + pop */
-#define _df_c8_ fxch_i		/* unofficial code (0f) */
-#define _df_d0_ fstp_i		/* unofficial code (17) */
-#define _df_d8_ fstp_i		/* unofficial code (1f) */
+/* WARNING: "u" entries are not documented by Intel in their 80486 manual
+   and may not work on FPU clones or later Intel FPUs.
+   Changes to support them provided by Linus Torvalds. */
 
 static FUNC const st_instr_table[64] = {
-	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
-	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
-	fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
-	fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
-	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
-	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
-	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
-	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
+/* Opcode:	d8		d9		da		db */
+/*		dc		dd		de		df */
+/* c0..7 */	fadd__,		fld_i_,		fcmovb,		fcmovnb,
+/* c0..7 */	fadd_i,		ffree_,		faddp_,		ffreep,/*u*/
+/* c8..f */	fmul__,		fxch_i,		fcmove,		fcmovne,
+/* c8..f */	fmul_i,		fxch_i,/*u*/	fmulp_,		fxch_i,/*u*/
+/* d0..7 */	fcom_st,	fp_nop,		fcmovbe,	fcmovnbe,
+/* d0..7 */	fcom_st,/*u*/	fst_i_,		fcompst,/*u*/	fstp_i,/*u*/
+/* d8..f */	fcompst,	fstp_i,/*u*/	fcmovu,		fcmovnu,
+/* d8..f */	fcompst,/*u*/	fstp_i,		fcompp,		fstp_i,/*u*/
+/* e0..7 */	fsub__,		FPU_etc,	__BAD__,	finit_,
+/* e0..7 */	fsubri,		fucom_,		fsubrp,		fstsw_,
+/* e8..f */	fsubr_,		fconst,		fucompp,	fucomi_,
+/* e8..f */	fsub_i,		fucomp,		fsubp_,		fucomip,
+/* f0..7 */	fdiv__,		FPU_triga,	__BAD__,	fcomi_,
+/* f0..7 */	fdivri,		__BAD__,	fdivrp,		fcomip,
+/* f8..f */	fdivr_,		FPU_trigb,	__BAD__,	__BAD__,
+/* f8..f */	fdiv_i,		__BAD__,	fdivp_,		__BAD__,
 };
 
-#else /* Support only documented FPU op-codes */
-
-static FUNC const st_instr_table[64] = {
-	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
-	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
-	fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
-	fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
-	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
-	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
-	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
-	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
-};
-
-#endif /* NO_UNDOC_CODE */
-
 #define _NONE_ 0		/* Take no special action */
 #define _REG0_ 1		/* Need to check for not empty st(0) */
 #define _REGI_ 2		/* Need to check for not empty st(0) and st(rm) */
@@ -94,36 +78,18 @@ static FUNC const st_instr_table[64] = {
 #define _REGIc 0		/* Compare st(0) and st(rm) */
 #define _REGIn 0		/* Uses st(0) and st(rm), but handle checks later */
 
-#ifndef NO_UNDOC_CODE
-
-/* Un-documented FPU op-codes supported by default. (see above) */
-
 static u_char const type_table[64] = {
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
-	_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
-	_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-	_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+/* Opcode:	d8	d9	da	db	dc	dd	de	df */
+/* c0..7 */	_REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _REGi_,
+/* c8..f */	_REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _REGI_, _REGIp, _REGI_,
+/* d0..7 */	_REGIc, _NONE_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_,
+/* d8..f */	_REGIc, _REG0_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_,
+/* e0..7 */	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+/* e8..f */	_REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc,
+/* f0..7 */	_REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc,
+/* f8..f */	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
 };
 
-#else /* Support only documented FPU op-codes */
-
-static u_char const type_table[64] = {
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
-	_REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
-	_REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
-	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
-};
-
-#endif /* NO_UNDOC_CODE */
-
 #ifdef RE_ENTRANT_CHECKING
 u_char emulating = 0;
 #endif /* RE_ENTRANT_CHECKING */
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h
index 9779df436b7d..caff438b9c1d 100644
--- a/arch/x86/math-emu/fpu_proto.h
+++ b/arch/x86/math-emu/fpu_proto.h
@@ -46,6 +46,14 @@ extern void fstsw_(void);
 extern void fp_nop(void);
 extern void fld_i_(void);
 extern void fxch_i(void);
+extern void fcmovb(void);
+extern void fcmove(void);
+extern void fcmovbe(void);
+extern void fcmovu(void);
+extern void fcmovnb(void);
+extern void fcmovne(void);
+extern void fcmovnbe(void);
+extern void fcmovnu(void);
 extern void ffree_(void);
 extern void ffreep(void);
 extern void fst_i_(void);
@@ -108,6 +116,10 @@ extern void fcompp(void);
 extern void fucom_(void);
 extern void fucomp(void);
 extern void fucompp(void);
+extern void fcomi_(void);
+extern void fcomip(void);
+extern void fucomi_(void);
+extern void fucomip(void);
 /* reg_constant.c */
 extern void fconst(void);
 /* reg_ld_str.c */
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c
index 2931ff355218..95228ff042c0 100644
--- a/arch/x86/math-emu/load_store.c
+++ b/arch/x86/math-emu/load_store.c
@@ -33,11 +33,12 @@
 
 #define pop_0()	{ FPU_settag0(TAG_Empty); top++; }
 
+/* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
 static u_char const type_table[32] = {
-	_PUSH_, _PUSH_, _PUSH_, _PUSH_,
-	_null_, _null_, _null_, _null_,
-	_REG0_, _REG0_, _REG0_, _REG0_,
-	_REG0_, _REG0_, _REG0_, _REG0_,
+	_PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32,  db:fild m32,  dd:fld f64,  df:fild m16 */
+	_null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef,    db,dd,df:fisttp m32/64/16 */
+	_REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32,  db:fist m32,  dd:fst f64,  df:fist m16 */
+	_REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */
 	_NONE_, _null_, _NONE_, _PUSH_,
 	_NONE_, _PUSH_, _null_, _PUSH_,
 	_NONE_, _null_, _NONE_, _REG0_,
@@ -45,15 +46,19 @@ static u_char const type_table[32] = {
 };
 
 u_char const data_sizes_16[32] = {
-	4, 4, 8, 2, 0, 0, 0, 0,
-	4, 4, 8, 2, 4, 4, 8, 2,
+	4, 4, 8, 2,
+	0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */
+	4, 4, 8, 2,
+	4, 4, 8, 2,
 	14, 0, 94, 10, 2, 10, 0, 8,
 	14, 0, 94, 10, 2, 10, 2, 8
 };
 
 static u_char const data_sizes_32[32] = {
-	4, 4, 8, 2, 0, 0, 0, 0,
-	4, 4, 8, 2, 4, 4, 8, 2,
+	4, 4, 8, 2,
+	0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */
+	4, 4, 8, 2,
+	4, 4, 8, 2,
 	28, 0, 108, 10, 2, 10, 0, 8,
 	28, 0, 108, 10, 2, 10, 2, 8
 };
@@ -65,6 +70,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
 	FPU_REG *st0_ptr;
 	u_char st0_tag = TAG_Empty;	/* This is just to stop a gcc warning. */
 	u_char loaded_tag;
+	int sv_cw;
 
 	st0_ptr = NULL;		/* Initialized just to stop compiler warnings. */
 
@@ -111,7 +117,8 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
 	}
 
 	switch (type) {
-	case 000:		/* fld m32real */
+	/* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
+	case 000:		/* fld m32real (d9 /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_single((float __user *)data_address, &loaded_data);
@@ -123,13 +130,13 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
 		}
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 001:		/* fild m32int */
+	case 001:		/* fild m32int (db /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_int32((long __user *)data_address, &loaded_data);
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 002:		/* fld m64real */
+	case 002:		/* fld m64real (dd /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_double((double __user *)data_address,
@@ -142,12 +149,44 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
 		}
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 003:		/* fild m16int */
+	case 003:		/* fild m16int (df /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_int16((short __user *)data_address, &loaded_data);
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
+	/* case 004: undefined (d9 /1) */
+	/* fisttp are enabled if CPUID(1).ECX(0) "sse3" is set */
+	case 005:		/* fisttp m32int (db /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int32
+		    (st0_ptr, st0_tag, (long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
+	case 006:		/* fisttp m64int (dd /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int64
+		    (st0_ptr, st0_tag, (long long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
+	case 007:		/* fisttp m16int (df /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int16
+		    (st0_ptr, st0_tag, (short __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
 	case 010:		/* fst m32real */
 		clear_C1();
 		FPU_store_single(st0_ptr, st0_tag,
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c
index ecce55fc2e2e..b77360fdbf4a 100644
--- a/arch/x86/math-emu/reg_compare.c
+++ b/arch/x86/math-emu/reg_compare.c
@@ -249,6 +249,54 @@ static int compare_st_st(int nr)
 	return 0;
 }
 
+static int compare_i_st_st(int nr)
+{
+	int f, c;
+	FPU_REG *st_ptr;
+
+	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+		FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
+		/* Stack fault */
+		EXCEPTION(EX_StackUnder);
+		return !(control_word & CW_Invalid);
+	}
+
+	partial_status &= ~SW_C0;
+	st_ptr = &st(nr);
+	c = compare(st_ptr, FPU_gettagi(nr));
+	if (c & COMP_NaN) {
+		FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
+		EXCEPTION(EX_Invalid);
+		return !(control_word & CW_Invalid);
+	}
+
+	switch (c & 7) {
+	case COMP_A_lt_B:
+		f = X86_EFLAGS_CF;
+		break;
+	case COMP_A_eq_B:
+		f = X86_EFLAGS_ZF;
+		break;
+	case COMP_A_gt_B:
+		f = 0;
+		break;
+	case COMP_No_Comp:
+		f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
+		break;
+#ifdef PARANOID
+	default:
+		EXCEPTION(EX_INTERNAL | 0x122);
+		f = 0;
+		break;
+#endif /* PARANOID */
+	}
+	FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
+	if (c & COMP_Denormal) {
+		return denormal_operand() < 0;
+	}
+	return 0;
+}
+
 static int compare_u_st_st(int nr)
 {
 	int f = 0, c;
@@ -299,6 +347,58 @@ static int compare_u_st_st(int nr)
 	return 0;
 }
 
+static int compare_ui_st_st(int nr)
+{
+	int f = 0, c;
+	FPU_REG *st_ptr;
+
+	if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) {
+		FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
+		/* Stack fault */
+		EXCEPTION(EX_StackUnder);
+		return !(control_word & CW_Invalid);
+	}
+
+	partial_status &= ~SW_C0;
+	st_ptr = &st(nr);
+	c = compare(st_ptr, FPU_gettagi(nr));
+	if (c & COMP_NaN) {
+		FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF);
+		if (c & COMP_SNaN) {	/* This is the only difference between
+					   un-ordered and ordinary comparisons */
+			EXCEPTION(EX_Invalid);
+			return !(control_word & CW_Invalid);
+		}
+		return 0;
+	}
+
+	switch (c & 7) {
+	case COMP_A_lt_B:
+		f = X86_EFLAGS_CF;
+		break;
+	case COMP_A_eq_B:
+		f = X86_EFLAGS_ZF;
+		break;
+	case COMP_A_gt_B:
+		f = 0;
+		break;
+	case COMP_No_Comp:
+		f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF;
+		break;
+#ifdef PARANOID
+	default:
+		EXCEPTION(EX_INTERNAL | 0x123);
+		f = 0;
+		break;
+#endif /* PARANOID */
+	}
+	FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f;
+	if (c & COMP_Denormal) {
+		return denormal_operand() < 0;
+	}
+	return 0;
+}
+
 /*---------------------------------------------------------------------------*/
 
 void fcom_st(void)
@@ -348,3 +448,31 @@ void fucompp(void)
 	} else
 		FPU_illegal();
 }
+
+/* P6+ compare-to-EFLAGS ops */
+
+void fcomi_(void)
+{
+	/* fcomi st(i) */
+	compare_i_st_st(FPU_rm);
+}
+
+void fcomip(void)
+{
+	/* fcomip st(i) */
+	if (!compare_i_st_st(FPU_rm))
+		FPU_pop();
+}
+
+void fucomi_(void)
+{
+	/* fucomi st(i) */
+	compare_ui_st_st(FPU_rm);
+}
+
+void fucomip(void)
+{
+	/* fucomip st(i) */
+	if (!compare_ui_st_st(FPU_rm))
+		FPU_pop();
+}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index a482d105172b..65c47fda26fc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_SMP)		+= tlb.o
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_X86_PTDUMP)	+= dump_pagetables.o
+obj-$(CONFIG_X86_PTDUMP_CORE)	+= dump_pagetables.o
 
 obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index f0cedf3395af..1bf417e9cc13 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -32,6 +32,8 @@ struct pg_state {
 	const struct addr_marker *marker;
 	unsigned long lines;
 	bool to_dmesg;
+	bool check_wx;
+	unsigned long wx_pages;
 };
 
 struct addr_marker {
@@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
 			pt_dump_cont_printf(m, dmsg, "    ");
 		if ((level == 4 && pr & _PAGE_PAT) ||
 		    ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
-			pt_dump_cont_printf(m, dmsg, "pat ");
+			pt_dump_cont_printf(m, dmsg, "PAT ");
 		else
 			pt_dump_cont_printf(m, dmsg, "    ");
 		if (pr & _PAGE_GLOBAL)
@@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
-	cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
+	prot = pgprot_val(new_prot);
+	cur = pgprot_val(st->current_prot);
 
 	if (!st->level) {
 		/* First entry */
@@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		const char *unit = units;
 		unsigned long delta;
 		int width = sizeof(unsigned long) * 2;
+		pgprotval_t pr = pgprot_val(st->current_prot);
+
+		if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+			WARN_ONCE(1,
+				  "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
+				  (void *)st->start_address,
+				  (void *)st->start_address);
+			st->wx_pages += (st->current_address -
+					 st->start_address) / PAGE_SIZE;
+		}
 
 		/*
 		 * Now print the actual finished series
@@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 {
 	int i;
 	pte_t *start;
+	pgprotval_t prot;
 
 	start = (pte_t *) pmd_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pgprot_t prot = pte_pgprot(*start);
-
+		prot = pte_flags(*start);
 		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
-		note_page(m, st, prot, 4);
+		note_page(m, st, __pgprot(prot), 4);
 		start++;
 	}
 }
@@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
 {
 	int i;
 	pmd_t *start;
+	pgprotval_t prot;
 
 	start = (pmd_t *) pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
-			pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;
-
-			if (pmd_large(*start) || !pmd_present(*start))
+			if (pmd_large(*start) || !pmd_present(*start)) {
+				prot = pmd_flags(*start);
 				note_page(m, st, __pgprot(prot), 3);
-			else
+			} else {
 				walk_pte_level(m, st, *start,
 					       P + i * PMD_LEVEL_MULT);
+			}
 		} else
 			note_page(m, st, __pgprot(0), 3);
 		start++;
@@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 {
 	int i;
 	pud_t *start;
+	pgprotval_t prot;
 
 	start = (pud_t *) pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 		if (!pud_none(*start)) {
-			pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;
-
-			if (pud_large(*start) || !pud_present(*start))
+			if (pud_large(*start) || !pud_present(*start)) {
+				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
-			else
+			} else {
 				walk_pmd_level(m, st, *start,
 					       P + i * PUD_LEVEL_MULT);
+			}
 		} else
 			note_page(m, st, __pgprot(0), 2);
 
@@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 #define pgd_none(a)  pud_none(__pud(pgd_val(a)))
 #endif
 
-void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
+				       bool checkwx)
 {
 #ifdef CONFIG_X86_64
 	pgd_t *start = (pgd_t *) &init_level4_pgt;
 #else
 	pgd_t *start = swapper_pg_dir;
 #endif
+	pgprotval_t prot;
 	int i;
 	struct pg_state st = {};
 
@@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 		st.to_dmesg = true;
 	}
 
+	st.check_wx = checkwx;
+	if (checkwx)
+		st.wx_pages = 0;
+
 	for (i = 0; i < PTRS_PER_PGD; i++) {
 		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
 		if (!pgd_none(*start)) {
-			pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;
-
-			if (pgd_large(*start) || !pgd_present(*start))
+			if (pgd_large(*start) || !pgd_present(*start)) {
+				prot = pgd_flags(*start);
 				note_page(m, &st, __pgprot(prot), 1);
-			else
+			} else {
 				walk_pud_level(m, &st, *start,
 					       i * PGD_LEVEL_MULT);
+			}
 		} else
 			note_page(m, &st, __pgprot(0), 1);
 
@@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 	/* Flush out the last page */
 	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
 	note_page(m, &st, __pgprot(0), 0);
+	if (!checkwx)
+		return;
+	if (st.wx_pages)
+		pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
+			st.wx_pages);
+	else
+		pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
 }
 
+void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
+{
+	ptdump_walk_pgd_level_core(m, pgd, false);
+}
+
+void ptdump_walk_pgd_level_checkwx(void)
+{
+	ptdump_walk_pgd_level_core(NULL, NULL, true);
+}
+
+#ifdef CONFIG_X86_PTDUMP
 static int ptdump_show(struct seq_file *m, void *v)
 {
 	ptdump_walk_pgd_level(m, NULL);
@@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
+#endif
 
 static int pt_dump_init(void)
 {
+#ifdef CONFIG_X86_PTDUMP
 	struct dentry *pe;
+#endif
 
 #ifdef CONFIG_X86_32
 	/* Not a compile-time constant on x86-32 */
@@ -412,10 +453,12 @@ static int pt_dump_init(void)
 	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
 #endif
 
+#ifdef CONFIG_X86_PTDUMP
 	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
 				 &ptdump_fops);
 	if (!pe)
 		return -ENOMEM;
+#endif
 
 	return 0;
 }
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 81bf3d2af3eb..ae9a37bf1371 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	unsigned long mask;
-	pte_t pte = *(pte_t *)&pmd;
 	struct page *head, *page;
 	int refs;
 
 	mask = _PAGE_PRESENT|_PAGE_USER;
 	if (write)
 		mask |= _PAGE_RW;
-	if ((pte_flags(pte) & mask) != mask)
+	if ((pmd_flags(pmd) & mask) != mask)
 		return 0;
 	/* hugepages are never "special" */
-	VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
-	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+	VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
+	VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
 
 	refs = 0;
-	head = pte_page(pte);
+	head = pmd_page(pmd);
 	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
 {
 	unsigned long mask;
-	pte_t pte = *(pte_t *)&pud;
 	struct page *head, *page;
 	int refs;
 
 	mask = _PAGE_PRESENT|_PAGE_USER;
 	if (write)
 		mask |= _PAGE_RW;
-	if ((pte_flags(pte) & mask) != mask)
+	if ((pud_flags(pud) & mask) != mask)
 		return 0;
 	/* hugepages are never "special" */
-	VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL);
-	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+	VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
+	VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
 
 	refs = 0;
-	head = pte_page(pte);
+	head = pud_page(pud);
 	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d8a83df153a..1f37cb2b56a9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -693,14 +693,12 @@ void free_initmem(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
-#ifdef CONFIG_MICROCODE_EARLY
 	/*
 	 * Remember, initrd memory may contain microcode or other useful things.
 	 * Before we lose initrd mem, we need to find a place to hold them
 	 * now that normal virtual memory is enabled.
 	 */
 	save_microcode_in_initrd();
-#endif
 
 	/*
 	 * end could be not aligned, and We can not align that,
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 7562f42914b4..cb4ef3de61f9 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -957,6 +957,8 @@ void mark_rodata_ro(void)
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
 	mark_nxdata_nx();
+	if (__supported_pte_mask & _PAGE_NX)
+		debug_checkwx();
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30564e2752d3..5ed62eff31bd 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1132,7 +1132,7 @@ void mark_rodata_ro(void)
 	 * has been zapped already via cleanup_highmem().
 	 */
 	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
-	set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
+	set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
 
 	rodata_test();
 
@@ -1150,6 +1150,8 @@ void mark_rodata_ro(void)
 	free_init_pages("unused kernel",
 			(unsigned long) __va(__pa_symbol(rodata_end)),
 			(unsigned long) __va(__pa_symbol(_sdata)));
+
+	debug_checkwx();
 }
 
 #endif
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 9ce5da27b136..d470cf219a2d 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -126,5 +126,5 @@ void __init kasan_init(void)
 	__flush_tlb_all();
 	init_task.kasan_depth = 0;
 
-	pr_info("Kernel address sanitizer initialized\n");
+	pr_info("KernelAddressSanitizer initialized\n");
 }
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 134948b0926f..b0ae85f90f10 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -237,7 +237,8 @@ bad_opcode:
  */
 siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 {
-	const struct bndreg *bndregs, *bndreg;
+	const struct mpx_bndreg_state *bndregs;
+	const struct mpx_bndreg *bndreg;
 	siginfo_t *info = NULL;
 	struct insn insn;
 	uint8_t bndregno;
@@ -258,13 +259,13 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
 		goto err_out;
 	}
 	/* get bndregs field from current task's xsave area */
-	bndregs = get_xsave_field_ptr(XSTATE_BNDREGS);
+	bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
 	if (!bndregs) {
 		err = -EINVAL;
 		goto err_out;
 	}
 	/* now go select the individual register in the set of 4 */
-	bndreg = &bndregs[bndregno];
+	bndreg = &bndregs->bndreg[bndregno];
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
@@ -306,7 +307,7 @@ err_out:
 
 static __user void *mpx_get_bounds_dir(void)
 {
-	const struct bndcsr *bndcsr;
+	const struct mpx_bndcsr *bndcsr;
 
 	if (!cpu_feature_enabled(X86_FEATURE_MPX))
 		return MPX_INVALID_BOUNDS_DIR;
@@ -315,7 +316,7 @@ static __user void *mpx_get_bounds_dir(void)
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
 	 */
-	bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
 	if (!bndcsr)
 		return MPX_INVALID_BOUNDS_DIR;
 
@@ -489,10 +490,10 @@ out_unmap:
 static int do_mpx_bt_fault(void)
 {
 	unsigned long bd_entry, bd_base;
-	const struct bndcsr *bndcsr;
+	const struct mpx_bndcsr *bndcsr;
 	struct mm_struct *mm = current->mm;
 
-	bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
 	if (!bndcsr)
 		return -EINVAL;
 	/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2c44c0792301..a3137a4feed1 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address)
 phys_addr_t slow_virt_to_phys(void *__virt_addr)
 {
 	unsigned long virt_addr = (unsigned long)__virt_addr;
-	phys_addr_t phys_addr;
-	unsigned long offset;
+	unsigned long phys_addr, offset;
 	enum pg_level level;
-	unsigned long pmask;
 	pte_t *pte;
 
 	pte = lookup_address(virt_addr, &level);
 	BUG_ON(!pte);
-	pmask = page_level_mask(level);
-	offset = virt_addr & ~pmask;
-	phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
-	return (phys_addr | offset);
+
+	switch (level) {
+	case PG_LEVEL_1G:
+		phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PUD_PAGE_MASK;
+		break;
+	case PG_LEVEL_2M:
+		phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PMD_PAGE_MASK;
+		break;
+	default:
+		phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+		offset = virt_addr & ~PAGE_MASK;
+	}
+
+	return (phys_addr_t)(phys_addr | offset);
 }
 EXPORT_SYMBOL_GPL(slow_virt_to_phys);
 
@@ -458,7 +468,7 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
+	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
@@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 
 	switch (level) {
 	case PG_LEVEL_2M:
-#ifdef CONFIG_X86_64
+		old_prot = pmd_pgprot(*(pmd_t *)kpte);
+		old_pfn = pmd_pfn(*(pmd_t *)kpte);
+		break;
 	case PG_LEVEL_1G:
-#endif
-		psize = page_level_size(level);
-		pmask = page_level_mask(level);
+		old_prot = pud_pgprot(*(pud_t *)kpte);
+		old_pfn = pud_pfn(*(pud_t *)kpte);
 		break;
 	default:
 		do_split = -EINVAL;
 		goto out_unlock;
 	}
 
+	psize = page_level_size(level);
+	pmask = page_level_mask(level);
+
 	/*
 	 * Calculate the number of pages, which fit into this large
 	 * page starting at address:
@@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * up accordingly.
 	 */
 	old_pte = *kpte;
-	old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte));
+	req_prot = pgprot_large_2_4k(old_prot);
 
 	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
 	pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
@@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	req_prot = canon_pgprot(req_prot);
 
 	/*
-	 * old_pte points to the large page base address. So we need
+	 * old_pfn points to the large page base pfn. So we need
 	 * to add the offset of the virtual address:
 	 */
-	pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
+	pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT);
 	cpa->pfn = pfn;
 
 	new_prot = static_protections(req_prot, address, pfn);
@@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * the pages in the range we try to preserve:
 	 */
 	addr = address & pmask;
-	pfn = pte_pfn(old_pte);
+	pfn = old_pfn;
 	for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
 		pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
@@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 		 * The address is aligned and the number of pages
 		 * covers the full page.
 		 */
-		new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
+		new_pte = pfn_pte(old_pfn, new_prot);
 		__set_pmd_pte(kpte, address, new_pte);
 		cpa->flags |= CPA_FLUSHTLB;
 		do_split = 0;
@@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 		   struct page *base)
 {
 	pte_t *pbase = (pte_t *)page_address(base);
-	unsigned long pfn, pfninc = 1;
+	unsigned long ref_pfn, pfn, pfninc = 1;
 	unsigned int i, level;
 	pte_t *tmp;
 	pgprot_t ref_prot;
@@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	}
 
 	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
-	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
-	/* promote PAT bit to correct position */
-	if (level == PG_LEVEL_2M)
+	switch (level) {
+	case PG_LEVEL_2M:
+		ref_prot = pmd_pgprot(*(pmd_t *)kpte);
+		/* clear PSE and promote PAT bit to correct position */
 		ref_prot = pgprot_large_2_4k(ref_prot);
+		ref_pfn = pmd_pfn(*(pmd_t *)kpte);
+		break;
 
-#ifdef CONFIG_X86_64
-	if (level == PG_LEVEL_1G) {
+	case PG_LEVEL_1G:
+		ref_prot = pud_pgprot(*(pud_t *)kpte);
+		ref_pfn = pud_pfn(*(pud_t *)kpte);
 		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+
 		/*
-		 * Set the PSE flags only if the PRESENT flag is set
+		 * Clear the PSE flags if the PRESENT flag is not set
 		 * otherwise pmd_present/pmd_huge will return true
 		 * even on a non present pmd.
 		 */
-		if (pgprot_val(ref_prot) & _PAGE_PRESENT)
-			pgprot_val(ref_prot) |= _PAGE_PSE;
-		else
+		if (!(pgprot_val(ref_prot) & _PAGE_PRESENT))
 			pgprot_val(ref_prot) &= ~_PAGE_PSE;
+		break;
+
+	default:
+		spin_unlock(&pgd_lock);
+		return 1;
 	}
-#endif
 
 	/*
 	 * Set the GLOBAL flags only if the PRESENT flag is set
@@ -643,13 +664,16 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	/*
 	 * Get the target pfn from the original entry:
 	 */
-	pfn = pte_pfn(*kpte);
+	pfn = ref_pfn;
 	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
 		set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
 
-	if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
-				PFN_DOWN(__pa(address)) + 1))
-		split_page_count(level);
+	if (virt_addr_valid(address)) {
+		unsigned long pfn = PFN_DOWN(__pa(address));
+
+		if (pfn_range_is_mapped(pfn, pfn + 1))
+			split_page_count(level);
+	}
 
 	/*
 	 * Install the new, split up pagetable.
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 70efcd0940f9..75991979f667 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1109,7 +1109,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 		bpf_flush_icache(header, image + proglen);
 		set_memory_ro((unsigned long)header, header->pages);
 		prog->bpf_func = (void *)image;
-		prog->jited = true;
+		prog->jited = 1;
 	}
 out:
 	kfree(addrs);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ff9911707160..3cd69832d7f4 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,16 +4,15 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
+#include <linux/pci-acpi.h>
 #include <asm/numa.h>
 #include <asm/pci_x86.h>
 
 struct pci_root_info {
-	struct acpi_device *bridge;
-	char name[16];
+	struct acpi_pci_root_info common;
 	struct pci_sysdata sd;
 #ifdef	CONFIG_PCI_MMCONFIG
 	bool mcfg_added;
-	u16 segment;
 	u8 start_bus;
 	u8 end_bus;
 #endif
@@ -178,15 +177,18 @@ static int check_segment(u16 seg, struct device *dev, char *estr)
 	return 0;
 }
 
-static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
-			  u8 end, phys_addr_t addr)
+static int setup_mcfg_map(struct acpi_pci_root_info *ci)
 {
-	int result;
-	struct device *dev = &info->bridge->dev;
+	int result, seg;
+	struct pci_root_info *info;
+	struct acpi_pci_root *root = ci->root;
+	struct device *dev = &ci->bridge->dev;
 
-	info->start_bus = start;
-	info->end_bus = end;
+	info = container_of(ci, struct pci_root_info, common);
+	info->start_bus = (u8)root->secondary.start;
+	info->end_bus = (u8)root->secondary.end;
 	info->mcfg_added = false;
+	seg = info->sd.domain;
 
 	/* return success if MMCFG is not in use */
 	if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg)
@@ -195,7 +197,8 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
 	if (!(pci_probe & PCI_PROBE_MMCONF))
 		return check_segment(seg, dev, "MMCONFIG is disabled,");
 
-	result = pci_mmconfig_insert(dev, seg, start, end, addr);
+	result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus,
+				     root->mcfg_addr);
 	if (result == 0) {
 		/* enable MMCFG if it hasn't been enabled yet */
 		if (raw_pci_ext_ops == NULL)
@@ -208,134 +211,55 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
 	return 0;
 }
 
-static void teardown_mcfg_map(struct pci_root_info *info)
+static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
 {
+	struct pci_root_info *info;
+
+	info = container_of(ci, struct pci_root_info, common);
 	if (info->mcfg_added) {
-		pci_mmconfig_delete(info->segment, info->start_bus,
-				    info->end_bus);
+		pci_mmconfig_delete(info->sd.domain,
+				    info->start_bus, info->end_bus);
 		info->mcfg_added = false;
 	}
 }
 #else
-static int setup_mcfg_map(struct pci_root_info *info,
-				    u16 seg, u8 start, u8 end,
-				    phys_addr_t addr)
+static int setup_mcfg_map(struct acpi_pci_root_info *ci)
 {
 	return 0;
 }
-static void teardown_mcfg_map(struct pci_root_info *info)
+
+static void teardown_mcfg_map(struct acpi_pci_root_info *ci)
 {
 }
 #endif
 
-static void validate_resources(struct device *dev, struct list_head *crs_res,
-			       unsigned long type)
+static int pci_acpi_root_get_node(struct acpi_pci_root *root)
 {
-	LIST_HEAD(list);
-	struct resource *res1, *res2, *root = NULL;
-	struct resource_entry *tmp, *entry, *entry2;
-
-	BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0);
-	root = (type & IORESOURCE_MEM) ? &iomem_resource : &ioport_resource;
-
-	list_splice_init(crs_res, &list);
-	resource_list_for_each_entry_safe(entry, tmp, &list) {
-		bool free = false;
-		resource_size_t end;
-
-		res1 = entry->res;
-		if (!(res1->flags & type))
-			goto next;
-
-		/* Exclude non-addressable range or non-addressable portion */
-		end = min(res1->end, root->end);
-		if (end <= res1->start) {
-			dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n",
-				 res1);
-			free = true;
-			goto next;
-		} else if (res1->end != end) {
-			dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n",
-				 res1, (unsigned long long)end + 1,
-				 (unsigned long long)res1->end);
-			res1->end = end;
-		}
-
-		resource_list_for_each_entry(entry2, crs_res) {
-			res2 = entry2->res;
-			if (!(res2->flags & type))
-				continue;
-
-			/*
-			 * I don't like throwing away windows because then
-			 * our resources no longer match the ACPI _CRS, but
-			 * the kernel resource tree doesn't allow overlaps.
-			 */
-			if (resource_overlaps(res1, res2)) {
-				res2->start = min(res1->start, res2->start);
-				res2->end = max(res1->end, res2->end);
-				dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n",
-					 res2, res1);
-				free = true;
-				goto next;
-			}
-		}
+	int busnum = root->secondary.start;
+	struct acpi_device *device = root->device;
+	int node = acpi_get_node(device->handle);
 
-next:
-		resource_list_del(entry);
-		if (free)
-			resource_list_free_entry(entry);
-		else
-			resource_list_add_tail(entry, crs_res);
+	if (node == NUMA_NO_NODE) {
+		node = x86_pci_root_bus_node(busnum);
+		if (node != 0 && node != NUMA_NO_NODE)
+			dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
+				node);
 	}
+	if (node != NUMA_NO_NODE && !node_online(node))
+		node = NUMA_NO_NODE;
+
+	return node;
 }
 
-static void add_resources(struct pci_root_info *info,
-			  struct list_head *resources,
-			  struct list_head *crs_res)
+static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci)
 {
-	struct resource_entry *entry, *tmp;
-	struct resource *res, *conflict, *root = NULL;
-
-	validate_resources(&info->bridge->dev, crs_res, IORESOURCE_MEM);
-	validate_resources(&info->bridge->dev, crs_res, IORESOURCE_IO);
-
-	resource_list_for_each_entry_safe(entry, tmp, crs_res) {
-		res = entry->res;
-		if (res->flags & IORESOURCE_MEM)
-			root = &iomem_resource;
-		else if (res->flags & IORESOURCE_IO)
-			root = &ioport_resource;
-		else
-			BUG_ON(res);
-
-		conflict = insert_resource_conflict(root, res);
-		if (conflict) {
-			dev_info(&info->bridge->dev,
-				 "ignoring host bridge window %pR (conflicts with %s %pR)\n",
-				 res, conflict->name, conflict);
-			resource_list_destroy_entry(entry);
-		}
-	}
-
-	list_splice_tail(crs_res, resources);
+	return setup_mcfg_map(ci);
 }
 
-static void release_pci_root_info(struct pci_host_bridge *bridge)
+static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci)
 {
-	struct resource *res;
-	struct resource_entry *entry;
-	struct pci_root_info *info = bridge->release_data;
-
-	resource_list_for_each_entry(entry, &bridge->windows) {
-		res = entry->res;
-		if (res->parent &&
-		    (res->flags & (IORESOURCE_MEM | IORESOURCE_IO)))
-			release_resource(res);
-	}
-
-	teardown_mcfg_map(info);
-	kfree(info);
+	teardown_mcfg_map(ci);
+	kfree(container_of(ci, struct pci_root_info, common));
 }
 
 /*
@@ -358,50 +282,47 @@ static bool resource_is_pcicfg_ioport(struct resource *res)
 		res->start == 0xCF8 && res->end == 0xCFF;
 }
 
-static void probe_pci_root_info(struct pci_root_info *info,
-				struct acpi_device *device,
-				int busnum, int domain,
-				struct list_head *list)
+static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
 {
-	int ret;
+	struct acpi_device *device = ci->bridge;
+	int busnum = ci->root->secondary.start;
 	struct resource_entry *entry, *tmp;
+	int status;
 
-	sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
-	info->bridge = device;
-	ret = acpi_dev_get_resources(device, list,
-				     acpi_dev_filter_resource_type_cb,
-				     (void *)(IORESOURCE_IO | IORESOURCE_MEM));
-	if (ret < 0)
-		dev_warn(&device->dev,
-			 "failed to parse _CRS method, error code %d\n", ret);
-	else if (ret == 0)
-		dev_dbg(&device->dev,
-			"no IO and memory resources present in _CRS\n");
-	else
-		resource_list_for_each_entry_safe(entry, tmp, list) {
-			if ((entry->res->flags & IORESOURCE_DISABLED) ||
-			    resource_is_pcicfg_ioport(entry->res))
+	status = acpi_pci_probe_root_resources(ci);
+	if (pci_use_crs) {
+		resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
+			if (resource_is_pcicfg_ioport(entry->res))
 				resource_list_destroy_entry(entry);
-			else
-				entry->res->name = info->name;
-		}
+		return status;
+	}
+
+	resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
+		dev_printk(KERN_DEBUG, &device->dev,
+			   "host bridge window %pR (ignored)\n", entry->res);
+		resource_list_destroy_entry(entry);
+	}
+	x86_pci_root_bus_resources(busnum, &ci->resources);
+
+	return 0;
 }
 
+static struct acpi_pci_root_ops acpi_pci_root_ops = {
+	.pci_ops = &pci_root_ops,
+	.init_info = pci_acpi_root_init_info,
+	.release_info = pci_acpi_root_release_info,
+	.prepare_resources = pci_acpi_root_prepare_resources,
+};
+
 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 {
-	struct acpi_device *device = root->device;
-	struct pci_root_info *info;
 	int domain = root->segment;
 	int busnum = root->secondary.start;
-	struct resource_entry *res_entry;
-	LIST_HEAD(crs_res);
-	LIST_HEAD(resources);
+	int node = pci_acpi_root_get_node(root);
 	struct pci_bus *bus;
-	struct pci_sysdata *sd;
-	int node;
 
 	if (pci_ignore_seg)
-		domain = 0;
+		root->segment = domain = 0;
 
 	if (domain && !pci_domains_supported) {
 		printk(KERN_WARNING "pci_bus %04x:%02x: "
@@ -410,71 +331,33 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 		return NULL;
 	}
 
-	node = acpi_get_node(device->handle);
-	if (node == NUMA_NO_NODE) {
-		node = x86_pci_root_bus_node(busnum);
-		if (node != 0 && node != NUMA_NO_NODE)
-			dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n",
-				node);
-	}
-
-	if (node != NUMA_NO_NODE && !node_online(node))
-		node = NUMA_NO_NODE;
-
-	info = kzalloc_node(sizeof(*info), GFP_KERNEL, node);
-	if (!info) {
-		printk(KERN_WARNING "pci_bus %04x:%02x: "
-		       "ignored (out of memory)\n", domain, busnum);
-		return NULL;
-	}
-
-	sd = &info->sd;
-	sd->domain = domain;
-	sd->node = node;
-	sd->companion = device;
-
 	bus = pci_find_bus(domain, busnum);
 	if (bus) {
 		/*
 		 * If the desired bus has been scanned already, replace
 		 * its bus->sysdata.
 		 */
-		memcpy(bus->sysdata, sd, sizeof(*sd));
-		kfree(info);
-	} else {
-		/* insert busn res at first */
-		pci_add_resource(&resources,  &root->secondary);
+		struct pci_sysdata sd = {
+			.domain = domain,
+			.node = node,
+			.companion = root->device
+		};
 
-		/*
-		 * _CRS with no apertures is normal, so only fall back to
-		 * defaults or native bridge info if we're ignoring _CRS.
-		 */
-		probe_pci_root_info(info, device, busnum, domain, &crs_res);
-		if (pci_use_crs) {
-			add_resources(info, &resources, &crs_res);
-		} else {
-			resource_list_for_each_entry(res_entry, &crs_res)
-				dev_printk(KERN_DEBUG, &device->dev,
-					   "host bridge window %pR (ignored)\n",
-					   res_entry->res);
-			resource_list_free(&crs_res);
-			x86_pci_root_bus_resources(busnum, &resources);
-		}
-
-		if (!setup_mcfg_map(info, domain, (u8)root->secondary.start,
-				    (u8)root->secondary.end, root->mcfg_addr))
-			bus = pci_create_root_bus(NULL, busnum, &pci_root_ops,
-						  sd, &resources);
-
-		if (bus) {
-			pci_scan_child_bus(bus);
-			pci_set_host_bridge_release(
-				to_pci_host_bridge(bus->bridge),
-				release_pci_root_info, info);
-		} else {
-			resource_list_free(&resources);
-			teardown_mcfg_map(info);
-			kfree(info);
+		memcpy(bus->sysdata, &sd, sizeof(sd));
+	} else {
+		struct pci_root_info *info;
+
+		info = kzalloc_node(sizeof(*info), GFP_KERNEL, node);
+		if (!info)
+			dev_err(&root->device->dev,
+				"pci_bus %04x:%02x: ignored (out of memory)\n",
+				domain, busnum);
+		else {
+			info->sd.domain = domain;
+			info->sd.node = node;
+			info->sd.companion = root->device;
+			bus = acpi_pci_root_create(root, &acpi_pci_root_ops,
+						   &info->common, &info->sd);
 		}
 	}
 
@@ -487,9 +370,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 			pcie_bus_configure_settings(child);
 	}
 
-	if (bus && node != NUMA_NO_NODE)
-		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
-
 	return bus;
 }
 
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index d7f997f7c26d..ea48449b2e63 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -50,11 +50,16 @@ void __init efi_bgrt_init(void)
 		       bgrt_tab->version);
 		return;
 	}
-	if (bgrt_tab->status != 1) {
-		pr_err("Ignoring BGRT: invalid status %u (expected 1)\n",
+	if (bgrt_tab->status & 0xfe) {
+		pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n",
 		       bgrt_tab->status);
 		return;
 	}
+	if (bgrt_tab->status != 1) {
+		pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n",
+			 bgrt_tab->status);
+		return;
+	}
 	if (bgrt_tab->image_type != 0) {
 		pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
 		       bgrt_tab->image_type);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1db84c0758b7..ad285404ea7f 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -194,7 +194,7 @@ static void __init do_add_efi_memmap(void)
 int __init efi_memblock_x86_reserve_range(void)
 {
 	struct efi_info *e = &boot_params.efi_info;
-	unsigned long pmap;
+	phys_addr_t pmap;
 
 	if (efi_enabled(EFI_PARAVIRT))
 		return 0;
@@ -209,7 +209,7 @@ int __init efi_memblock_x86_reserve_range(void)
 #else
 	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
 #endif
-	memmap.phys_map		= (void *)pmap;
+	memmap.phys_map		= pmap;
 	memmap.nr_map		= e->efi_memmap_size /
 				  e->efi_memdesc_size;
 	memmap.desc_size	= e->efi_memdesc_size;
@@ -222,7 +222,7 @@ int __init efi_memblock_x86_reserve_range(void)
 	return 0;
 }
 
-static void __init print_efi_memmap(void)
+void __init efi_print_memmap(void)
 {
 #ifdef EFI_DEBUG
 	efi_memory_desc_t *md;
@@ -524,7 +524,7 @@ void __init efi_init(void)
 		return;
 
 	if (efi_enabled(EFI_DBG))
-		print_efi_memmap();
+		efi_print_memmap();
 
 	efi_esrt_init();
 }
@@ -705,6 +705,70 @@ out:
 }
 
 /*
+ * Iterate the EFI memory map in reverse order because the regions
+ * will be mapped top-down. The end result is the same as if we had
+ * mapped things forward, but doesn't require us to change the
+ * existing implementation of efi_map_region().
+ */
+static inline void *efi_map_next_entry_reverse(void *entry)
+{
+	/* Initial call */
+	if (!entry)
+		return memmap.map_end - memmap.desc_size;
+
+	entry -= memmap.desc_size;
+	if (entry < memmap.map)
+		return NULL;
+
+	return entry;
+}
+
+/*
+ * efi_map_next_entry - Return the next EFI memory map descriptor
+ * @entry: Previous EFI memory map descriptor
+ *
+ * This is a helper function to iterate over the EFI memory map, which
+ * we do in different orders depending on the current configuration.
+ *
+ * To begin traversing the memory map @entry must be %NULL.
+ *
+ * Returns %NULL when we reach the end of the memory map.
+ */
+static void *efi_map_next_entry(void *entry)
+{
+	if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
+		/*
+		 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
+		 * config table feature requires us to map all entries
+		 * in the same order as they appear in the EFI memory
+		 * map. That is to say, entry N must have a lower
+		 * virtual address than entry N+1. This is because the
+		 * firmware toolchain leaves relative references in
+		 * the code/data sections, which are split and become
+		 * separate EFI memory regions. Mapping things
+		 * out-of-order leads to the firmware accessing
+		 * unmapped addresses.
+		 *
+		 * Since we need to map things this way whether or not
+		 * the kernel actually makes use of
+		 * EFI_PROPERTIES_TABLE, let's just switch to this
+		 * scheme by default for 64-bit.
+		 */
+		return efi_map_next_entry_reverse(entry);
+	}
+
+	/* Initial call */
+	if (!entry)
+		return memmap.map;
+
+	entry += memmap.desc_size;
+	if (entry >= memmap.map_end)
+		return NULL;
+
+	return entry;
+}
+
+/*
  * Map the efi memory ranges of the runtime services and update new_mmap with
  * virtual addresses.
  */
@@ -714,7 +778,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
 	unsigned long left = 0;
 	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+	p = NULL;
+	while ((p = efi_map_next_entry(p))) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
 #ifdef CONFIG_X86_64
@@ -952,24 +1017,6 @@ u32 efi_mem_type(unsigned long phys_addr)
 	return 0;
 }
 
-u64 efi_mem_attributes(unsigned long phys_addr)
-{
-	efi_memory_desc_t *md;
-	void *p;
-
-	if (!efi_enabled(EFI_MEMMAP))
-		return 0;
-
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-		if ((md->phys_addr <= phys_addr) &&
-		    (phys_addr < (md->phys_addr +
-				  (md->num_pages << EFI_PAGE_SHIFT))))
-			return md->attribute;
-	}
-	return 0;
-}
-
 static int __init arch_parse_efi_cmdline(char *str)
 {
 	if (!str) {
@@ -979,8 +1026,6 @@ static int __init arch_parse_efi_cmdline(char *str)
 
 	if (parse_option_str(str, "old_map"))
 		set_bit(EFI_OLD_MEMMAP, &efi.flags);
-	if (parse_option_str(str, "debug"))
-		set_bit(EFI_DBG, &efi.flags);
 
 	return 0;
 }
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 01d54ea766c1..1bbc21e2e4ae 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -61,7 +61,7 @@
 enum intel_mid_timer_options intel_mid_timer_options;
 
 /* intel_mid_ops to store sub arch ops */
-struct intel_mid_ops *intel_mid_ops;
+static struct intel_mid_ops *intel_mid_ops;
 /* getter function for sub arch ops*/
 static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
 enum intel_mid_cpu_type __intel_mid_cpu_chip;
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index ce992e8cc065..5ee360a951ce 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -197,10 +197,9 @@ static int __init sfi_parse_gpio(struct sfi_table_header *table)
 	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
 	pentry = (struct sfi_gpio_table_entry *)sb->pentry;
 
-	gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+	gpio_table = kmemdup(pentry, num * sizeof(*pentry), GFP_KERNEL);
 	if (!gpio_table)
 		return -1;
-	memcpy(gpio_table, pentry, num * sizeof(*pentry));
 	gpio_num_entry = num;
 
 	pr_debug("GPIO pin info:\n");
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 5c9f63fa6abf..327f21c3bde1 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -376,38 +376,42 @@ static void uv_nmi_wait(int master)
 		atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
 }
 
+/* Dump Instruction Pointer header */
 static void uv_nmi_dump_cpu_ip_hdr(void)
 {
-	printk(KERN_DEFAULT
-		"\nUV: %4s %6s %-32s %s   (Note: PID 0 not listed)\n",
+	pr_info("\nUV: %4s %6s %-32s %s   (Note: PID 0 not listed)\n",
 		"CPU", "PID", "COMMAND", "IP");
 }
 
+/* Dump Instruction Pointer info */
 static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
 {
-	printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ",
-		cpu, current->pid, current->comm);
-
+	pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
 	printk_address(regs->ip);
 }
 
-/* Dump this cpu's state */
+/*
+ * Dump this CPU's state.  If action was set to "kdump" and the crash_kexec
+ * failed, then we provide "dump" as an alternate action.  Action "dump" now
+ * also includes the show "ips" (instruction pointers) action whereas the
+ * action "ips" only displays instruction pointers for the non-idle CPU's.
+ * This is an abbreviated form of the "ps" command.
+ */
 static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
 {
 	const char *dots = " ................................. ";
 
-	if (uv_nmi_action_is("ips")) {
-		if (cpu == 0)
-			uv_nmi_dump_cpu_ip_hdr();
+	if (cpu == 0)
+		uv_nmi_dump_cpu_ip_hdr();
 
-		if (current->pid != 0)
-			uv_nmi_dump_cpu_ip(cpu, regs);
+	if (current->pid != 0 || !uv_nmi_action_is("ips"))
+		uv_nmi_dump_cpu_ip(cpu, regs);
 
-	} else if (uv_nmi_action_is("dump")) {
-		printk(KERN_DEFAULT
-			"UV:%sNMI process trace for CPU %d\n", dots, cpu);
+	if (uv_nmi_action_is("dump")) {
+		pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu);
 		show_regs(regs);
 	}
+
 	this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
 }
 
@@ -469,8 +473,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
 				uv_nmi_trigger_dump(tcpu);
 		}
 		if (ignored)
-			printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n",
-				ignored);
+			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
 
 		console_loglevel = saved_console_loglevel;
 		pr_alert("UV: process trace complete\n");
@@ -492,8 +495,9 @@ static void uv_nmi_touch_watchdogs(void)
 	touch_nmi_watchdog();
 }
 
-#if defined(CONFIG_KEXEC_CORE)
 static atomic_t uv_nmi_kexec_failed;
+
+#if defined(CONFIG_KEXEC_CORE)
 static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
 	/* Call crash to dump system state */
@@ -502,10 +506,9 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 		crash_kexec(regs);
 
 		pr_emerg("UV: crash_kexec unexpectedly returned, ");
+		atomic_set(&uv_nmi_kexec_failed, 1);
 		if (!kexec_crash_image) {
 			pr_cont("crash kernel not loaded\n");
-			atomic_set(&uv_nmi_kexec_failed, 1);
-			uv_nmi_sync_exit(1);
 			return;
 		}
 		pr_cont("kexec busy, stalling cpus while waiting\n");
@@ -514,9 +517,6 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 	/* If crash exec fails the slaves should return, otherwise stall */
 	while (atomic_read(&uv_nmi_kexec_failed) == 0)
 		mdelay(10);
-
-	/* Crash kernel most likely not loaded, return in an orderly fashion */
-	uv_nmi_sync_exit(0);
 }
 
 #else /* !CONFIG_KEXEC_CORE */
@@ -524,6 +524,7 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
 	if (master)
 		pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+	atomic_set(&uv_nmi_kexec_failed, 1);
 }
 #endif /* !CONFIG_KEXEC_CORE */
 
@@ -613,9 +614,14 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 	master = (atomic_read(&uv_nmi_cpu) == cpu);
 
 	/* If NMI action is "kdump", then attempt to do it */
-	if (uv_nmi_action_is("kdump"))
+	if (uv_nmi_action_is("kdump")) {
 		uv_nmi_kdump(cpu, master, regs);
 
+		/* Unexpected return, revert action to "dump" */
+		if (master)
+			strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
+	}
+
 	/* Pause as all cpus enter the NMI handler */
 	uv_nmi_wait(master);
 
@@ -640,6 +646,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 		atomic_set(&uv_nmi_cpus_in_nmi, -1);
 		atomic_set(&uv_nmi_cpu, -1);
 		atomic_set(&uv_in_nmi, 0);
+		atomic_set(&uv_nmi_kexec_failed, 0);
 	}
 
 	uv_nmi_touch_watchdogs();
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index 10fea5fc821e..df280da34825 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,11 +1,9 @@
 config AMD_MCE_INJ
 	tristate "Simple MCE injection interface for AMD processors"
-	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
+	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
 	default n
 	help
 	  This is a simple debugfs interface to inject MCEs and test different
 	  aspects of the MCE handling code.
 
 	  WARNING: Do not even assume this interface is staying stable!
-
-
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 17e35b5bf779..55d38cfa46c2 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -17,7 +17,11 @@
 #include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/pci.h>
+
 #include <asm/mce.h>
+#include <asm/amd_nb.h>
+#include <asm/irq_vectors.h>
 
 #include "../kernel/cpu/mcheck/mce-internal.h"
 
@@ -30,16 +34,21 @@ static struct dentry *dfs_inj;
 static u8 n_banks;
 
 #define MAX_FLAG_OPT_SIZE	3
+#define NBCFG			0x44
 
 enum injection_type {
 	SW_INJ = 0,	/* SW injection, simply decode the error */
 	HW_INJ,		/* Trigger a #MC */
+	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
+	THR_INT_INJ,    /* Trigger threshold interrupt */
 	N_INJ_TYPES,
 };
 
 static const char * const flags_options[] = {
 	[SW_INJ] = "sw",
 	[HW_INJ] = "hw",
+	[DFR_INT_INJ] = "df",
+	[THR_INT_INJ] = "th",
 	NULL
 };
 
@@ -129,12 +138,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
 {
 	char buf[MAX_FLAG_OPT_SIZE], *__buf;
 	int err;
-	size_t ret;
 
 	if (cnt > MAX_FLAG_OPT_SIZE)
-		cnt = MAX_FLAG_OPT_SIZE;
-
-	ret = cnt;
+		return -EINVAL;
 
 	if (copy_from_user(&buf, ubuf, cnt))
 		return -EFAULT;
@@ -150,9 +156,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
 		return err;
 	}
 
-	*ppos += ret;
+	*ppos += cnt;
 
-	return ret;
+	return cnt;
 }
 
 static const struct file_operations flags_fops = {
@@ -185,6 +191,55 @@ static void trigger_mce(void *info)
 	asm volatile("int $18");
 }
 
+static void trigger_dfr_int(void *info)
+{
+	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
+}
+
+static void trigger_thr_int(void *info)
+{
+	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
+}
+
+static u32 get_nbc_for_node(int node_id)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 cores_per_node;
+
+	cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket();
+
+	return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	u32 val;
+	int err;
+
+	if (!F3)
+		return;
+
+	err = pci_read_config_dword(F3, NBCFG, &val);
+	if (err) {
+		pr_err("%s: Error reading F%dx%03x.\n",
+		       __func__, PCI_FUNC(F3->devfn), NBCFG);
+		return;
+	}
+
+	if (val & BIT(27))
+		return;
+
+	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
+	       __func__);
+
+	val |= BIT(27);
+	err = pci_write_config_dword(F3, NBCFG, val);
+	if (err)
+		pr_err("%s: Error writing F%dx%03x.\n",
+		       __func__, PCI_FUNC(F3->devfn), NBCFG);
+}
+
 static void do_inject(void)
 {
 	u64 mcg_status = 0;
@@ -205,6 +260,26 @@ static void do_inject(void)
 	if (!(i_mce.status & MCI_STATUS_PCC))
 		mcg_status |= MCG_STATUS_RIPV;
 
+	/*
+	 * Ensure necessary status bits for deferred errors:
+	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
+	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
+	 */
+	if (inj_type == DFR_INT_INJ) {
+		i_mce.status |= MCI_STATUS_DEFERRED;
+		i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
+	}
+
+	/*
+	 * For multi node CPUs, logging and reporting of bank 4 errors happens
+	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
+	 * Fam10h and later BKDGs.
+	 */
+	if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
+	}
+
 	get_online_cpus();
 	if (!cpu_online(cpu))
 		goto err;
@@ -225,7 +300,16 @@ static void do_inject(void)
 
 	toggle_hw_mce_inject(cpu, false);
 
-	smp_call_function_single(cpu, trigger_mce, NULL, 0);
+	switch (inj_type) {
+	case DFR_INT_INJ:
+		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
+		break;
+	case THR_INT_INJ:
+		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
+		break;
+	default:
+		smp_call_function_single(cpu, trigger_mce, NULL, 0);
+	}
 
 err:
 	put_online_cpus();
@@ -290,6 +374,11 @@ static const char readme_msg[] =
 "\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
 "\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
 "\t    before injecting.\n"
+"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
+"\t    error APIC interrupt handler to handle the error if the feature is \n"
+"\t    is present in hardware. \n"
+"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
+"\t    APIC interrupt handler to handle the error. \n"
 "\n";
 
 static ssize_t
diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h
index 9fe77b7b5a0e..81d6562ce01d 100644
--- a/arch/x86/um/asm/syscall.h
+++ b/arch/x86/um/asm/syscall.h
@@ -3,6 +3,10 @@
 
 #include <uapi/linux/audit.h>
 
+typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
+					  unsigned long, unsigned long,
+					  unsigned long, unsigned long);
+
 static inline int syscall_get_arch(void)
 {
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 9701a4fd7bf2..836a1eb5df43 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -12,7 +12,10 @@
 #include <skas.h>
 #include <sysdep/tls.h>
 
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
+{
+	return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
 
 static long write_ldt_entry(struct mm_id *mm_idp, int func,
 		     struct user_desc *desc, void **addr, int done)
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index bd16d6c370ec..439c0994b696 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -7,6 +7,7 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <generated/user_constants.h>
+#include <asm/syscall.h>
 
 #define __NO_STUBS
 
@@ -24,15 +25,13 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL_I386
 #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
 
-typedef asmlinkage void (*sys_call_ptr_t)(void);
-
-extern asmlinkage void sys_ni_syscall(void);
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
 	/*
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index a75d8700472a..b74ea6c2c0e7 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -7,6 +7,7 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <generated/user_constants.h>
+#include <asm/syscall.h>
 
 #define __NO_STUBS
 
@@ -37,15 +38,13 @@
 #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
 #define __SYSCALL_X32(nr, sym, compat) /* Not supported */
 
-#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
+#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL_64
 #define __SYSCALL_64(nr, sym, compat) [ nr ] = sym,
 
-typedef void (*sys_call_ptr_t)(void);
-
-extern void sys_ni_syscall(void);
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
 	/*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 30d12afe52ed..5774800ff583 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,10 @@
 #include <linux/memblock.h>
 #include <linux/edd.h>
 
+#ifdef CONFIG_KEXEC_CORE
+#include <linux/kexec.h>
+#endif
+
 #include <xen/xen.h>
 #include <xen/events.h>
 #include <xen/interface/xen.h>
@@ -71,6 +75,7 @@
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/pat.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -1077,6 +1082,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		/* Fast syscall setup is all done in hypercalls, so
 		   these are all ignored.  Stub them out here to stop
 		   Xen console noise. */
+		break;
 
 	default:
 		if (!pmu_msr_write(msr, low, high, &ret))
@@ -1807,6 +1813,21 @@ static struct notifier_block xen_hvm_cpu_notifier = {
 	.notifier_call	= xen_hvm_cpu_notify,
 };
 
+#ifdef CONFIG_KEXEC_CORE
+static void xen_hvm_shutdown(void)
+{
+	native_machine_shutdown();
+	if (kexec_in_progress)
+		xen_reboot(SHUTDOWN_soft_reset);
+}
+
+static void xen_hvm_crash_shutdown(struct pt_regs *regs)
+{
+	native_machine_crash_shutdown(regs);
+	xen_reboot(SHUTDOWN_soft_reset);
+}
+#endif
+
 static void __init xen_hvm_guest_init(void)
 {
 	if (xen_pv_domain())
@@ -1826,6 +1847,10 @@ static void __init xen_hvm_guest_init(void)
 	x86_init.irqs.intr_init = xen_init_IRQ;
 	xen_hvm_init_time_ops();
 	xen_hvm_init_mmu_ops();
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.shutdown = xen_hvm_shutdown;
+	machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
+#endif
 }
 #endif
 
@@ -1875,3 +1900,17 @@ const struct hypervisor_x86 x86_hyper_xen = {
 	.set_cpu_features       = xen_set_cpu_features,
 };
 EXPORT_SYMBOL(x86_hyper_xen);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num)
+{
+	arch_register_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_register_cpu);
+
+void xen_arch_unregister_cpu(int num)
+{
+	arch_unregister_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_unregister_cpu);
+#endif
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 1580e7a5a4cf..e079500b17f3 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void)
 		kfree(pages);
 		return -ENOMEM;
 	}
-	rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
+	rc = alloc_xenballooned_pages(nr_grant_frames, pages);
 	if (rc) {
 		pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
 			nr_grant_frames, rc);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 9c479fe40459..ac161db63388 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2888,6 +2888,7 @@ static int do_remap_gfn(struct vm_area_struct *vma,
 		addr += range;
 		if (err_ptr)
 			err_ptr += batch;
+		cond_resched();
 	}
 out:
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index bfc08b13044b..cab9f766bb06 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -112,6 +112,15 @@ static unsigned long *p2m_identity;
 static pte_t *p2m_missing_pte;
 static pte_t *p2m_identity_pte;
 
+/*
+ * Hint at last populated PFN.
+ *
+ * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
+ * can avoid scanning the whole P2M (which may be sized to account for
+ * hotplugged memory).
+ */
+static unsigned long xen_p2m_last_pfn;
+
 static inline unsigned p2m_top_index(unsigned long pfn)
 {
 	BUG_ON(pfn >= MAX_P2M_PFN);
@@ -270,7 +279,7 @@ void xen_setup_mfn_list_list(void)
 	else
 		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
 			virt_to_mfn(p2m_top_mfn);
-	HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
+	HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
 	HYPERVISOR_shared_info->arch.p2m_generation = 0;
 	HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr;
 	HYPERVISOR_shared_info->arch.p2m_cr3 =
@@ -406,6 +415,8 @@ void __init xen_vmalloc_p2m_tree(void)
 	static struct vm_struct vm;
 	unsigned long p2m_limit;
 
+	xen_p2m_last_pfn = xen_max_p2m_pfn;
+
 	p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
 	vm.flags = VM_ALLOC;
 	vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
@@ -519,7 +530,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
  * the new pages are installed with cmpxchg; if we lose the race then
  * simply free the page we allocated and use the one that's there.
  */
-static bool alloc_p2m(unsigned long pfn)
+int xen_alloc_p2m_entry(unsigned long pfn)
 {
 	unsigned topidx;
 	unsigned long *top_mfn_p, *mid_mfn;
@@ -529,6 +540,9 @@ static bool alloc_p2m(unsigned long pfn)
 	unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
 	unsigned long p2m_pfn;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 0;
+
 	ptep = lookup_address(addr, &level);
 	BUG_ON(!ptep || level != PG_LEVEL_4K);
 	pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
@@ -537,7 +551,7 @@ static bool alloc_p2m(unsigned long pfn)
 		/* PMD level is missing, allocate a new one */
 		ptep = alloc_p2m_pmd(addr, pte_pg);
 		if (!ptep)
-			return false;
+			return -ENOMEM;
 	}
 
 	if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
@@ -555,7 +569,7 @@ static bool alloc_p2m(unsigned long pfn)
 
 			mid_mfn = alloc_p2m_page();
 			if (!mid_mfn)
-				return false;
+				return -ENOMEM;
 
 			p2m_mid_mfn_init(mid_mfn, p2m_missing);
 
@@ -581,7 +595,7 @@ static bool alloc_p2m(unsigned long pfn)
 
 		p2m = alloc_p2m_page();
 		if (!p2m)
-			return false;
+			return -ENOMEM;
 
 		if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
 			p2m_init(p2m);
@@ -608,8 +622,15 @@ static bool alloc_p2m(unsigned long pfn)
 			free_p2m_page(p2m);
 	}
 
-	return true;
+	/* Expanded the p2m? */
+	if (pfn > xen_p2m_last_pfn) {
+		xen_p2m_last_pfn = pfn;
+		HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
+	}
+
+	return 0;
 }
+EXPORT_SYMBOL(xen_alloc_p2m_entry);
 
 unsigned long __init set_phys_range_identity(unsigned long pfn_s,
 				      unsigned long pfn_e)
@@ -671,7 +692,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 {
 	if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
-		if (!alloc_p2m(pfn))
+		int ret;
+
+		ret = xen_alloc_p2m_entry(pfn);
+		if (ret < 0)
 			return false;
 
 		return __set_phys_to_machine(pfn, mfn);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f5ef6746d47a..7ab29518a3b9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -212,7 +212,7 @@ static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
 		e_pfn = PFN_DOWN(entry->addr + entry->size);
 
 		/* We only care about E820 after this */
-		if (e_pfn < *min_pfn)
+		if (e_pfn <= *min_pfn)
 			continue;
 
 		s_pfn = PFN_UP(entry->addr);
@@ -548,7 +548,7 @@ static unsigned long __init xen_get_max_pages(void)
 {
 	unsigned long max_pages, limit;
 	domid_t domid = DOMID_SELF;
-	int ret;
+	long ret;
 
 	limit = xen_get_pages_limit();
 	max_pages = limit;
@@ -798,7 +798,7 @@ char * __init xen_memory_setup(void)
 		xen_ignore_unusable();
 
 	/* Make sure the Xen-supplied memory map is well-ordered. */
-	sanitize_e820_map(xen_e820_map, xen_e820_map_entries,
+	sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
 			  &xen_e820_map_entries);
 
 	max_pages = xen_get_max_pages();
@@ -829,6 +829,8 @@ char * __init xen_memory_setup(void)
 	addr = xen_e820_map[0].addr;
 	size = xen_e820_map[0].size;
 	while (i < xen_e820_map_entries) {
+		bool discard = false;
+
 		chunk_size = size;
 		type = xen_e820_map[i].type;
 
@@ -843,10 +845,11 @@ char * __init xen_memory_setup(void)
 				xen_add_extra_mem(pfn_s, n_pfns);
 				xen_max_p2m_pfn = pfn_s + n_pfns;
 			} else
-				type = E820_UNUSABLE;
+				discard = true;
 		}
 
-		xen_align_and_add_e820_region(addr, chunk_size, type);
+		if (!discard)
+			xen_align_and_add_e820_region(addr, chunk_size, type);
 
 		addr += chunk_size;
 		size -= chunk_size;
@@ -965,17 +968,8 @@ char * __init xen_auto_xlated_memory_setup(void)
 static void __init fiddle_vdso(void)
 {
 #ifdef CONFIG_X86_32
-	/*
-	 * This could be called before selected_vdso32 is initialized, so
-	 * just fiddle with both possible images.  vdso_image_32_syscall
-	 * can't be selected, since it only exists on 64-bit systems.
-	 */
-	u32 *mask;
-	mask = vdso_image_32_int80.data +
-		vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
-	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
-	mask = vdso_image_32_sysenter.data +
-		vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
+	u32 *mask = vdso_image_32.data +
+		vdso_image_32.sym_VDSO32_NOTE_MASK;
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 #endif
 }
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 63c223dff5f1..b56855a1382a 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -28,4 +28,5 @@ generic-y += statfs.h
 generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 93795d047303..fd8017ce298a 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -47,7 +47,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -56,7 +56,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)		((v)->counter = (i))
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if XCHAL_HAVE_S32C1I
 #define ATOMIC_OP(op)							\
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 201aec0e0446..360944e1da52 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -74,6 +74,12 @@
  */
 #define MCL_CURRENT	1		/* lock all current mappings */
 #define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
 
 #define MADV_NORMAL	0		/* no further special treatment */
 #define MADV_RANDOM	1		/* expect random page references */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index b97767dbc7c8..b9ad9feadc2d 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -148,7 +148,7 @@ void __init time_init(void)
 	local_timer_setup(0);
 	setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction);
 	sched_clock_register(ccount_sched_clock_read, 32, ccount_freq);
-	clocksource_of_init();
+	clocksource_probe();
 }
 
 /*