Diffstat (limited to 'arch/x86')
88 files changed, 966 insertions, 4169 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d3338a87761f..595193bc2d31 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -32,6 +32,7 @@ config X86_64 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select SWIOTLB + select ARCH_HAS_ELFCORE_COMPAT config FORCE_DYNAMIC_FTRACE def_bool y @@ -206,7 +207,6 @@ config X86 select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_NMI - select HAVE_OPROFILE select HAVE_OPTPROBES select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS @@ -891,7 +891,7 @@ config HPET_TIMER config HPET_EMULATE_RTC def_bool y - depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) + depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y) config APB_TIMER def_bool y if X86_INTEL_MID @@ -1159,10 +1159,6 @@ config X86_MCE_INJECT If you don't know what a machine check is and you don't do kernel QA it is safe to say n. -config X86_THERMAL_VECTOR - def_bool y - depends on X86_MCE_INTEL - source "arch/x86/events/Kconfig" config X86_LEGACY_VM86 @@ -2865,7 +2861,6 @@ config IA32_EMULATION depends on X86_64 select ARCH_WANT_OLD_COMPAT_IPC select BINFMT_ELF - select COMPAT_BINFMT_ELF select COMPAT_OLD_SIGACTION help Include code to run legacy 32-bit programs under a diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 30920d70b48b..b797f1561943 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -232,9 +232,6 @@ core-y += arch/x86/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ -# must be linked after kernel/ -drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ - # suspend and hibernation support drivers-$(CONFIG_PM) += arch/x86/power/ @@ -295,16 +292,20 @@ archclean: $(Q)$(MAKE) $(clean)=arch/x86/tools define archhelp - echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' - echo ' install - Install kernel using' - echo ' (your) ~/bin/$(INSTALLKERNEL) or' - echo ' (distribution) /sbin/$(INSTALLKERNEL) or' - echo ' install to $$(INSTALL_PATH) and run lilo' - echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' - echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' - echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' - echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)' - echo ' bzdisk/fdimage*/isoimage also accept:' - echo ' FDARGS="..." arguments for the booted kernel' - echo ' FDINITRD=file initrd for the booted kernel' + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' + echo ' install - Install kernel using (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or install to ' + echo ' $$(INSTALL_PATH) and run lilo' + echo '' + echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' + echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' + echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' + echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)' + echo ' bzdisk/fdimage*/isoimage also accept:' + echo ' FDARGS="..." 
arguments for the booted kernel' + echo ' FDINITRD=file initrd for the booted kernel' + echo '' + echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest' + echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest' + endef diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cad08703c4ad..ce0464d630a2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -46,14 +46,6 @@ .code64 .section .entry.text, "ax" -#ifdef CONFIG_PARAVIRT_XXL -SYM_CODE_START(native_usergs_sysret64) - UNWIND_HINT_EMPTY - swapgs - sysretq -SYM_CODE_END(native_usergs_sysret64) -#endif /* CONFIG_PARAVIRT_XXL */ - /* * 64-bit SYSCALL instruction entry. Up to 6 arguments in registers. * @@ -123,7 +115,12 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * Try to use SYSRET instead of IRET if we're returning to * a completely clean 64-bit userspace context. If we're not, * go to the slow exit path. + * In the Xen PV case we must use iret anyway. */ + + ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \ + X86_FEATURE_XENPV + movq RCX(%rsp), %rcx movq RIP(%rsp), %r11 @@ -215,7 +212,8 @@ syscall_return_via_sysret: popq %rdi popq %rsp - USERGS_SYSRET64 + swapgs + sysretq SYM_CODE_END(entry_SYSCALL_64) /* @@ -669,7 +667,7 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS /* to kernel GS */ + swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ movq PER_CPU_VAR(espfix_waddr), %rdi @@ -699,7 +697,7 @@ native_irq_return_ldt: orq PER_CPU_VAR(espfix_stack), %rax SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi - SWAPGS /* to user GS */ + swapgs /* to user GS */ popq %rdi /* Restore user RDI */ movq %rax, %rsp @@ -943,7 +941,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) ret .Lparanoid_entry_swapgs: - SWAPGS + swapgs /* * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an @@ -1001,7 +999,7 @@ SYM_CODE_START_LOCAL(paranoid_exit) jnz restore_regs_and_return_to_kernel /* We are returning to a context with user GSBASE */ - SWAPGS_UNSAFE_STACK + swapgs jmp restore_regs_and_return_to_kernel SYM_CODE_END(paranoid_exit) @@ -1426,7 +1424,7 @@ nmi_no_fsgsbase: jnz nmi_restore nmi_swapgs: - SWAPGS_UNSAFE_STACK + swapgs nmi_restore: POP_REGS diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index f145e3326c6d..be09c7eac89f 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -159,17 +159,6 @@ struct compat_shmid64_ds { compat_ulong_t __unused5; }; -/* - * The type of struct elf_prstatus.pr_reg in compatible core dumps. - */ -typedef struct user_regs_struct compat_elf_gregset_t; - -/* Full regset -- prstatus on x32, otherwise on ia32 */ -#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 
144 : 296) -#define SET_PR_FPVALID(S, V, R) \ - do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ - while (0) - #ifdef CONFIG_X86_X32_ABI #define COMPAT_USE_64BIT_TIME \ (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 59bf91c57aa8..1728d4ce5730 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -30,6 +30,7 @@ enum cpuid_leafs CPUID_7_ECX, CPUID_8000_0007_EBX, CPUID_7_EDX, + CPUID_8000_001F_EAX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -88,8 +89,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -111,8 +113,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 19)) + BUILD_BUG_ON_ZERO(NCAPINTS != 20)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 84b887825f12..1feb6c089ba2 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! 
( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -236,8 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -385,6 +383,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 7947cb1782da..b7dd944dc867 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -91,6 +91,7 @@ DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define DISABLED_MASK19 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c98f78330b09..4d0b126835b8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -12,6 +12,7 @@ #include <linux/pgtable.h> extern unsigned long efi_fw_vendor, efi_config_table; +extern unsigned long efi_mixed_mode_stack_pa; /* * We map the EFI regions needed for runtime services non-contiguously, @@ -68,17 +69,33 @@ extern unsigned long efi_fw_vendor, efi_config_table; #f " called with too many arguments (" #p ">" #n ")"); \ }) +static inline void efi_fpu_begin(void) +{ + /* + * The UEFI calling convention (UEFI spec 2.3.2 and 2.3.4) requires + * that FCW and MXCSR (64-bit) must be initialized prior to calling + * UEFI code. 
(Oddly the spec does not require that the FPU stack + * be empty.) + */ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + +static inline void efi_fpu_end(void) +{ + kernel_fpu_end(); +} + #ifdef CONFIG_X86_32 #define arch_efi_call_virt_setup() \ ({ \ - kernel_fpu_begin(); \ + efi_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ }) #define arch_efi_call_virt_teardown() \ ({ \ firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ + efi_fpu_end(); \ }) #define arch_efi_call_virt(p, f, args...) p->f(args) @@ -94,22 +111,12 @@ extern asmlinkage u64 __efi_call(void *fp, ...); __efi_call(__VA_ARGS__); \ }) -/* - * struct efi_scratch - Scratch space used while switching to/from efi_mm - * @phys_stack: stack used during EFI Mixed Mode - * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm - */ -struct efi_scratch { - u64 phys_stack; - struct mm_struct *prev_mm; -} __packed; - #define arch_efi_call_virt_setup() \ ({ \ efi_sync_low_kernel_mappings(); \ - kernel_fpu_begin(); \ + efi_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ - efi_switch_mm(&efi_mm); \ + efi_enter_mm(); \ }) #define arch_efi_call_virt(p, f, args...) \ @@ -117,9 +124,9 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - efi_switch_mm(efi_scratch.prev_mm); \ + efi_leave_mm(); \ firmware_restrict_branch_speculation_end(); \ - kernel_fpu_end(); \ + efi_fpu_end(); \ }) #ifdef CONFIG_KASAN @@ -136,7 +143,6 @@ struct efi_scratch { #endif /* CONFIG_X86_32 */ -extern struct efi_scratch efi_scratch; extern int __init efi_memblock_x86_reserve_range(void); extern void __init efi_print_memmap(void); extern void __init efi_map_region(efi_memory_desc_t *md); @@ -149,10 +155,12 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); -extern void efi_switch_mm(struct mm_struct *mm); -extern void efi_recover_from_page_fault(unsigned long phys_addr); +extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); +void efi_enter_mm(void); +void efi_leave_mm(void); + /* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 66bdfe838d61..9224d40cdefe 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -364,7 +364,7 @@ do { \ #define COMPAT_ARCH_DLINFO \ if (exec->e_machine == EM_X86_64) \ ARCH_DLINFO_X32; \ -else \ +else if (IS_ENABLED(CONFIG_IA32_EMULATION)) \ ARCH_DLINFO_IA32 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) diff --git a/arch/x86/include/asm/elfcore-compat.h b/arch/x86/include/asm/elfcore-compat.h new file mode 100644 index 000000000000..f1b6c7a8d8fc --- /dev/null +++ b/arch/x86/include/asm/elfcore-compat.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_ELFCORE_COMPAT_H +#define _ASM_X86_ELFCORE_COMPAT_H + +#include <asm/user32.h> + +/* + * On amd64 we have two 32bit ABIs - i386 and x32. The latter + * has bigger registers, so we use it for compat_elf_regset_t. + * The former uses i386_elf_prstatus and PRSTATUS_SIZE/SET_PR_FPVALID + * are used to choose the size and location of ->pr_fpvalid of + * the layout actually used. 
+ */ +typedef struct user_regs_struct compat_elf_gregset_t; + +struct i386_elf_prstatus +{ + struct compat_elf_prstatus_common common; + struct user_regs_struct32 pr_reg; + compat_int_t pr_fpvalid; +}; + +#define PRSTATUS_SIZE \ + (user_64bit_mode(task_pt_regs(current)) \ + ? sizeof(struct compat_elf_prstatus) \ + : sizeof(struct i386_elf_prstatus)) +#define SET_PR_FPVALID(S) \ + (*(user_64bit_mode(task_pt_regs(current)) \ + ? &(S)->pr_fpvalid \ + : &((struct i386_elf_prstatus *)(S))->pr_fpvalid) = 1) + +#endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 67a4f1cb2aac..ed33a14188f6 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -32,7 +32,19 @@ extern void fpregs_mark_activate(void); /* Code that is unaware of kernel_fpu_begin_mask() can use this */ static inline void kernel_fpu_begin(void) { +#ifdef CONFIG_X86_64 + /* + * Any 64-bit code that uses 387 instructions must explicitly request + * KFPU_387. + */ + kernel_fpu_begin_mask(KFPU_MXCSR); +#else + /* + * 32-bit kernel code may use 387 operations as well as SSE2, etc, + * as long as it checks that the CPU has the required capability. + */ kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +#endif } /* diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index f656aabd1545..41e2e2e1b439 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -585,6 +585,9 @@ DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); #else DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check); #endif +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); +#endif #endif /* NMI */ @@ -605,6 +608,9 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug); /* #DF */ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); +#endif /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 528c8a71fe7f..76d389691b5b 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -40,8 +40,6 @@ extern void native_init_IRQ(void); extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); -extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector); - extern void init_ISA_irqs(void); extern void __init init_IRQ(void); diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2dfc8d380dab..144d70ea4393 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -35,15 +35,6 @@ extern __always_inline unsigned long native_save_fl(void) return flags; } -extern inline void native_restore_fl(unsigned long flags); -extern inline void native_restore_fl(unsigned long flags) -{ - asm volatile("push %0 ; popf" - : /* no output */ - :"g" (flags) - :"memory", "cc"); -} - static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); @@ -79,11 +70,6 @@ static __always_inline unsigned long arch_local_save_flags(void) return native_save_fl(); } -static __always_inline void arch_local_irq_restore(unsigned long flags) -{ - native_restore_fl(flags); -} - static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); @@ -131,25 +117,7 @@ static __always_inline unsigned long arch_local_irq_save(void) #define SAVE_FLAGS(x) pushfq; popq %rax #endif -#define SWAPGS swapgs -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on 
(such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs - #define INTERRUPT_RETURN jmp native_iret -#define USERGS_SYSRET64 \ - swapgs; \ - sysretq; -#define USERGS_SYSRET32 \ - swapgs; \ - sysretl #else #define INTERRUPT_RETURN iret @@ -170,6 +138,20 @@ static __always_inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(flags); } + +static __always_inline void arch_local_irq_restore(unsigned long flags) +{ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_enable(); +} +#else +#ifdef CONFIG_X86_64 +#ifdef CONFIG_XEN_PV +#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV +#else +#define SWAPGS swapgs +#endif +#endif #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 56cdeaac76a0..ddfb3cad8dff 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -289,28 +289,6 @@ extern void (*mce_threshold_vector)(void); extern void (*deferred_error_int_vector)(void); /* - * Thermal handler - */ - -void intel_init_thermal(struct cpuinfo_x86 *c); - -/* Interrupt Handler for core thermal thresholds */ -extern int (*platform_thermal_notify)(__u64 msr_val); - -/* Interrupt Handler for package thermal thresholds */ -extern int (*platform_thermal_package_notify)(__u64 msr_val); - -/* Callback support of rate control, return true, if - * callback has rate control */ -extern bool (*platform_thermal_package_rate_control)(void); - -#ifdef CONFIG_X86_THERMAL_VECTOR -extern void mcheck_intel_therm_init(void); -#else -static inline void mcheck_intel_therm_init(void) { } -#endif - -/* * Used by APEI to report memory error via /dev/mcelog */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2b7cc5397f80..ab45a220fac4 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -127,14 +127,12 @@ static inline unsigned int x86_cpuid_family(void) } #ifdef CONFIG_MICROCODE -int __init microcode_init(void); extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); extern bool initrd_gone; #else -static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void reload_early_microcode(void) { } diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 9d5d949e662e..1cb9c17a4cb4 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -9,7 +9,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 645bd1d0ee07..64297eabad63 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -66,7 +66,7 @@ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical * address, then that syscall will enter the kernel with a * non-canonical return address, and SYSRET will explode dangerously. 
- * We avoid this particular problem by preventing anything executable + * We avoid this particular problem by preventing anything * from being mapped at the maximum canonical address. * * On AMD CPUs in the Ryzen family, there's a nasty bug in which the diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f8dce11d2bc1..4abf110e2243 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -648,11 +648,6 @@ static inline notrace unsigned long arch_local_save_flags(void) return PVOP_CALLEE0(unsigned long, irq.save_fl); } -static inline notrace void arch_local_irq_restore(unsigned long f) -{ - PVOP_VCALLEE1(irq.restore_fl, f); -} - static inline notrace void arch_local_irq_disable(void) { PVOP_VCALLEE0(irq.irq_disable); @@ -776,31 +771,6 @@ extern void default_banner(void); #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -/* - * If swapgs is used while the userspace stack is still current, - * there's no way to call a pvop. The PV replacement *must* be - * inlined, or the swapgs instruction must be trapped and emulated. - */ -#define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) - -/* - * Note: swapgs is very special, and in practise is either going to be - * implemented with a single "swapgs" instruction or something very - * special. Either way, we don't need to save any registers for - * it. - */ -#define SWAPGS \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ - ) - -#define USERGS_SYSRET64 \ - PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ - ANNOTATE_RETPOLINE_SAFE; \ - jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);) - #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index b6b02b7c19cc..de87087d3bde 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -156,20 +156,10 @@ struct pv_cpu_ops { u64 (*read_pmc)(int counter); - /* - * Switch to usermode gs and return to 64-bit usermode using - * sysret. Only used in 64-bit kernels to return to 64-bit - * processes. Usermode register state, including %rsp, must - * already be restored. - */ - void (*usergs_sysret64)(void); - /* Normal iret. Jump to this with the standard iret stack frame set up. */ void (*iret)(void); - void (*swapgs)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif @@ -178,16 +168,13 @@ struct pv_cpu_ops { struct pv_irq_ops { #ifdef CONFIG_PARAVIRT_XXL /* - * Get/set interrupt state. save_fl and restore_fl are only - * expected to use X86_EFLAGS_IF; all other bits - * returned from save_fl are undefined, and may be ignored by - * restore_fl. + * Get/set interrupt state. save_fl is expected to use X86_EFLAGS_IF; + * all other bits returned from save_fl are undefined. * * NOTE: These functions callers expect the callee to preserve * more registers than the standard C calling convention. 
*/ struct paravirt_callee_save save_fl; - struct paravirt_callee_save restore_fl; struct paravirt_callee_save irq_disable; struct paravirt_callee_save irq_enable; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 394757ee030a..f24d7ef8fffa 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -177,8 +177,6 @@ enum page_cache_mode { #define __pgprot(x) ((pgprot_t) { (x) } ) #define __pg(x) __pgprot(x) -#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) - #define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) #define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) #define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 3ff0d48469f2..b2d504f11937 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -101,6 +101,7 @@ #define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) +#define REQUIRED_MASK19 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 07603064df8f..d60ed0668a59 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -56,19 +56,22 @@ static void __resctrl_sched_in(void) struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); u32 closid = state->default_closid; u32 rmid = state->default_rmid; + u32 tmp; /* * If this task has a closid/rmid assigned, use it. * Else use the closid/rmid assigned to this cpu. */ if (static_branch_likely(&rdt_alloc_enable_key)) { - if (current->closid) - closid = current->closid; + tmp = READ_ONCE(current->closid); + if (tmp) + closid = tmp; } if (static_branch_likely(&rdt_mon_enable_key)) { - if (current->rmid) - rmid = current->rmid; + tmp = READ_ONCE(current->rmid); + if (tmp) + rmid = tmp; } if (closid != state->cur_closid || rmid != state->cur_rmid) { diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index cc177b4431ae..1d3cbaef4bb7 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -243,10 +243,10 @@ static inline void serialize(void) } /* The dst parameter must be 64-bytes aligned */ -static inline void movdir64b(void *dst, const void *src) +static inline void movdir64b(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; - struct { char _[64]; } *__dst = dst; + struct { char _[64]; } __iomem *__dst = dst; /* * MOVDIR64B %(rdx), rax. 
@@ -286,7 +286,7 @@ static inline void movdir64b(void *dst, const void *src) static inline int enqcmds(void __iomem *dst, const void *src) { const struct { char _[64]; } *__src = src; - struct { char _[64]; } *__dst = dst; + struct { char _[64]; } __iomem *__dst = dst; int zf; /* diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h new file mode 100644 index 000000000000..ddbdefd5b94f --- /dev/null +++ b/arch/x86/include/asm/thermal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_THERMAL_H +#define _ASM_X86_THERMAL_H + +#ifdef CONFIG_X86_THERMAL_VECTOR +void intel_init_thermal(struct cpuinfo_x86 *c); +bool x86_thermal_enabled(void); +void intel_thermal_interrupt(void); +#else +static inline void intel_init_thermal(struct cpuinfo_x86 *c) { } +#endif + +#endif /* _ASM_X86_THERMAL_H */ diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index 820082bd6880..1bfe979bb9bc 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -4,7 +4,6 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush tlb_flush static inline void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 26efbec94448..9e8ac5073ecb 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -36,7 +36,6 @@ struct vm86 { unsigned long saved_sp0; unsigned long flags; - unsigned long screen_bitmap; unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h index d2ee4e307ef8..18909b8050bc 100644 --- a/arch/x86/include/uapi/asm/vm86.h +++ b/arch/x86/include/uapi/asm/vm86.h @@ -97,7 +97,7 @@ struct revectored_struct { struct vm86_struct { struct vm86_regs regs; unsigned long flags; - unsigned long screen_bitmap; + unsigned long screen_bitmap; /* unused, preserved by vm86() */ unsigned long cpu_type; struct revectored_struct int_revectored; struct revectored_struct int21_revectored; @@ -106,7 +106,7 @@ struct vm86_struct { /* * flags masks */ -#define VM86_SCREEN_BITMAP 0x0001 +#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */ struct vm86plus_info_struct { unsigned long force_return_for_pic:1; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 828be792231e..b14533af7676 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -13,9 +13,6 @@ int main(void) { #ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT_XXL - OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template, - cpu.usergs_sysret64); - OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs); #ifdef CONFIG_DEBUG_ENTRY OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 35ad8480c464..9215b91bc044 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -960,6 +960,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); + if (c->extended_cpuid_level >= 0x8000001f) + c->x86_capability[CPUID_8000_001F_EAX] = cpuid_eax(0x8000001f); + init_scattered_cpuid_features(c); init_speculation_control(c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 
816fdbec795a..0e422a544835 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,6 +24,7 @@ #include <asm/traps.h> #include <asm/resctrl.h> #include <asm/numa.h> +#include <asm/thermal.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -719,6 +720,8 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); + + intel_init_thermal(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/mce/Makefile b/arch/x86/kernel/cpu/mce/Makefile index 9f020c994154..015856abdbb1 100644 --- a/arch/x86/kernel/cpu/mce/Makefile +++ b/arch/x86/kernel/cpu/mce/Makefile @@ -9,8 +9,6 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o mce-inject-y := inject.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o -obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o - obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e133ce1e562b..7962355436da 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -878,6 +878,12 @@ static atomic_t mce_executing; static atomic_t mce_callin; /* + * Track which CPUs entered the MCA broadcast synchronization and which not in + * order to print holdouts. + */ +static cpumask_t mce_missing_cpus = CPU_MASK_ALL; + +/* * Check if a timeout waiting for other CPUs happened. */ static int mce_timed_out(u64 *t, const char *msg) @@ -894,8 +900,12 @@ static int mce_timed_out(u64 *t, const char *msg) if (!mca_cfg.monarch_timeout) goto out; if ((s64)*t < SPINUNIT) { - if (mca_cfg.tolerant <= 1) + if (mca_cfg.tolerant <= 1) { + if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus)) + pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n", + cpumask_pr_args(&mce_missing_cpus)); mce_panic(msg, NULL, NULL); + } cpu_missing = 1; return 1; } @@ -1006,6 +1016,7 @@ static int mce_start(int *no_way_out) * is updated before mce_callin. */ order = atomic_inc_return(&mce_callin); + cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus); /* * Wait for everyone. 
@@ -1114,6 +1125,7 @@ static int mce_end(int order) reset: atomic_set(&global_nwo, 0); atomic_set(&mce_callin, 0); + cpumask_setall(&mce_missing_cpus); barrier(); /* @@ -2178,7 +2190,6 @@ __setup("mce", mcheck_enable); int __init mcheck_init(void) { - mcheck_intel_therm_init(); mce_register_decode_chain(&early_nb); mce_register_decode_chain(&mce_uc_nb); mce_register_decode_chain(&mce_default_nb); @@ -2713,6 +2724,7 @@ static void mce_reset(void) atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); + cpumask_setall(&mce_missing_cpus); } static int fake_panic_get(void *data, u64 *val) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index c2476fe0682e..e309476743b7 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -531,7 +531,6 @@ static void intel_imc_init(struct cpuinfo_x86 *c) void mce_intel_feature_init(struct cpuinfo_x86 *c) { - intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); intel_ppin_init(c); diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c deleted file mode 100644 index a7cd2d203ced..000000000000 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ /dev/null @@ -1,739 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thermal throttle event support code (such as syslog messaging and rate - * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). - * - * This allows consistent reporting of CPU thermal throttle events. - * - * Maintains a counter in /sys that keeps track of the number of thermal - * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog is rate limited). - * - * Author: Dmitriy Zavin (dmitriyz@google.com) - * - * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. - * Inspired by Ross Biro's and Al Borchers' counter code. - */ -#include <linux/interrupt.h> -#include <linux/notifier.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/percpu.h> -#include <linux/export.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/cpu.h> - -#include <asm/processor.h> -#include <asm/traps.h> -#include <asm/apic.h> -#include <asm/mce.h> -#include <asm/msr.h> -#include <asm/trace/irq_vectors.h> - -#include "internal.h" - -/* How long to wait between reporting thermal events */ -#define CHECK_INTERVAL (300 * HZ) - -#define THERMAL_THROTTLING_EVENT 0 -#define POWER_LIMIT_EVENT 1 - -/** - * struct _thermal_state - Represent the current thermal event state - * @next_check: Stores the next timestamp, when it is allowed - * to log the next warning message. - * @last_interrupt_time: Stores the timestamp for the last threshold - * high event. - * @therm_work: Delayed workqueue structure - * @count: Stores the current running count for thermal - * or power threshold interrupts. - * @last_count: Stores the previous running count for thermal - * or power threshold interrupts. - * @max_time_ms: This shows the maximum amount of time CPU was - * in throttled state for a single thermal - * threshold high to low state. - * @total_time_ms: This is a cumulative time during which CPU was - * in the throttled state. - * @rate_control_active: Set when a throttling message is logged. - * This is used for the purpose of rate-control. - * @new_event: Stores the last high/low status of the - * THERM_STATUS_PROCHOT or - * THERM_STATUS_POWER_LIMIT. 
- * @level: Stores whether this _thermal_state instance is - * for a CORE level or for PACKAGE level. - * @sample_index: Index for storing the next sample in the buffer - * temp_samples[]. - * @sample_count: Total number of samples collected in the buffer - * temp_samples[]. - * @average: The last moving average of temperature samples - * @baseline_temp: Temperature at which thermal threshold high - * interrupt was generated. - * @temp_samples: Storage for temperature samples to calculate - * moving average. - * - * This structure is used to represent data related to thermal state for a CPU. - * There is a separate storage for core and package level for each CPU. - */ -struct _thermal_state { - u64 next_check; - u64 last_interrupt_time; - struct delayed_work therm_work; - unsigned long count; - unsigned long last_count; - unsigned long max_time_ms; - unsigned long total_time_ms; - bool rate_control_active; - bool new_event; - u8 level; - u8 sample_index; - u8 sample_count; - u8 average; - u8 baseline_temp; - u8 temp_samples[3]; -}; - -struct thermal_state { - struct _thermal_state core_throttle; - struct _thermal_state core_power_limit; - struct _thermal_state package_throttle; - struct _thermal_state package_power_limit; - struct _thermal_state core_thresh0; - struct _thermal_state core_thresh1; - struct _thermal_state pkg_thresh0; - struct _thermal_state pkg_thresh1; -}; - -/* Callback to handle core threshold interrupts */ -int (*platform_thermal_notify)(__u64 msr_val); -EXPORT_SYMBOL(platform_thermal_notify); - -/* Callback to handle core package threshold_interrupts */ -int (*platform_thermal_package_notify)(__u64 msr_val); -EXPORT_SYMBOL_GPL(platform_thermal_package_notify); - -/* Callback support of rate control, return true, if - * callback has rate control */ -bool (*platform_thermal_package_rate_control)(void); -EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control); - - -static DEFINE_PER_CPU(struct thermal_state, thermal_state); - -static atomic_t therm_throt_en = ATOMIC_INIT(0); - -static u32 lvtthmr_init __read_mostly; - -#ifdef CONFIG_SYSFS -#define define_therm_throt_device_one_ro(_name) \ - static DEVICE_ATTR(_name, 0444, \ - therm_throt_device_show_##_name, \ - NULL) \ - -#define define_therm_throt_device_show_func(event, name) \ - \ -static ssize_t therm_throt_device_show_##event##_##name( \ - struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - unsigned int cpu = dev->id; \ - ssize_t ret; \ - \ - preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) { \ - ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_state, cpu).event.name); \ - } else \ - ret = 0; \ - preempt_enable(); \ - \ - return ret; \ -} - -define_therm_throt_device_show_func(core_throttle, count); -define_therm_throt_device_one_ro(core_throttle_count); - -define_therm_throt_device_show_func(core_power_limit, count); -define_therm_throt_device_one_ro(core_power_limit_count); - -define_therm_throt_device_show_func(package_throttle, count); -define_therm_throt_device_one_ro(package_throttle_count); - -define_therm_throt_device_show_func(package_power_limit, count); -define_therm_throt_device_one_ro(package_power_limit_count); - -define_therm_throt_device_show_func(core_throttle, max_time_ms); -define_therm_throt_device_one_ro(core_throttle_max_time_ms); - -define_therm_throt_device_show_func(package_throttle, max_time_ms); -define_therm_throt_device_one_ro(package_throttle_max_time_ms); - -define_therm_throt_device_show_func(core_throttle, total_time_ms); 
-define_therm_throt_device_one_ro(core_throttle_total_time_ms); - -define_therm_throt_device_show_func(package_throttle, total_time_ms); -define_therm_throt_device_one_ro(package_throttle_total_time_ms); - -static struct attribute *thermal_throttle_attrs[] = { - &dev_attr_core_throttle_count.attr, - &dev_attr_core_throttle_max_time_ms.attr, - &dev_attr_core_throttle_total_time_ms.attr, - NULL -}; - -static const struct attribute_group thermal_attr_group = { - .attrs = thermal_throttle_attrs, - .name = "thermal_throttle" -}; -#endif /* CONFIG_SYSFS */ - -#define CORE_LEVEL 0 -#define PACKAGE_LEVEL 1 - -#define THERM_THROT_POLL_INTERVAL HZ -#define THERM_STATUS_PROCHOT_LOG BIT(1) - -#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) -#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) - -static void clear_therm_status_log(int level) -{ - int msr; - u64 mask, msr_val; - - if (level == CORE_LEVEL) { - msr = MSR_IA32_THERM_STATUS; - mask = THERM_STATUS_CLEAR_CORE_MASK; - } else { - msr = MSR_IA32_PACKAGE_THERM_STATUS; - mask = THERM_STATUS_CLEAR_PKG_MASK; - } - - rdmsrl(msr, msr_val); - msr_val &= mask; - wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG); -} - -static void get_therm_status(int level, bool *proc_hot, u8 *temp) -{ - int msr; - u64 msr_val; - - if (level == CORE_LEVEL) - msr = MSR_IA32_THERM_STATUS; - else - msr = MSR_IA32_PACKAGE_THERM_STATUS; - - rdmsrl(msr, msr_val); - if (msr_val & THERM_STATUS_PROCHOT_LOG) - *proc_hot = true; - else - *proc_hot = false; - - *temp = (msr_val >> 16) & 0x7F; -} - -static void __maybe_unused throttle_active_work(struct work_struct *work) -{ - struct _thermal_state *state = container_of(to_delayed_work(work), - struct _thermal_state, therm_work); - unsigned int i, avg, this_cpu = smp_processor_id(); - u64 now = get_jiffies_64(); - bool hot; - u8 temp; - - get_therm_status(state->level, &hot, &temp); - /* temperature value is offset from the max so lesser means hotter */ - if (!hot && temp > state->baseline_temp) { - if (state->rate_control_active) - pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n", - this_cpu, - state->level == CORE_LEVEL ? "Core" : "Package", - state->count); - - state->rate_control_active = false; - return; - } - - if (time_before64(now, state->next_check) && - state->rate_control_active) - goto re_arm; - - state->next_check = now + CHECK_INTERVAL; - - if (state->count != state->last_count) { - /* There was one new thermal interrupt */ - state->last_count = state->count; - state->average = 0; - state->sample_count = 0; - state->sample_index = 0; - } - - state->temp_samples[state->sample_index] = temp; - state->sample_count++; - state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples); - if (state->sample_count < ARRAY_SIZE(state->temp_samples)) - goto re_arm; - - avg = 0; - for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i) - avg += state->temp_samples[i]; - - avg /= ARRAY_SIZE(state->temp_samples); - - if (state->average > avg) { - pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", - this_cpu, - state->level == CORE_LEVEL ? 
"Core" : "Package", - state->count); - state->rate_control_active = true; - } - - state->average = avg; - -re_arm: - clear_therm_status_log(state->level); - schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); -} - -/*** - * therm_throt_process - Process thermal throttling event from interrupt - * @curr: Whether the condition is current or not (boolean), since the - * thermal interrupt normally gets called both when the thermal - * event begins and once the event has ended. - * - * This function is called by the thermal interrupt after the - * IRQ has been acknowledged. - * - * It will take care of rate limiting and printing messages to the syslog. - */ -static void therm_throt_process(bool new_event, int event, int level) -{ - struct _thermal_state *state; - unsigned int this_cpu = smp_processor_id(); - bool old_event; - u64 now; - struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - - now = get_jiffies_64(); - if (level == CORE_LEVEL) { - if (event == THERMAL_THROTTLING_EVENT) - state = &pstate->core_throttle; - else if (event == POWER_LIMIT_EVENT) - state = &pstate->core_power_limit; - else - return; - } else if (level == PACKAGE_LEVEL) { - if (event == THERMAL_THROTTLING_EVENT) - state = &pstate->package_throttle; - else if (event == POWER_LIMIT_EVENT) - state = &pstate->package_power_limit; - else - return; - } else - return; - - old_event = state->new_event; - state->new_event = new_event; - - if (new_event) - state->count++; - - if (event != THERMAL_THROTTLING_EVENT) - return; - - if (new_event && !state->last_interrupt_time) { - bool hot; - u8 temp; - - get_therm_status(state->level, &hot, &temp); - /* - * Ignore short temperature spike as the system is not close - * to PROCHOT. 10C offset is large enough to ignore. It is - * already dropped from the high threshold temperature. - */ - if (temp > 10) - return; - - state->baseline_temp = temp; - state->last_interrupt_time = now; - schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); - } else if (old_event && state->last_interrupt_time) { - unsigned long throttle_time; - - throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time); - if (throttle_time > state->max_time_ms) - state->max_time_ms = throttle_time; - state->total_time_ms += throttle_time; - state->last_interrupt_time = 0; - } -} - -static int thresh_event_valid(int level, int event) -{ - struct _thermal_state *state; - unsigned int this_cpu = smp_processor_id(); - struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); - u64 now = get_jiffies_64(); - - if (level == PACKAGE_LEVEL) - state = (event == 0) ? &pstate->pkg_thresh0 : - &pstate->pkg_thresh1; - else - state = (event == 0) ? 
&pstate->core_thresh0 : - &pstate->core_thresh1; - - if (time_before64(now, state->next_check)) - return 0; - - state->next_check = now + CHECK_INTERVAL; - - return 1; -} - -static bool int_pln_enable; -static int __init int_pln_enable_setup(char *s) -{ - int_pln_enable = true; - - return 1; -} -__setup("int_pln_enable", int_pln_enable_setup); - -#ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device: */ -static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu) -{ - int err; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - err = sysfs_create_group(&dev->kobj, &thermal_attr_group); - if (err) - return err; - - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_core_power_limit_count.attr, - thermal_attr_group.name); - if (err) - goto del_group; - } - - if (cpu_has(c, X86_FEATURE_PTS)) { - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_throttle_count.attr, - thermal_attr_group.name); - if (err) - goto del_group; - - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_throttle_max_time_ms.attr, - thermal_attr_group.name); - if (err) - goto del_group; - - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_throttle_total_time_ms.attr, - thermal_attr_group.name); - if (err) - goto del_group; - - if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) { - err = sysfs_add_file_to_group(&dev->kobj, - &dev_attr_package_power_limit_count.attr, - thermal_attr_group.name); - if (err) - goto del_group; - } - } - - return 0; - -del_group: - sysfs_remove_group(&dev->kobj, &thermal_attr_group); - - return err; -} - -static void thermal_throttle_remove_dev(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &thermal_attr_group); -} - -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static int thermal_throttle_online(unsigned int cpu) -{ - struct thermal_state *state = &per_cpu(thermal_state, cpu); - struct device *dev = get_cpu_device(cpu); - u32 l; - - state->package_throttle.level = PACKAGE_LEVEL; - state->core_throttle.level = CORE_LEVEL; - - INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work); - INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work); - - /* Unmask the thermal vector after the above workqueues are initialized. */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - - return thermal_throttle_add_dev(dev, cpu); -} - -static int thermal_throttle_offline(unsigned int cpu) -{ - struct thermal_state *state = &per_cpu(thermal_state, cpu); - struct device *dev = get_cpu_device(cpu); - u32 l; - - /* Mask the thermal vector before draining evtl. pending work */ - l = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); - - cancel_delayed_work_sync(&state->package_throttle.therm_work); - cancel_delayed_work_sync(&state->core_throttle.therm_work); - - state->package_throttle.rate_control_active = false; - state->core_throttle.rate_control_active = false; - - thermal_throttle_remove_dev(dev); - return 0; -} - -static __init int thermal_throttle_init_device(void) -{ - int ret; - - if (!atomic_read(&therm_throt_en)) - return 0; - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online", - thermal_throttle_online, - thermal_throttle_offline); - return ret < 0 ? 
ret : 0; -} -device_initcall(thermal_throttle_init_device); - -#endif /* CONFIG_SYSFS */ - -static void notify_package_thresholds(__u64 msr_val) -{ - bool notify_thres_0 = false; - bool notify_thres_1 = false; - - if (!platform_thermal_package_notify) - return; - - /* lower threshold check */ - if (msr_val & THERM_LOG_THRESHOLD0) - notify_thres_0 = true; - /* higher threshold check */ - if (msr_val & THERM_LOG_THRESHOLD1) - notify_thres_1 = true; - - if (!notify_thres_0 && !notify_thres_1) - return; - - if (platform_thermal_package_rate_control && - platform_thermal_package_rate_control()) { - /* Rate control is implemented in callback */ - platform_thermal_package_notify(msr_val); - return; - } - - /* lower threshold reached */ - if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0)) - platform_thermal_package_notify(msr_val); - /* higher threshold reached */ - if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1)) - platform_thermal_package_notify(msr_val); -} - -static void notify_thresholds(__u64 msr_val) -{ - /* check whether the interrupt handler is defined; - * otherwise simply return - */ - if (!platform_thermal_notify) - return; - - /* lower threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD0) && - thresh_event_valid(CORE_LEVEL, 0)) - platform_thermal_notify(msr_val); - /* higher threshold reached */ - if ((msr_val & THERM_LOG_THRESHOLD1) && - thresh_event_valid(CORE_LEVEL, 1)) - platform_thermal_notify(msr_val); -} - -/* Thermal transition interrupt handler */ -static void intel_thermal_interrupt(void) -{ - __u64 msr_val; - - if (static_cpu_has(X86_FEATURE_HWP)) - wrmsrl_safe(MSR_HWP_STATUS, 0); - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - - /* Check for violation of core thermal thresholds*/ - notify_thresholds(msr_val); - - therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL); - - if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) - therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, - POWER_LIMIT_EVENT, - CORE_LEVEL); - - if (this_cpu_has(X86_FEATURE_PTS)) { - rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); - /* check violations of package thermal thresholds */ - notify_package_thresholds(msr_val); - therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - PACKAGE_LEVEL); - if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) - therm_throt_process(msr_val & - PACKAGE_THERM_STATUS_POWER_LIMIT, - POWER_LIMIT_EVENT, - PACKAGE_LEVEL); - } -} - -static void unexpected_thermal_interrupt(void) -{ - pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); -} - -static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; - -DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) -{ - trace_thermal_apic_entry(THERMAL_APIC_VECTOR); - inc_irq_stat(irq_thermal_count); - smp_thermal_vector(); - trace_thermal_apic_exit(THERMAL_APIC_VECTOR); - ack_APIC_irq(); -} - -/* Thermal monitoring depends on APIC, ACPI and clock modulation */ -static int intel_thermal_supported(struct cpuinfo_x86 *c) -{ - if (!boot_cpu_has(X86_FEATURE_APIC)) - return 0; - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) - return 0; - return 1; -} - -void __init mcheck_intel_therm_init(void) -{ - /* - * This function is only called on boot CPU. 
Save the init thermal - * LVT value on BSP and use that value to restore APs' thermal LVT - * entry BIOS programmed later - */ - if (intel_thermal_supported(&boot_cpu_data)) - lvtthmr_init = apic_read(APIC_LVTTHMR); -} - -void intel_init_thermal(struct cpuinfo_x86 *c) -{ - unsigned int cpu = smp_processor_id(); - int tm2 = 0; - u32 l, h; - - if (!intel_thermal_supported(c)) - return; - - /* - * First check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already: - */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - - h = lvtthmr_init; - /* - * The initial value of thermal LVT entries on all APs always reads - * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI - * sequence to them and LVT registers are reset to 0s except for - * the mask bits which are set to 1s when APs receive INIT IPI. - * If BIOS takes over the thermal interrupt and sets its interrupt - * delivery mode to SMI (not fixed), it restores the value that the - * BIOS has programmed on AP based on BSP's info we saved since BIOS - * is always setting the same value for all threads/cores. - */ - if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) - apic_write(APIC_LVTTHMR, lvtthmr_init); - - - if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { - if (system_state == SYSTEM_BOOTING) - pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu); - return; - } - - /* early Pentium M models use different method for enabling TM2 */ - if (cpu_has(c, X86_FEATURE_TM2)) { - if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { - rdmsr(MSR_THERM2_CTL, l, h); - if (l & MSR_THERM2_CTL_TM_SELECT) - tm2 = 1; - } else if (l & MSR_IA32_MISC_ENABLE_TM2) - tm2 = 1; - } - - /* We'll mask the thermal vector in the lapic till we're ready: */ - h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; - apic_write(APIC_LVTTHMR, h); - - rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) - wrmsr(MSR_IA32_THERM_INTERRUPT, - (l | (THERM_INT_LOW_ENABLE - | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h); - else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE - | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); - else - wrmsr(MSR_IA32_THERM_INTERRUPT, - l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); - - if (cpu_has(c, X86_FEATURE_PTS)) { - rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); - if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) - wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - (l | (PACKAGE_THERM_INT_LOW_ENABLE - | PACKAGE_THERM_INT_HIGH_ENABLE)) - & ~PACKAGE_THERM_INT_PLN_ENABLE, h); - else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) - wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE - | PACKAGE_THERM_INT_HIGH_ENABLE - | PACKAGE_THERM_INT_PLN_ENABLE), h); - else - wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, - l | (PACKAGE_THERM_INT_LOW_ENABLE - | PACKAGE_THERM_INT_HIGH_ENABLE), h); - } - - smp_thermal_vector = intel_thermal_interrupt; - - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); - - pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", - tm2 ? 
"TM2" : "TM1"); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); -} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index ec6f0415bc6d..b935e1b5f115 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -830,7 +830,7 @@ static const struct attribute_group cpu_root_microcode_group = { .attrs = cpu_root_microcode_attrs, }; -int __init microcode_init(void) +static int __init microcode_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; int error; diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 5bd011737272..9231640782fa 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void) if (!size_base) continue; - size_base = to_size_factor(size_base, &size_factor), + size_base = to_size_factor(size_base, &size_factor); start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); - start_base = to_size_factor(start_base, &start_factor), + start_base = to_size_factor(start_base, &start_factor); type = range_state[i].type; pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index a29997e6cf9e..b90f3f437765 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -3,7 +3,6 @@ * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong * because MTRRs can span up to 40 bits (36bits on most modern x86) */ -#define DEBUG #include <linux/export.h> #include <linux/init.h> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 61eb26edc6d2..28c8a23aa42e 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -31,8 +31,6 @@ System Programming Guide; Section 9.11. (1997 edition - PPro). */ -#define DEBUG - #include <linux/types.h> /* FIXME: kvm_para.h needs this */ #include <linux/stop_machine.h> diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index a5ee607a3b89..3ef5868ac588 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -3,7 +3,7 @@ * local apic based NMI watchdog for various CPUs. * * This file also handles reservation of performance counters for coordination - * with other users (like oprofile). + * with other users. * * Note that these events normally don't tick when the CPU idles. This means * the frequency varies with CPU load. 
@@ -105,15 +105,6 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) } -/* checks for a bit availability (hack for oprofile) */ -int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) -{ - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return !test_bit(counter, perfctr_nmi_owner); -} -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); - int reserve_perfctr_nmi(unsigned int msr) { unsigned int counter; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index ee71c47844cb..c4d320d02fd5 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -572,6 +572,7 @@ union cpuid_0x10_x_edx { void rdt_last_cmd_clear(void); void rdt_last_cmd_puts(const char *s); +__printf(1, 2) void rdt_last_cmd_printf(const char *fmt, ...); void rdt_ctrl_update(void *arg); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 460f3e0df106..f9190adc52cb 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -563,11 +563,11 @@ static int __rdtgroup_move_task(struct task_struct *tsk, */ if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; - tsk->rmid = rdtgrp->mon.rmid; + WRITE_ONCE(tsk->closid, rdtgrp->closid); + WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); } else if (rdtgrp->type == RDTMON_GROUP) { if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; + WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); } else { rdt_last_cmd_puts("Can't move task to different control group\n"); return -EINVAL; @@ -2310,22 +2310,18 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, for_each_process_thread(p, t) { if (!from || is_closid_match(t, from) || is_rmid_match(t, from)) { - t->closid = to->closid; - t->rmid = to->mon.rmid; + WRITE_ONCE(t->closid, to->closid); + WRITE_ONCE(t->rmid, to->mon.rmid); -#ifdef CONFIG_SMP /* - * This is safe on x86 w/o barriers as the ordering - * of writing to task_cpu() and t->on_cpu is - * reverse to the reading here. The detection is - * inaccurate as tasks might move or schedule - * before the smp function call takes place. In - * such a case the function call is pointless, but + * If the task is on a CPU, set the CPU in the mask. + * The detection is inaccurate as tasks might move or + * schedule before the smp function call takes place. + * In such a case the function call is pointless, but * there is no other side effect. 
*/ - if (mask && t->on_cpu) + if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t)) cpumask_set_cpu(task_cpu(t), mask); -#endif } } read_unlock(&tasklist_lock); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 236924930bf0..972ec3bfa9c0 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -40,11 +40,6 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, - { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, - { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, - { X86_FEATURE_SEV_ES, CPUID_EAX, 3, 0x8000001f, 0 }, - { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, - { X86_FEATURE_VM_PAGE_FLUSH, CPUID_EAX, 2, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index f65564a94b9b..7449ef33f081 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -141,7 +141,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf) struct sgx_encl_page *entry; unsigned long phys_addr; struct sgx_encl *encl; - unsigned long pfn; vm_fault_t ret; encl = vma->vm_private_data; @@ -168,13 +167,6 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf) phys_addr = sgx_get_epc_phys_addr(entry->epc_page); - /* Check if another thread got here first to insert the PTE. */ - if (!follow_pfn(vma, addr, &pfn)) { - mutex_unlock(&encl->lock); - - return VM_FAULT_NOPAGE; - } - ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr)); if (ret != VM_FAULT_NOPAGE) { mutex_unlock(&encl->lock); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index c519fc5f6948..8df81a3ed945 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -700,25 +700,27 @@ static bool __init sgx_page_cache_init(void) return true; } -static void __init sgx_init(void) +static int __init sgx_init(void) { int ret; int i; if (!cpu_feature_enabled(X86_FEATURE_SGX)) - return; + return -ENODEV; if (!sgx_page_cache_init()) - return; + return -ENOMEM; - if (!sgx_page_reclaimer_init()) + if (!sgx_page_reclaimer_init()) { + ret = -ENOMEM; goto err_page_cache; + } ret = sgx_drv_init(); if (ret) goto err_kthread; - return; + return 0; err_kthread: kthread_stop(ksgxd_tsk); @@ -728,6 +730,8 @@ err_page_cache: vfree(sgx_epc_sections[i].pages); memunmap(sgx_epc_sections[i].virt_addr); } + + return ret; } device_initcall(sgx_init); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5d8047441a0a..683749b80ae2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -167,14 +167,14 @@ void fpstate_sanitize_xstate(struct fpu *fpu) fx->fop = 0; fx->rip = 0; fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); + memset(fx->st_space, 0, sizeof(fx->st_space)); } /* * SSE is in init state */ if (!(xfeatures & XFEATURE_MASK_SSE)) - memset(&fx->xmm_space[0], 0, 256); + memset(fx->xmm_space, 0, sizeof(fx->xmm_space)); /* * First two features are FPU and SSE, which above we handled diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c5dd50369e2f..d4ad344e80bf 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -21,6 +21,7 @@ #include <asm/hw_irq.h> #include <asm/desc.h> #include <asm/traps.h> +#include <asm/thermal.h> #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> @@ -374,3 +375,23 @@ void fixup_irqs(void) } } #endif + +#ifdef 
CONFIG_X86_THERMAL_VECTOR +static void smp_thermal_vector(void) +{ + if (x86_thermal_enabled()) + intel_thermal_interrupt(); + else + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); +} + +DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) +{ + trace_thermal_apic_entry(THERMAL_APIC_VECTOR); + inc_irq_stat(irq_thermal_count); + smp_thermal_vector(); + trace_thermal_apic_exit(THERMAL_APIC_VECTOR); + ack_APIC_irq(); +} +#endif diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S index 0db0375235b4..8ef35063964b 100644 --- a/arch/x86/kernel/irqflags.S +++ b/arch/x86/kernel/irqflags.S @@ -13,14 +13,3 @@ SYM_FUNC_START(native_save_fl) ret SYM_FUNC_END(native_save_fl) EXPORT_SYMBOL(native_save_fl) - -/* - * void native_restore_fl(unsigned long flags) - * %eax/%rdi: flags - */ -SYM_FUNC_START(native_restore_fl) - push %_ASM_ARG1 - popf - ret -SYM_FUNC_END(native_restore_fl) -EXPORT_SYMBOL(native_restore_fl) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b8aee71840ae..aa15132228da 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -398,9 +398,15 @@ static void free_ldt_pgtables(struct mm_struct *mm) if (!boot_cpu_has(X86_FEATURE_PTI)) return; - tlb_gather_mmu(&tlb, mm, start, end); + /* + * Although free_pgd_range() is intended for freeing user + * page-tables, it also works out for kernel mappings on x86. + * We use tlb_gather_mmu_fullmm() to avoid confusing the + * range-tracking logic in __tlb_adjust_range(). + */ + tlb_gather_mmu_fullmm(&tlb, mm); free_pgd_range(&tlb, start, end, start, end); - tlb_finish_mmu(&tlb, start, end); + tlb_finish_mmu(&tlb); #endif } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 34b153cbd4ac..5e9a34b5bd74 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -114,6 +114,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value; break; case R_386_PC32: + case R_386_PLT32: /* Add the value, subtract its position */ *location += sym->st_value - (uint32_t)location; break; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 8a67d1fa8dc5..ed8ac6bcbafb 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -182,6 +182,13 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = security_locked_down(LOCKDOWN_MSR); if (err) break; + + err = filter_write(regs[1]); + if (err) + return err; + + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6c3407ba6ee9..c60222ab8ab9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -135,8 +135,7 @@ unsigned paravirt_patch_default(u8 type, void *insn_buff, else if (opfunc == _paravirt_ident_64) ret = paravirt_patch_ident_64(insn_buff, len); - else if (type == PARAVIRT_PATCH(cpu.iret) || - type == PARAVIRT_PATCH(cpu.usergs_sysret64)) + else if (type == PARAVIRT_PATCH(cpu.iret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len); #endif @@ -170,7 +169,6 @@ static u64 native_steal_clock(int cpu) /* These are in entry.S */ extern void native_iret(void); -extern void native_usergs_sysret64(void); static struct resource reserve_ioports = { .start = 0, @@ -310,9 +308,7 @@ struct paravirt_patch_template pv_ops = { .cpu.load_sp0 = native_load_sp0, - .cpu.usergs_sysret64 = native_usergs_sysret64, .cpu.iret = native_iret, - .cpu.swapgs = native_swapgs, #ifdef 
CONFIG_X86_IOPL_IOPERM .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, @@ -324,7 +320,6 @@ struct paravirt_patch_template pv_ops = { /* Irq ops. */ .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), - .irq.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .irq.safe_halt = native_safe_halt, diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c index ace6e334cb39..abd27ec67397 100644 --- a/arch/x86/kernel/paravirt_patch.c +++ b/arch/x86/kernel/paravirt_patch.c @@ -25,10 +25,7 @@ struct patch_xxl { const unsigned char mmu_read_cr2[3]; const unsigned char mmu_read_cr3[3]; const unsigned char mmu_write_cr3[3]; - const unsigned char irq_restore_fl[2]; const unsigned char cpu_wbinvd[2]; - const unsigned char cpu_usergs_sysret64[6]; - const unsigned char cpu_swapgs[3]; const unsigned char mov64[3]; }; @@ -39,11 +36,7 @@ static const struct patch_xxl patch_data_xxl = { .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3 - .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd - .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, - 0x48, 0x0f, 0x07 }, // swapgs; sysretq - .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax }; @@ -76,7 +69,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, switch (type) { #ifdef CONFIG_PARAVIRT_XXL - PATCH_CASE(irq, restore_fl, xxl, insn_buff, len); PATCH_CASE(irq, save_fl, xxl, insn_buff, len); PATCH_CASE(irq, irq_enable, xxl, insn_buff, len); PATCH_CASE(irq, irq_disable, xxl, insn_buff, len); @@ -85,8 +77,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len); PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); - PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len); - PATCH_CASE(cpu, swapgs, xxl, insn_buff, len); PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); #endif diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 2e9006c1e240..42e92ec62973 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -4,9 +4,6 @@ #include <linux/string.h> #include <linux/kallsyms.h> - -#define DEBUG 1 - static struct iommu_table_entry * __init find_dependents_of(struct iommu_table_entry *start, struct iommu_table_entry *finish, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index bedca011459c..87a4143aa7d7 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -704,6 +704,9 @@ void ptrace_disable(struct task_struct *child) #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static const struct user_regset_view user_x86_32_view; /* Initialized below. */ #endif +#ifdef CONFIG_X86_64 +static const struct user_regset_view user_x86_64_view; /* Initialized below. 
*/ +#endif long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) @@ -711,6 +714,14 @@ long arch_ptrace(struct task_struct *child, long request, int ret; unsigned long __user *datap = (unsigned long __user *)data; +#ifdef CONFIG_X86_64 + /* This is native 64-bit ptrace() */ + const struct user_regset_view *regset_view = &user_x86_64_view; +#else + /* This is native 32-bit ptrace() */ + const struct user_regset_view *regset_view = &user_x86_32_view; +#endif + switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { @@ -749,28 +760,28 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GETREGS: /* Get all gp regs from the child. */ return copy_regset_to_user(child, - task_user_regset_view(current), + regset_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, - task_user_regset_view(current), + regset_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, - task_user_regset_view(current), + regset_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user(child, - task_user_regset_view(current), + regset_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); @@ -1152,28 +1163,28 @@ static long x32_arch_ptrace(struct task_struct *child, case PTRACE_GETREGS: /* Get all gp regs from the child. */ return copy_regset_to_user(child, - task_user_regset_view(current), + &user_x86_64_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, - task_user_regset_view(current), + &user_x86_64_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, - task_user_regset_view(current), + &user_x86_64_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user(child, - task_user_regset_view(current), + &user_x86_64_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); @@ -1309,6 +1320,25 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; } +/* + * This is used by the core dump code to decide which regset to dump. The + * core dump code writes out the resulting .e_machine and the corresponding + * regsets. This is suboptimal if the task is messing around with its CS.L + * field, but at worst the core dump will end up missing some information. + * + * Unfortunately, it is also used by the broken PTRACE_GETREGSET and + * PTRACE_SETREGSET APIs. These APIs look at the .regsets field but have + * no way to make sure that the e_machine they use matches the caller's + * expectations. The result is that the data format returned by + * PTRACE_GETREGSET depends on the returned CS field (and even the offset + * of the returned CS field depends on its value!) and the data format + * accepted by PTRACE_SETREGSET is determined by the old CS value. The + * upshot is that it is basically impossible to use these APIs correctly. 
+ * + * The best way to fix it in the long run would probably be to add new + * improved ptrace() APIs to read and write registers reliably, possibly by + * allowing userspace to select the ELF e_machine variant that they expect. + */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index db115943e8bd..9991c5920aac 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -477,6 +477,15 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, }, + { /* PCIe Wifi card isn't detected after reboot otherwise */ + .callback = set_pci_reboot, + .ident = "Zotac ZBOX CI327 nano", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"), + }, + }, + /* Sony */ { /* Handle problems with rebooting on Sony VGN-Z540N */ .callback = set_bios_reboot, diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 504fa5425bce..660b78827638 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -90,14 +90,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, off) { - long error; - error = -EINVAL; if (off & ~PAGE_MASK) - goto out; + return -EINVAL; - error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); -out: - return error; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } static void find_start_end(unsigned long addr, unsigned long flags, diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 764573de3996..e5a7a10a0164 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -134,7 +134,11 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) unsafe_put_user(regs->ds, &user->regs.ds, Efault_end); unsafe_put_user(regs->fs, &user->regs.fs, Efault_end); unsafe_put_user(regs->gs, &user->regs.gs, Efault_end); - unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end); + + /* + * Don't write screen_bitmap in case some user had a value there + * and expected it to remain unchanged. 
+ */ user_access_end(); @@ -160,49 +164,6 @@ Efault: do_exit(SIGSEGV); } -static void mark_screen_rdonly(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - spinlock_t *ptl; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int i; - - mmap_write_lock(mm); - pgd = pgd_offset(mm, 0xA0000); - if (pgd_none_or_clear_bad(pgd)) - goto out; - p4d = p4d_offset(pgd, 0xA0000); - if (p4d_none_or_clear_bad(p4d)) - goto out; - pud = pud_offset(p4d, 0xA0000); - if (pud_none_or_clear_bad(pud)) - goto out; - pmd = pmd_offset(pud, 0xA0000); - - if (pmd_trans_huge(*pmd)) { - vma = find_vma(mm, 0xA0000); - split_huge_pmd(vma, pmd, 0xA0000); - } - if (pmd_none_or_clear_bad(pmd)) - goto out; - pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); - for (i = 0; i < 32; i++) { - if (pte_present(*pte)) - set_pte(pte, pte_wrprotect(*pte)); - pte++; - } - pte_unmap_unlock(pte, ptl); -out: - mmap_write_unlock(mm); - flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false); -} - - - static int do_vm86_irq_handling(int subfunction, int irqnumber); static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus); @@ -282,6 +243,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) offsetof(struct vm86_struct, int_revectored))) return -EFAULT; + + /* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */ + if (v.flags & VM86_SCREEN_BITMAP) { + char comm[TASK_COMM_LEN]; + + pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current)); + return -EINVAL; + } + memset(&vm86regs, 0, sizeof(vm86regs)); vm86regs.pt.bx = v.regs.ebx; @@ -302,7 +272,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) vm86regs.gs = v.regs.gs; vm86->flags = v.flags; - vm86->screen_bitmap = v.screen_bitmap; vm86->cpu_type = v.cpu_type; if (copy_from_user(&vm86->int_revectored, @@ -370,9 +339,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) update_task_stack(tsk); preempt_enable(); - if (vm86->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk->mm); - memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); return regs->ax; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f1f1b5a0956a..525197381baa 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -16,7 +16,7 @@ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ #include <linux/uaccess.h> /* faulthandler_disabled() */ -#include <linux/efi.h> /* efi_recover_from_page_fault()*/ +#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ @@ -25,7 +25,7 @@ #include <asm/vsyscall.h> /* emulate_vsyscall */ #include <asm/vm86.h> /* struct vm86 */ #include <asm/mmu_context.h> /* vma_pkey() */ -#include <asm/efi.h> /* efi_recover_from_page_fault()*/ +#include <asm/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ @@ -54,7 +54,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) * 32-bit mode: * * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. + * Check that here and ignore it. This is AMD erratum #91. 
* * 64-bit mode: * @@ -83,11 +83,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, #ifdef CONFIG_X86_64 case 0x40: /* - * In AMD64 long mode 0x40..0x4F are valid REX prefixes - * Need to figure out under what instruction mode the - * instruction was issued. Could check the LDT for lm, - * but for now it's good enough to assume that long - * mode only uses well known segments or kernel. + * In 64-bit mode 0x40..0x4F are valid REX prefixes */ return (!user_mode(regs) || user_64bit_mode(regs)); #endif @@ -110,6 +106,15 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, } } +static bool is_amd_k8_pre_npt(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + return unlikely(IS_ENABLED(CONFIG_CPU_SUP_AMD) && + c->x86_vendor == X86_VENDOR_AMD && + c->x86 == 0xf && c->x86_model < 0x40); +} + static int is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) { @@ -117,6 +122,10 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) unsigned char *instr; int prefetch = 0; + /* Erratum #91 affects AMD K8, pre-NPT CPUs */ + if (!is_amd_k8_pre_npt()) + return 0; + /* * If it was a exec (instruction fetch) fault on NX page, then * do not ignore the fault: @@ -127,20 +136,31 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) - return 0; + /* + * This code has historically always bailed out if IP points to a + * not-present page (e.g. due to a race). No one has ever + * complained about this. + */ + pagefault_disable(); while (instr < max_instr) { unsigned char opcode; - if (get_kernel_nofault(opcode, instr)) - break; + if (user_mode(regs)) { + if (get_user(opcode, instr)) + break; + } else { + if (get_kernel_nofault(opcode, instr)) + break; + } instr++; if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; } + + pagefault_enable(); return prefetch; } @@ -262,25 +282,6 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end) } } -/* - * Did it hit the DOS screen memory VA from vm86 mode? 
- */ -static inline void -check_v8086_mode(struct pt_regs *regs, unsigned long address, - struct task_struct *tsk) -{ -#ifdef CONFIG_VM86 - unsigned long bit; - - if (!v8086_mode(regs) || !tsk->thread.vm86) - return; - - bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.vm86->screen_bitmap |= 1 << bit; -#endif -} - static bool low_pfn(unsigned long pfn) { return pfn < max_low_pfn; @@ -335,15 +336,6 @@ KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; #endif -/* - * No vm86 mode in 64-bit mode: - */ -static inline void -check_v8086_mode(struct pt_regs *regs, unsigned long address, - struct task_struct *tsk) -{ -} - static int bad_address(void *p) { unsigned long dummy; @@ -427,6 +419,9 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) || boot_cpu_data.x86 != 0xf) return 0; + if (user_mode(regs)) + return 0; + if (address != regs->ip) return 0; @@ -462,10 +457,12 @@ static int is_errata100(struct pt_regs *regs, unsigned long address) } /* Pentium F0 0F C7 C8 bug workaround: */ -static int is_f00f_bug(struct pt_regs *regs, unsigned long address) +static int is_f00f_bug(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { #ifdef CONFIG_X86_F00F_BUG - if (boot_cpu_has_bug(X86_BUG_F00F) && idt_is_f00f_address(address)) { + if (boot_cpu_has_bug(X86_BUG_F00F) && !(error_code & X86_PF_USER) && + idt_is_f00f_address(address)) { handle_invalid_op(regs); return 1; } @@ -630,53 +627,20 @@ static void set_signal_archinfo(unsigned long address, } static noinline void -no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long address, int signal, int si_code) +page_fault_oops(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { - struct task_struct *tsk = current; unsigned long flags; int sig; if (user_mode(regs)) { /* - * This is an implicit supervisor-mode access from user - * mode. Bypass all the kernel-mode recovery code and just - * OOPS. + * Implicit kernel access from user mode? Skip the stack + * overflow and EFI special cases. */ goto oops; } - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { - /* - * Any interrupt that takes a fault gets the fixup. This makes - * the below recursive fault logic only apply to a faults from - * task context. - */ - if (in_interrupt()) - return; - - /* - * Per the above we're !in_interrupt(), aka. task context. - * - * In this case we need to make sure we're not recursively - * faulting through the emulate_vsyscall() logic. - */ - if (current->thread.sig_on_uaccess_err && signal) { - sanitize_error_code(address, &error_code); - - set_signal_archinfo(address, error_code); - - /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address); - } - - /* - * Barring that, we can do the fixup and be happy. - */ - return; - } - #ifdef CONFIG_VMAP_STACK /* * Stack overflow? During boot, we can fault near the initial @@ -684,8 +648,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, * that we're in vmalloc space to avoid this. 
*/ if (is_vmalloc_addr((void *)address) && - (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || - address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { + (((unsigned long)current->stack - 1 - address < PAGE_SIZE) || + address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) { unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *); /* * We're likely to be running with very little stack space @@ -709,28 +673,12 @@ no_context(struct pt_regs *regs, unsigned long error_code, #endif /* - * 32-bit: - * - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * 64-bit: - * - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, error_code, address)) - return; - - if (is_errata93(regs, address)) - return; - - /* - * Buggy firmware could access regions which might page fault, try to - * recover from such faults. + * Buggy firmware could access regions which might page fault. If + * this happens, EFI has a special OOPS path that will try to + * avoid hanging the system. */ if (IS_ENABLED(CONFIG_EFI)) - efi_recover_from_page_fault(address); + efi_crash_gracefully_on_page_fault(address); oops: /* @@ -741,7 +689,7 @@ oops: show_fault_oops(regs, error_code, address); - if (task_stack_end_corrupted(tsk)) + if (task_stack_end_corrupted(current)) printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); sig = SIGKILL; @@ -754,6 +702,53 @@ oops: oops_end(flags, regs, sig); } +static noinline void +kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int signal, int si_code) +{ + WARN_ON_ONCE(user_mode(regs)); + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) { + /* + * Any interrupt that takes a fault gets the fixup. This makes + * the below recursive fault logic only apply to a faults from + * task context. + */ + if (in_interrupt()) + return; + + /* + * Per the above we're !in_interrupt(), aka. task context. + * + * In this case we need to make sure we're not recursively + * faulting through the emulate_vsyscall() logic. + */ + if (current->thread.sig_on_uaccess_err && signal) { + sanitize_error_code(address, &error_code); + + set_signal_archinfo(address, error_code); + + /* XXX: hwpoison faults will set the wrong code. */ + force_sig_fault(signal, si_code, (void __user *)address); + } + + /* + * Barring that, we can do the fixup and be happy. + */ + return; + } + + /* + * AMD erratum #91 manifests as a spurious page fault on a PREFETCH + * instruction. 
+ */ + if (is_prefetch(regs, error_code, address)) + return; + + page_fault_oops(regs, error_code, address); +} + /* * Print out info about fatal segfaults, if the show_unhandled_signals * sysctl is set: @@ -796,47 +791,49 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, { struct task_struct *tsk = current; - /* User mode accesses just cause a SIGSEGV */ - if (user_mode(regs) && (error_code & X86_PF_USER)) { - /* - * It's possible to have interrupts off here: - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space: - */ - if (is_prefetch(regs, error_code, address)) - return; + if (!user_mode(regs)) { + kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code); + return; + } - if (is_errata100(regs, address)) - return; + if (!(error_code & X86_PF_USER)) { + /* Implicit user access to kernel memory -- just oops */ + page_fault_oops(regs, error_code, address); + return; + } - sanitize_error_code(address, &error_code); + /* + * User mode accesses just cause a SIGSEGV. + * It's possible to have interrupts off here: + */ + local_irq_enable(); - if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) - return; + /* + * Valid to do another page fault here because this one came + * from user space: + */ + if (is_prefetch(regs, error_code, address)) + return; - if (likely(show_unhandled_signals)) - show_signal_msg(regs, error_code, address, tsk); + if (is_errata100(regs, address)) + return; - set_signal_archinfo(address, error_code); + sanitize_error_code(address, &error_code); - if (si_code == SEGV_PKUERR) - force_sig_pkuerr((void __user *)address, pkey); + if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) + return; - force_sig_fault(SIGSEGV, si_code, (void __user *)address); + if (likely(show_unhandled_signals)) + show_signal_msg(regs, error_code, address, tsk); - local_irq_disable(); + set_signal_archinfo(address, error_code); - return; - } + if (si_code == SEGV_PKUERR) + force_sig_pkuerr((void __user *)address, pkey); - if (is_f00f_bug(regs, address)) - return; + force_sig_fault(SIGSEGV, si_code, (void __user *)address); - no_context(regs, error_code, address, SIGSEGV, si_code); + local_irq_disable(); } static noinline void @@ -926,8 +923,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, vm_fault_t fault) { /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & X86_PF_USER)) { - no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); + if (!user_mode(regs)) { + kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR); return; } @@ -961,40 +958,6 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } -static noinline void -mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, vm_fault_t fault) -{ - if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { - no_context(regs, error_code, address, 0, 0); - return; - } - - if (fault & VM_FAULT_OOM) { - /* Kernel mode? 
Handle exceptions or die: */ - if (!(error_code & X86_PF_USER)) { - no_context(regs, error_code, address, - SIGSEGV, SEGV_MAPERR); - return; - } - - /* - * We ran out of memory, call the OOM killer, and return the - * userspace (which will retry the fault, or kill us if we got - * oom-killed): - */ - pagefault_out_of_memory(); - } else { - if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| - VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, fault); - else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address); - else - BUG(); - } -} - static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) @@ -1209,6 +1172,9 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, } #endif + if (is_f00f_bug(regs, hw_error_code, address)) + return; + /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; @@ -1229,10 +1195,17 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, } NOKPROBE_SYMBOL(do_kern_addr_fault); -/* Handle faults in the user portion of the address space */ +/* + * Handle faults in the user portion of the address space. Nothing in here + * should check X86_PF_USER without a specific justification: for almost + * all purposes, we should treat a normal kernel access to user memory + * (e.g. get_user(), put_user(), etc.) the same as the WRUSS instruction. + * The one exception is AC flag handling, which is, per the x86 + * architecture, special for WRUSS. + */ static inline void do_user_addr_fault(struct pt_regs *regs, - unsigned long hw_error_code, + unsigned long error_code, unsigned long address) { struct vm_area_struct *vma; @@ -1244,6 +1217,21 @@ void do_user_addr_fault(struct pt_regs *regs, tsk = current; mm = tsk->mm; + if (unlikely((error_code & (X86_PF_USER | X86_PF_INSTR)) == X86_PF_INSTR)) { + /* + * Whoops, this is kernel mode code trying to execute from + * user memory. Unless this is AMD erratum #93, which + * corrupts RIP such that it looks like a user address, + * this is unrecoverable. Don't even try to look up the + * VMA or look for extable entries. + */ + if (is_errata93(regs, address)) + return; + + page_fault_oops(regs, error_code, address); + return; + } + /* kprobes don't want to hook the spurious faults: */ if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF))) return; @@ -1252,8 +1240,8 @@ void do_user_addr_fault(struct pt_regs *regs, * Reserved bits are never expected to be set on * entries in the user portion of the page tables. */ - if (unlikely(hw_error_code & X86_PF_RSVD)) - pgtable_bad(regs, hw_error_code, address); + if (unlikely(error_code & X86_PF_RSVD)) + pgtable_bad(regs, error_code, address); /* * If SMAP is on, check for invalid kernel (supervisor) access to user @@ -1263,10 +1251,13 @@ void do_user_addr_fault(struct pt_regs *regs, * enforcement appears to be consistent with the USER bit. */ if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) && - !(hw_error_code & X86_PF_USER) && - !(regs->flags & X86_EFLAGS_AC))) - { - bad_area_nosemaphore(regs, hw_error_code, address); + !(error_code & X86_PF_USER) && + !(regs->flags & X86_EFLAGS_AC))) { + /* + * No extable entry here. This was a kernel access to an + * invalid pointer. get_kernel_nofault() will not get here. 
+ */ + page_fault_oops(regs, error_code, address); return; } @@ -1275,7 +1266,7 @@ void do_user_addr_fault(struct pt_regs *regs, * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { - bad_area_nosemaphore(regs, hw_error_code, address); + bad_area_nosemaphore(regs, error_code, address); return; } @@ -1296,9 +1287,9 @@ void do_user_addr_fault(struct pt_regs *regs, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); - if (hw_error_code & X86_PF_WRITE) + if (error_code & X86_PF_WRITE) flags |= FAULT_FLAG_WRITE; - if (hw_error_code & X86_PF_INSTR) + if (error_code & X86_PF_INSTR) flags |= FAULT_FLAG_INSTRUCTION; #ifdef CONFIG_X86_64 @@ -1314,7 +1305,7 @@ void do_user_addr_fault(struct pt_regs *regs, * to consider the PF_PK bit. */ if (is_vsyscall_vaddr(address)) { - if (emulate_vsyscall(hw_error_code, regs, address)) + if (emulate_vsyscall(error_code, regs, address)) return; } #endif @@ -1337,7 +1328,7 @@ void do_user_addr_fault(struct pt_regs *regs, * Fault from code in kernel from * which we do not expect faults. */ - bad_area_nosemaphore(regs, hw_error_code, address); + bad_area_nosemaphore(regs, error_code, address); return; } retry: @@ -1353,17 +1344,17 @@ retry: vma = find_vma(mm, address); if (unlikely(!vma)) { - bad_area(regs, hw_error_code, address); + bad_area(regs, error_code, address); return; } if (likely(vma->vm_start <= address)) goto good_area; if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { - bad_area(regs, hw_error_code, address); + bad_area(regs, error_code, address); return; } if (unlikely(expand_stack(vma, address))) { - bad_area(regs, hw_error_code, address); + bad_area(regs, error_code, address); return; } @@ -1372,8 +1363,8 @@ retry: * we can handle it.. */ good_area: - if (unlikely(access_error(hw_error_code, vma))) { - bad_area_access_error(regs, hw_error_code, address, vma); + if (unlikely(access_error(error_code, vma))) { + bad_area_access_error(regs, error_code, address, vma); return; } @@ -1392,11 +1383,14 @@ good_area: */ fault = handle_mm_fault(vma, address, flags, regs); - /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { + /* + * Quick path to respond to signals. The core mm code + * has unlocked the mm for us if we get here. + */ if (!user_mode(regs)) - no_context(regs, hw_error_code, address, SIGBUS, - BUS_ADRERR); + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR); return; } @@ -1412,12 +1406,37 @@ good_area: } mmap_read_unlock(mm); - if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, hw_error_code, address, fault); + if (likely(!(fault & VM_FAULT_ERROR))) + return; + + if (fatal_signal_pending(current) && !user_mode(regs)) { + kernelmode_fixup_or_oops(regs, error_code, address, 0, 0); return; } - check_v8086_mode(regs, address, tsk); + if (fault & VM_FAULT_OOM) { + /* Kernel mode? 
Handle exceptions or die: */ + if (!user_mode(regs)) { + kernelmode_fixup_or_oops(regs, error_code, address, + SIGSEGV, SEGV_MAPERR); + return; + } + + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); + } else { + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) + do_sigbus(regs, error_code, address, fault); + else if (fault & VM_FAULT_SIGSEGV) + bad_area_nosemaphore(regs, error_code, address); + else + BUG(); + } } NOKPROBE_SYMBOL(do_user_addr_fault); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e26f5c5c6565..dd694fb93916 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -157,16 +157,25 @@ __ref void *alloc_low_pages(unsigned int num) } /* - * By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS. - * With KASLR memory randomization, depending on the machine e820 memory - * and the PUD alignment. We may need twice more pages when KASLR memory + * By default need to be able to allocate page tables below PGD firstly for + * the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping. + * With KASLR memory randomization, depending on the machine e820 memory and the + * PUD alignment, twice that many pages may be needed when KASLR memory * randomization is enabled. */ + +#ifndef CONFIG_X86_5LEVEL +#define INIT_PGD_PAGE_TABLES 3 +#else +#define INIT_PGD_PAGE_TABLES 4 +#endif + #ifndef CONFIG_RANDOMIZE_MEMORY -#define INIT_PGD_PAGE_COUNT 6 +#define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES) #else -#define INIT_PGD_PAGE_COUNT 12 +#define INIT_PGD_PAGE_COUNT (4 * INIT_PGD_PAGE_TABLES) #endif + #define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); void __init early_alloc_pgt_buf(void) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index c3d5f0236f35..4b01f7dbaf30 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -475,9 +475,10 @@ void __init mem_encrypt_init(void) swiotlb_update_mem_attributes(); /* - * With SEV, we need to unroll the rep string I/O instructions. + * With SEV, we need to unroll the rep string I/O instructions, + * but SEV-ES supports them through the #VC handler. 
*/ - if (sev_active()) + if (sev_active() && !sev_es_active()) static_branch_enable(&sev_enable_key); print_mem_encrypt_feature_info(); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index bd7aff5c51f7..cd768dafca9e 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -10,8 +10,6 @@ #define pr_fmt(fmt) "mmiotrace: " fmt -#define DEBUG 1 - #include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/slab.h> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 796506dcfc42..79e7a0ec1da5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +/* Some 1-byte opcodes for binary ALU operations */ +static u8 simple_alu_opcodes[] = { + [BPF_ADD] = 0x01, + [BPF_SUB] = 0x29, + [BPF_AND] = 0x21, + [BPF_OR] = 0x09, + [BPF_XOR] = 0x31, + [BPF_LSH] = 0xE0, + [BPF_RSH] = 0xE8, + [BPF_ARSH] = 0xF8, +}; + static void jit_fill_hole(void *area, unsigned int size) { /* Fill whole space with INT3 instructions */ @@ -681,6 +693,42 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ +static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is_imm8(off)) { + /* 1-byte signed displacement. + * + * If off == 0 we could skip this and save one extra byte, but + * special case of x86 R13 which always needs an offset is not + * worth the hassle + */ + EMIT2(add_2reg(0x40, ptr_reg, val_reg), off); + } else { + /* 4-byte signed displacement */ + EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off); + } + *pprog = prog; +} + +/* + * Emit a REX byte if it will be necessary to address these registers + */ +static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is64) + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -708,15 +756,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); break; } - /* - * If insn->off == 0 we can save one extra byte, but - * special case of x86 R13 which always needs an offset - * is not worth the hassle - */ - if (is_imm8(off)) - EMIT2(add_2reg(0x40, src_reg, dst_reg), off); - else - EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off); + emit_insn_suffix(&prog, src_reg, dst_reg, off); *pprog = prog; } @@ -751,11 +791,51 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); break; } - if (is_imm8(off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off); + emit_insn_suffix(&prog, dst_reg, src_reg, off); + *pprog = prog; +} + +static int emit_atomic(u8 **pprog, u8 atomic_op, + u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) +{ + u8 *prog = *pprog; + int cnt = 0; + + EMIT1(0xF0); /* lock prefix */ + + maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW); + + /* emit opcode */ + switch (atomic_op) { + case BPF_ADD: + case BPF_SUB: + case BPF_AND: + case BPF_OR: + case BPF_XOR: + /* lock 
*(u32/u64*)(dst_reg + off) <op>= src_reg */ + EMIT1(simple_alu_opcodes[atomic_op]); + break; + case BPF_ADD | BPF_FETCH: + /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */ + EMIT2(0x0F, 0xC1); + break; + case BPF_XCHG: + /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ + EMIT1(0x87); + break; + case BPF_CMPXCHG: + /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ + EMIT2(0x0F, 0xB1); + break; + default: + pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); + return -EFAULT; + } + + emit_insn_suffix(&prog, dst_reg, src_reg, off); + *pprog = prog; + return 0; } static bool ex_handler_bpf(const struct exception_table_entry *x, @@ -789,8 +869,31 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt, } } +static int emit_nops(u8 **pprog, int len) +{ + u8 *prog = *pprog; + int i, noplen, cnt = 0; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(ideal_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; + + return cnt; +} + +#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, - int oldproglen, struct jit_context *ctx) + int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; struct bpf_insn *insn = bpf_prog->insnsi; @@ -800,8 +903,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; int i, cnt = 0, excnt = 0; - int proglen = 0; + int ilen, proglen = 0; u8 *prog = temp; + int err; detect_reg_usage(insn, insn_cnt, callee_regs_used, &tail_call_seen); @@ -813,17 +917,24 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_prog->aux->func_idx != 0); push_callee_regs(&prog, callee_regs_used); - addrs[0] = prog - temp; + + ilen = prog - temp; + if (image) + memcpy(image + proglen, temp, ilen); + proglen += ilen; + addrs[0] = proglen; + prog = temp; for (i = 1; i <= insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; u8 b2 = 0, b3 = 0; + u8 *start_of_ldx; s64 jmp_offset; u8 jmp_cond; - int ilen; u8 *func; + int nops; switch (insn->code) { /* ALU */ @@ -837,17 +948,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_AND | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: - switch (BPF_OP(insn->code)) { - case BPF_ADD: b2 = 0x01; break; - case BPF_SUB: b2 = 0x29; break; - case BPF_AND: b2 = 0x21; break; - case BPF_OR: b2 = 0x09; break; - case BPF_XOR: b2 = 0x31; break; - } - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_ALU64); + b2 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; @@ -1027,12 +1130,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); - switch (BPF_OP(insn->code)) { - case BPF_LSH: b3 = 0xE0; break; - case BPF_RSH: b3 = 0xE8; break; - case BPF_ARSH: b3 = 0xF8; break; - } - + b3 = simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) EMIT2(0xD1, add_1reg(b3, dst_reg)); else @@ -1066,11 +1164,7 @@ static int 
do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); - switch (BPF_OP(insn->code)) { - case BPF_LSH: b3 = 0xE0; break; - case BPF_RSH: b3 = 0xE8; break; - case BPF_ARSH: b3 = 0xF8; break; - } + b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); if (src_reg != BPF_REG_4) @@ -1185,12 +1279,30 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + /* test src_reg, src_reg */ + maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */ + EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg)); + /* jne start_of_ldx */ + EMIT2(X86_JNE, 0); + /* xor dst_reg, dst_reg */ + emit_mov_imm32(&prog, false, dst_reg, 0); + /* jmp byte_after_ldx */ + EMIT2(0xEB, 0); + + /* populate jmp_offset for JNE above */ + temp[4] = prog - temp - 5 /* sizeof(test + jne) */; + start_of_ldx = prog; + } emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { struct exception_table_entry *ex; u8 *_insn = image + proglen; s64 delta; + /* populate jmp_offset for JMP above */ + start_of_ldx[-1] = prog - start_of_ldx; + if (!bpf_prog->aux->extable) break; @@ -1230,21 +1342,56 @@ st: if (is_imm8(insn->off)) } break; - /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ - case BPF_STX | BPF_XADD | BPF_W: - /* Emit 'lock add dword ptr [rax + off], eax' */ - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); - else - EMIT2(0xF0, 0x01); - goto xadd; - case BPF_STX | BPF_XADD | BPF_DW: - EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); -xadd: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), - insn->off); + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm == (BPF_AND | BPF_FETCH) || + insn->imm == (BPF_OR | BPF_FETCH) || + insn->imm == (BPF_XOR | BPF_FETCH)) { + u8 *branch_target; + bool is64 = BPF_SIZE(insn->code) == BPF_DW; + + /* + * Can't be implemented with a single x86 insn. + * Need to do a CMPXCHG loop. + */ + + /* Will need RAX as a CMPXCHG operand so save R0 */ + emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); + branch_target = prog; + /* Load old value */ + emit_ldx(&prog, BPF_SIZE(insn->code), + BPF_REG_0, dst_reg, insn->off); + /* + * Perform the (commutative) operation locally, + * put the result in the AUX_REG. + */ + emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); + maybe_emit_mod(&prog, AUX_REG, src_reg, is64); + EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], + add_2reg(0xC0, AUX_REG, src_reg)); + /* Attempt to swap in new value */ + err = emit_atomic(&prog, BPF_CMPXCHG, + dst_reg, AUX_REG, insn->off, + BPF_SIZE(insn->code)); + if (WARN_ON(err)) + return err; + /* + * ZF tells us whether we won the race. If it's + * cleared we need to try again. 
+ */ + EMIT2(X86_JNE, -(prog - branch_target) - 2); + /* Return the pre-modification value */ + emit_mov_reg(&prog, is64, src_reg, BPF_REG_0); + /* Restore R0 after clobbering RAX */ + emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); + break; + + } + + err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); + if (err) + return err; break; /* call */ @@ -1295,20 +1442,16 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP32 | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JSLE | BPF_X: /* cmp dst_reg, src_reg */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: /* test dst_reg, src_reg */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; @@ -1344,10 +1487,8 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP32 | BPF_JSLE | BPF_K: /* test dst_reg, dst_reg to save one extra byte */ if (imm32 == 0) { - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + maybe_emit_mod(&prog, dst_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); goto emit_cond_jmp; } @@ -1409,6 +1550,30 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8(jmp_offset)) { + if (jmp_padding) { + /* To keep the jmp_offset valid, the extra bytes are + * padded before the jump insn, so we substract the + * 2 bytes of jmp_cond insn from INSN_SZ_DIFF. + * + * If the previous pass already emits an imm8 + * jmp_cond, then this BPF insn won't shrink, so + * "nops" is 0. + * + * On the other hand, if the previous pass emits an + * imm32 jmp_cond, the extra 4 bytes(*) is padded to + * keep the image from shrinking further. + * + * (*) imm32 jmp_cond is 6 bytes, and imm8 jmp_cond + * is 2 bytes, so the size difference is 4 bytes. + */ + nops = INSN_SZ_DIFF - 2; + if (nops != 0 && nops != 4) { + pr_err("unexpected jmp_cond padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, nops); + } EMIT2(jmp_cond, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); @@ -1431,11 +1596,55 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ else jmp_offset = addrs[i + insn->off] - addrs[i]; - if (!jmp_offset) - /* Optimize out nop jumps */ + if (!jmp_offset) { + /* + * If jmp_padding is enabled, the extra nops will + * be inserted. Otherwise, optimize out nop jumps. + */ + if (jmp_padding) { + /* There are 3 possible conditions. + * (1) This BPF_JA is already optimized out in + * the previous run, so there is no need + * to pad any extra byte (0 byte). + * (2) The previous pass emits an imm8 jmp, + * so we pad 2 bytes to match the previous + * insn size. + * (3) Similarly, the previous pass emits an + * imm32 jmp, and 5 bytes is padded. 
+ */ + nops = INSN_SZ_DIFF; + if (nops != 0 && nops != 2 && nops != 5) { + pr_err("unexpected nop jump padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, nops); + } break; + } emit_jmp: if (is_imm8(jmp_offset)) { + if (jmp_padding) { + /* To avoid breaking jmp_offset, the extra bytes + * are padded before the actual jmp insn, so + * 2 bytes is substracted from INSN_SZ_DIFF. + * + * If the previous pass already emits an imm8 + * jmp, there is nothing to pad (0 byte). + * + * If it emits an imm32 jmp (5 bytes) previously + * and now an imm8 jmp (2 bytes), then we pad + * (5 - 2 = 3) bytes to stop the image from + * shrinking further. + */ + nops = INSN_SZ_DIFF - 2; + if (nops != 0 && nops != 3) { + pr_err("unexpected jump padding: %d bytes\n", + nops); + return -EFAULT; + } + cnt += emit_nops(&prog, INSN_SZ_DIFF - 2); + } EMIT2(0xEB, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT1_off32(0xE9, jmp_offset); @@ -1531,17 +1740,25 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_prog *p, int stack_size, bool mod_ret) { u8 *prog = *pprog; + u8 *jmp_insn; int cnt = 0; - if (p->aux->sleepable) { - if (emit_call(&prog, __bpf_prog_enter_sleepable, prog)) - return -EINVAL; - } else { - if (emit_call(&prog, __bpf_prog_enter, prog)) + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + if (emit_call(&prog, + p->aux->sleepable ? __bpf_prog_enter_sleepable : + __bpf_prog_enter, prog)) return -EINVAL; - /* remember prog start time returned by __bpf_prog_enter */ - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); - } + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* if (__bpf_prog_enter*(prog) == 0) + * goto skip_exec_of_prog; + */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ + /* emit 2 nops that will be replaced with JE insn */ + jmp_insn = prog; + emit_nops(&prog, 2); /* arg1: lea rdi, [rbp - stack_size] */ EMIT4(0x48, 0x8D, 0x7D, -stack_size); @@ -1561,43 +1778,23 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (mod_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - if (p->aux->sleepable) { - if (emit_call(&prog, __bpf_prog_exit_sleepable, prog)) + /* replace 2 nops with JE insn, since jmp target is known */ + jmp_insn[0] = X86_JE; + jmp_insn[1] = prog - jmp_insn - 2; + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, + p->aux->sleepable ? 
__bpf_prog_exit_sleepable : + __bpf_prog_exit, prog)) return -EINVAL; - } else { - /* arg1: mov rdi, progs[i] */ - emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, - (u32) (long) p); - /* arg2: mov rsi, rbx <- start time in nsec */ - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); - if (emit_call(&prog, __bpf_prog_exit, prog)) - return -EINVAL; - } *pprog = prog; return 0; } -static void emit_nops(u8 **pprog, unsigned int len) -{ - unsigned int i, noplen; - u8 *prog = *pprog; - int cnt = 0; - - while (len > 0) { - noplen = len; - - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - - for (i = 0; i < noplen; i++) - EMIT1(ideal_nops[noplen][i]); - len -= noplen; - } - - *pprog = prog; -} - static void emit_align(u8 **pprog, u32 align) { u8 *target, *prog = *pprog; @@ -1972,6 +2169,9 @@ struct x64_jit_data { struct jit_context ctx; }; +#define MAX_PASSES 20 +#define PADDING_PASSES (MAX_PASSES - 5) + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; @@ -1981,6 +2181,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct jit_context ctx = {}; bool tmp_blinded = false; bool extra_pass = false; + bool padding = false; u8 *image = NULL; int *addrs; int pass; @@ -2017,6 +2218,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) image = jit_data->image; header = jit_data->header; extra_pass = true; + padding = true; goto skip_init_addrs; } addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); @@ -2042,8 +2244,10 @@ skip_init_addrs: * may converge on the last pass. In such case do one more * pass to emit the final image. */ - for (pass = 0; pass < 20 || image; pass++) { - proglen = do_jit(prog, addrs, image, oldproglen, &ctx); + for (pass = 0; pass < MAX_PASSES || image; pass++) { + if (!padding && pass >= PADDING_PASSES) + padding = true; + proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 96fde03aa987..d17b67c69f89 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2243,10 +2243,8 @@ emit_jmp: return -EFAULT; } break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: goto notyet; case BPF_JMP | BPF_EXIT: if (seen_exit) { diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile deleted file mode 100644 index 4d49b5a27025..000000000000 --- a/arch/x86/oprofile/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_OPROFILE) += oprofile.o - -DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ - oprof.o cpu_buffer.o buffer_sync.o \ - event_buffer.o oprofile_files.o \ - oprofilefs.o oprofile_stats.o \ - timer_int.o nmi_timer_int.o ) - -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o -oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ - op_model_ppro.o op_model_p4.o diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c deleted file mode 100644 index 1d8391fcca68..000000000000 --- a/arch/x86/oprofile/backtrace.c +++ /dev/null @@ -1,127 +0,0 @@ -/** - * @file backtrace.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author David Smith - */ - -#include <linux/oprofile.h> -#include <linux/sched.h> 
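Illustrative sketch (not part of the patch), referring back to the trampoline change above: each attached program is now bracketed by __bpf_prog_enter()/__bpf_prog_exit(), and the program body is skipped -- while the exit call still runs -- when enter returns 0. In C-level shape, with hypothetical stubs for the kernel helpers:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the real helpers; only the emitted control flow
     * (test rax,rax plus the two NOPs later patched into a JE) matters. */
    static uint64_t prog_enter(void *prog)            { (void)prog; return 1; /* 0 = skip */ }
    static void     prog_exit(void *prog, uint64_t t) { (void)prog; (void)t; }
    static int      bpf_func(void *ctx)               { (void)ctx; return 0; }

    static void invoke_one_prog(void *prog, void *ctx)
    {
            uint64_t start = prog_enter(prog);  /* start time in ns, or 0    */

            if (start)                          /* the patched JE skips this */
                    bpf_func(ctx);              /* run the BPF program       */

            prog_exit(prog, start);             /* always paired with enter  */
    }

    int main(void)
    {
            invoke_one_prog(NULL, NULL);
            return 0;
    }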
-#include <linux/mm.h> -#include <linux/compat.h> -#include <linux/uaccess.h> - -#include <asm/ptrace.h> -#include <asm/stacktrace.h> -#include <asm/unwind.h> - -#ifdef CONFIG_COMPAT -static struct stack_frame_ia32 * -dump_user_backtrace_32(struct stack_frame_ia32 *head) -{ - /* Also check accessibility of one struct frame_head beyond: */ - struct stack_frame_ia32 bufhead[2]; - struct stack_frame_ia32 *fp; - unsigned long bytes; - - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != 0) - return NULL; - - fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); - - oprofile_add_trace(bufhead[0].return_address); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= fp) - return NULL; - - return fp; -} - -static inline int -x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) -{ - struct stack_frame_ia32 *head; - - /* User process is IA32 */ - if (!current || user_64bit_mode(regs)) - return 0; - - head = (struct stack_frame_ia32 *) regs->bp; - while (depth-- && head) - head = dump_user_backtrace_32(head); - - return 1; -} - -#else -static inline int -x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) -{ - return 0; -} -#endif /* CONFIG_COMPAT */ - -static struct stack_frame *dump_user_backtrace(struct stack_frame *head) -{ - /* Also check accessibility of one struct frame_head beyond: */ - struct stack_frame bufhead[2]; - unsigned long bytes; - - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); - if (bytes != 0) - return NULL; - - oprofile_add_trace(bufhead[0].return_address); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= bufhead[0].next_frame) - return NULL; - - return bufhead[0].next_frame; -} - -void -x86_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); - - if (!user_mode(regs)) { - struct unwind_state state; - unsigned long addr; - - if (!depth) - return; - - oprofile_add_trace(regs->ip); - - if (!--depth) - return; - - for (unwind_start(&state, current, regs, NULL); - !unwind_done(&state); unwind_next_frame(&state)) { - addr = unwind_get_return_address(&state); - if (!addr) - break; - - oprofile_add_trace(addr); - - if (!--depth) - break; - } - - return; - } - - if (x86_backtrace_32(regs, depth)) - return; - - while (depth-- && head) - head = dump_user_backtrace(head); -} diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c deleted file mode 100644 index 9e138d00ad36..000000000000 --- a/arch/x86/oprofile/init.c +++ /dev/null @@ -1,38 +0,0 @@ -/** - * @file init.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon <levon@movementarian.org> - */ - -#include <linux/oprofile.h> -#include <linux/init.h> -#include <linux/errno.h> - -/* - * We support CPUs that have performance counters like the Pentium Pro - * with the NMI mode driver. 
- */ - -#ifdef CONFIG_X86_LOCAL_APIC -extern int op_nmi_init(struct oprofile_operations *ops); -extern void op_nmi_exit(void); -#else -static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } -static void op_nmi_exit(void) { } -#endif - -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); - -int __init oprofile_arch_init(struct oprofile_operations *ops) -{ - ops->backtrace = x86_backtrace; - return op_nmi_init(ops); -} - -void oprofile_arch_exit(void) -{ - op_nmi_exit(); -} diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c deleted file mode 100644 index a7a7677265b6..000000000000 --- a/arch/x86/oprofile/nmi_int.c +++ /dev/null @@ -1,780 +0,0 @@ -/** - * @file nmi_int.c - * - * @remark Copyright 2002-2009 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon <levon@movementarian.org> - * @author Robert Richter <robert.richter@amd.com> - * @author Barry Kasindorf <barry.kasindorf@amd.com> - * @author Jason Yeh <jason.yeh@amd.com> - * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> - */ - -#include <linux/init.h> -#include <linux/notifier.h> -#include <linux/smp.h> -#include <linux/oprofile.h> -#include <linux/syscore_ops.h> -#include <linux/slab.h> -#include <linux/moduleparam.h> -#include <linux/kdebug.h> -#include <linux/cpu.h> -#include <asm/nmi.h> -#include <asm/msr.h> -#include <asm/apic.h> - -#include "op_counter.h" -#include "op_x86_model.h" - -static struct op_x86_model_spec *model; -static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); -static DEFINE_PER_CPU(unsigned long, saved_lvtpc); - -/* must be protected with get_online_cpus()/put_online_cpus(): */ -static int nmi_enabled; -static int ctr_running; - -struct op_counter_config counter_config[OP_MAX_COUNTER]; - -/* common functions */ - -u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, - struct op_counter_config *counter_config) -{ - u64 val = 0; - u16 event = (u16)counter_config->event; - - val |= ARCH_PERFMON_EVENTSEL_INT; - val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; - val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; - val |= (counter_config->unit_mask & 0xFF) << 8; - counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV | - ARCH_PERFMON_EVENTSEL_EDGE | - ARCH_PERFMON_EVENTSEL_CMASK); - val |= counter_config->extra; - event &= model->event_mask ? 
model->event_mask : 0xFF; - val |= event & 0xFF; - val |= (u64)(event & 0x0F00) << 24; - - return val; -} - - -static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) -{ - if (ctr_running) - model->check_ctrs(regs, this_cpu_ptr(&cpu_msrs)); - else if (!nmi_enabled) - return NMI_DONE; - else - model->stop(this_cpu_ptr(&cpu_msrs)); - return NMI_HANDLED; -} - -static void nmi_cpu_save_registers(struct op_msrs *msrs) -{ - struct op_msr *counters = msrs->counters; - struct op_msr *controls = msrs->controls; - unsigned int i; - - for (i = 0; i < model->num_counters; ++i) { - if (counters[i].addr) - rdmsrl(counters[i].addr, counters[i].saved); - } - - for (i = 0; i < model->num_controls; ++i) { - if (controls[i].addr) - rdmsrl(controls[i].addr, controls[i].saved); - } -} - -static void nmi_cpu_start(void *dummy) -{ - struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); - if (!msrs->controls) - WARN_ON_ONCE(1); - else - model->start(msrs); -} - -static int nmi_start(void) -{ - get_online_cpus(); - ctr_running = 1; - /* make ctr_running visible to the nmi handler: */ - smp_mb(); - on_each_cpu(nmi_cpu_start, NULL, 1); - put_online_cpus(); - return 0; -} - -static void nmi_cpu_stop(void *dummy) -{ - struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); - if (!msrs->controls) - WARN_ON_ONCE(1); - else - model->stop(msrs); -} - -static void nmi_stop(void) -{ - get_online_cpus(); - on_each_cpu(nmi_cpu_stop, NULL, 1); - ctr_running = 0; - put_online_cpus(); -} - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static DEFINE_PER_CPU(int, switch_index); - -static inline int has_mux(void) -{ - return !!model->switch_ctrl; -} - -inline int op_x86_phys_to_virt(int phys) -{ - return __this_cpu_read(switch_index) + phys; -} - -inline int op_x86_virt_to_phys(int virt) -{ - return virt % model->num_counters; -} - -static void nmi_shutdown_mux(void) -{ - int i; - - if (!has_mux()) - return; - - for_each_possible_cpu(i) { - kfree(per_cpu(cpu_msrs, i).multiplex); - per_cpu(cpu_msrs, i).multiplex = NULL; - per_cpu(switch_index, i) = 0; - } -} - -static int nmi_setup_mux(void) -{ - size_t multiplex_size = - sizeof(struct op_msr) * model->num_virt_counters; - int i; - - if (!has_mux()) - return 1; - - for_each_possible_cpu(i) { - per_cpu(cpu_msrs, i).multiplex = - kzalloc(multiplex_size, GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).multiplex) - return 0; - } - - return 1; -} - -static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) -{ - int i; - struct op_msr *multiplex = msrs->multiplex; - - if (!has_mux()) - return; - - for (i = 0; i < model->num_virt_counters; ++i) { - if (counter_config[i].enabled) { - multiplex[i].saved = -(u64)counter_config[i].count; - } else { - multiplex[i].saved = 0; - } - } - - per_cpu(switch_index, cpu) = 0; -} - -static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *counters = msrs->counters; - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (counters[i].addr) - rdmsrl(counters[i].addr, multiplex[virt].saved); - } -} - -static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) -{ - struct op_msr *counters = msrs->counters; - struct op_msr *multiplex = msrs->multiplex; - int i; - - for (i = 0; i < model->num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (counters[i].addr) - wrmsrl(counters[i].addr, multiplex[virt].saved); - } -} - -static void nmi_cpu_switch(void *dummy) -{ - int cpu = smp_processor_id(); - int si = 
per_cpu(switch_index, cpu); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - nmi_cpu_stop(NULL); - nmi_cpu_save_mpx_registers(msrs); - - /* move to next set */ - si += model->num_counters; - if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) - per_cpu(switch_index, cpu) = 0; - else - per_cpu(switch_index, cpu) = si; - - model->switch_ctrl(model, msrs); - nmi_cpu_restore_mpx_registers(msrs); - - nmi_cpu_start(NULL); -} - - -/* - * Quick check to see if multiplexing is necessary. - * The check should be sufficient since counters are used - * in ordre. - */ -static int nmi_multiplex_on(void) -{ - return counter_config[model->num_counters].count ? 0 : -EINVAL; -} - -static int nmi_switch_event(void) -{ - if (!has_mux()) - return -ENOSYS; /* not implemented */ - if (nmi_multiplex_on() < 0) - return -EINVAL; /* not necessary */ - - get_online_cpus(); - if (ctr_running) - on_each_cpu(nmi_cpu_switch, NULL, 1); - put_online_cpus(); - - return 0; -} - -static inline void mux_init(struct oprofile_operations *ops) -{ - if (has_mux()) - ops->switch_events = nmi_switch_event; -} - -static void mux_clone(int cpu) -{ - if (!has_mux()) - return; - - memcpy(per_cpu(cpu_msrs, cpu).multiplex, - per_cpu(cpu_msrs, 0).multiplex, - sizeof(struct op_msr) * model->num_virt_counters); -} - -#else - -inline int op_x86_phys_to_virt(int phys) { return phys; } -inline int op_x86_virt_to_phys(int virt) { return virt; } -static inline void nmi_shutdown_mux(void) { } -static inline int nmi_setup_mux(void) { return 1; } -static inline void -nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } -static inline void mux_init(struct oprofile_operations *ops) { } -static void mux_clone(int cpu) { } - -#endif - -static void free_msrs(void) -{ - int i; - for_each_possible_cpu(i) { - kfree(per_cpu(cpu_msrs, i).counters); - per_cpu(cpu_msrs, i).counters = NULL; - kfree(per_cpu(cpu_msrs, i).controls); - per_cpu(cpu_msrs, i).controls = NULL; - } - nmi_shutdown_mux(); -} - -static int allocate_msrs(void) -{ - size_t controls_size = sizeof(struct op_msr) * model->num_controls; - size_t counters_size = sizeof(struct op_msr) * model->num_counters; - - int i; - for_each_possible_cpu(i) { - per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).counters) - goto fail; - per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).controls) - goto fail; - } - - if (!nmi_setup_mux()) - goto fail; - - return 1; - -fail: - free_msrs(); - return 0; -} - -static void nmi_cpu_setup(void) -{ - int cpu = smp_processor_id(); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - nmi_cpu_save_registers(msrs); - raw_spin_lock(&oprofilefs_lock); - model->setup_ctrs(model, msrs); - nmi_cpu_setup_mux(cpu, msrs); - raw_spin_unlock(&oprofilefs_lock); - per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, APIC_DM_NMI); -} - -static void nmi_cpu_restore_registers(struct op_msrs *msrs) -{ - struct op_msr *counters = msrs->counters; - struct op_msr *controls = msrs->controls; - unsigned int i; - - for (i = 0; i < model->num_controls; ++i) { - if (controls[i].addr) - wrmsrl(controls[i].addr, controls[i].saved); - } - - for (i = 0; i < model->num_counters; ++i) { - if (counters[i].addr) - wrmsrl(counters[i].addr, counters[i].saved); - } -} - -static void nmi_cpu_shutdown(void) -{ - unsigned int v; - int cpu = smp_processor_id(); - struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - - /* restoring APIC_LVTPC can trigger an apic 
error because the delivery - * mode and vector nr combination can be illegal. That's by design: on - * power on apic lvt contain a zero vector nr which are legal only for - * NMI delivery mode. So inhibit apic err before restoring lvtpc - */ - v = apic_read(APIC_LVTERR); - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); - apic_write(APIC_LVTERR, v); - nmi_cpu_restore_registers(msrs); -} - -static int nmi_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - if (nmi_enabled) - nmi_cpu_setup(); - if (ctr_running) - nmi_cpu_start(NULL); - local_irq_enable(); - return 0; -} - -static int nmi_cpu_down_prep(unsigned int cpu) -{ - local_irq_disable(); - if (ctr_running) - nmi_cpu_stop(NULL); - if (nmi_enabled) - nmi_cpu_shutdown(); - local_irq_enable(); - return 0; -} - -static int nmi_create_files(struct dentry *root) -{ - unsigned int i; - - for (i = 0; i < model->num_virt_counters; ++i) { - struct dentry *dir; - char buf[4]; - - /* quick little hack to _not_ expose a counter if it is not - * available for use. This should protect userspace app. - * NOTE: assumes 1:1 mapping here (that counters are organized - * sequentially in their struct assignment). - */ - if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) - continue; - - snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(root, buf); - oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(dir, "user", &counter_config[i].user); - oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra); - } - - return 0; -} - -static enum cpuhp_state cpuhp_nmi_online; - -static int nmi_setup(void) -{ - int err = 0; - int cpu; - - if (!allocate_msrs()) - return -ENOMEM; - - /* We need to serialize save and setup for HT because the subset - * of msrs are distinct for save and setup operations - */ - - /* Assume saved/restored counters are the same on all CPUs */ - err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); - if (err) - goto fail; - - for_each_possible_cpu(cpu) { - if (!IS_ENABLED(CONFIG_SMP) || !cpu) - continue; - - memcpy(per_cpu(cpu_msrs, cpu).counters, - per_cpu(cpu_msrs, 0).counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(per_cpu(cpu_msrs, cpu).controls, - per_cpu(cpu_msrs, 0).controls, - sizeof(struct op_msr) * model->num_controls); - - mux_clone(cpu); - } - - nmi_enabled = 0; - ctr_running = 0; - /* make variables visible to the nmi handler: */ - smp_mb(); - err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, - 0, "oprofile"); - if (err) - goto fail; - - nmi_enabled = 1; - /* make nmi_enabled visible to the nmi handler: */ - smp_mb(); - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/oprofile:online", - nmi_cpu_online, nmi_cpu_down_prep); - if (err < 0) - goto fail_nmi; - cpuhp_nmi_online = err; - return 0; -fail_nmi: - unregister_nmi_handler(NMI_LOCAL, "oprofile"); -fail: - free_msrs(); - return err; -} - -static void nmi_shutdown(void) -{ - struct op_msrs *msrs; - - cpuhp_remove_state(cpuhp_nmi_online); - nmi_enabled = 0; - ctr_running = 0; - - /* make variables visible to the nmi handler: */ - smp_mb(); - unregister_nmi_handler(NMI_LOCAL, "oprofile"); - msrs = &get_cpu_var(cpu_msrs); - model->shutdown(msrs); - 
free_msrs(); - put_cpu_var(cpu_msrs); -} - -#ifdef CONFIG_PM - -static int nmi_suspend(void) -{ - /* Only one CPU left, just stop that one */ - if (nmi_enabled == 1) - nmi_cpu_stop(NULL); - return 0; -} - -static void nmi_resume(void) -{ - if (nmi_enabled == 1) - nmi_cpu_start(NULL); -} - -static struct syscore_ops oprofile_syscore_ops = { - .resume = nmi_resume, - .suspend = nmi_suspend, -}; - -static void __init init_suspend_resume(void) -{ - register_syscore_ops(&oprofile_syscore_ops); -} - -static void exit_suspend_resume(void) -{ - unregister_syscore_ops(&oprofile_syscore_ops); -} - -#else - -static inline void init_suspend_resume(void) { } -static inline void exit_suspend_resume(void) { } - -#endif /* CONFIG_PM */ - -static int __init p4_init(char **cpu_type) -{ - __u8 cpu_model = boot_cpu_data.x86_model; - - if (cpu_model > 6 || cpu_model == 5) - return 0; - -#ifndef CONFIG_SMP - *cpu_type = "i386/p4"; - model = &op_p4_spec; - return 1; -#else - switch (smp_num_siblings) { - case 1: - *cpu_type = "i386/p4"; - model = &op_p4_spec; - return 1; - - case 2: - *cpu_type = "i386/p4-ht"; - model = &op_p4_ht2_spec; - return 1; - } -#endif - - printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n"); - printk(KERN_INFO "oprofile: Reverting to timer mode.\n"); - return 0; -} - -enum __force_cpu_type { - reserved = 0, /* do not force */ - timer, - arch_perfmon, -}; - -static int force_cpu_type; - -static int set_cpu_type(const char *str, const struct kernel_param *kp) -{ - if (!strcmp(str, "timer")) { - force_cpu_type = timer; - printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); - } else if (!strcmp(str, "arch_perfmon")) { - force_cpu_type = arch_perfmon; - printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); - } else { - force_cpu_type = 0; - } - - return 0; -} -module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); - -static int __init ppro_init(char **cpu_type) -{ - __u8 cpu_model = boot_cpu_data.x86_model; - struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - - if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) - return 0; - - /* - * Documentation on identifying Intel processors by CPU family - * and model can be found in the Intel Software Developer's - * Manuals (SDM): - * - * http://www.intel.com/products/processor/manuals/ - * - * As of May 2010 the documentation for this was in the: - * "Intel 64 and IA-32 Architectures Software Developer's - * Manual Volume 3B: System Programming Guide", "Table B-1 - * CPUID Signature Values of DisplayFamily_DisplayModel". - */ - switch (cpu_model) { - case 0 ... 2: - *cpu_type = "i386/ppro"; - break; - case 3 ... 5: - *cpu_type = "i386/pii"; - break; - case 6 ... 8: - case 10 ... 
11: - *cpu_type = "i386/piii"; - break; - case 9: - case 13: - *cpu_type = "i386/p6_mobile"; - break; - case 14: - *cpu_type = "i386/core"; - break; - case 0x0f: - case 0x16: - case 0x17: - case 0x1d: - *cpu_type = "i386/core_2"; - break; - case 0x1a: - case 0x1e: - case 0x2e: - spec = &op_arch_perfmon_spec; - *cpu_type = "i386/core_i7"; - break; - case 0x1c: - *cpu_type = "i386/atom"; - break; - default: - /* Unknown */ - return 0; - } - - model = spec; - return 1; -} - -int __init op_nmi_init(struct oprofile_operations *ops) -{ - __u8 vendor = boot_cpu_data.x86_vendor; - __u8 family = boot_cpu_data.x86; - char *cpu_type = NULL; - int ret = 0; - - if (!boot_cpu_has(X86_FEATURE_APIC)) - return -ENODEV; - - if (force_cpu_type == timer) - return -ENODEV; - - switch (vendor) { - case X86_VENDOR_AMD: - /* Needs to be at least an Athlon (or hammer in 32bit mode) */ - - switch (family) { - case 6: - cpu_type = "i386/athlon"; - break; - case 0xf: - /* - * Actually it could be i386/hammer too, but - * give user space an consistent name. - */ - cpu_type = "x86-64/hammer"; - break; - case 0x10: - cpu_type = "x86-64/family10"; - break; - case 0x11: - cpu_type = "x86-64/family11h"; - break; - case 0x12: - cpu_type = "x86-64/family12h"; - break; - case 0x14: - cpu_type = "x86-64/family14h"; - break; - case 0x15: - cpu_type = "x86-64/family15h"; - break; - default: - return -ENODEV; - } - model = &op_amd_spec; - break; - - case X86_VENDOR_INTEL: - switch (family) { - /* Pentium IV */ - case 0xf: - p4_init(&cpu_type); - break; - - /* A P6-class processor */ - case 6: - ppro_init(&cpu_type); - break; - - default: - break; - } - - if (cpu_type) - break; - - if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) - return -ENODEV; - - /* use arch perfmon as fallback */ - cpu_type = "i386/arch_perfmon"; - model = &op_arch_perfmon_spec; - break; - - default: - return -ENODEV; - } - - /* default values, can be overwritten by model */ - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; - - if (model->init) - ret = model->init(ops); - if (ret) - return ret; - - if (!model->num_virt_counters) - model->num_virt_counters = model->num_counters; - - mux_init(ops); - - init_suspend_resume(); - - printk(KERN_INFO "oprofile: using NMI interrupt.\n"); - return 0; -} - -void op_nmi_exit(void) -{ - exit_suspend_resume(); -} diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h deleted file mode 100644 index 0b7b7b179cbe..000000000000 --- a/arch/x86/oprofile/op_counter.h +++ /dev/null @@ -1,30 +0,0 @@ -/** - * @file op_counter.h - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -#define OP_MAX_COUNTER 32 - -/* Per-perfctr configuration as set via - * oprofilefs. 
- */ -struct op_counter_config { - unsigned long count; - unsigned long enabled; - unsigned long event; - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; - unsigned long extra; -}; - -extern struct op_counter_config counter_config[]; - -#endif /* OP_COUNTER_H */ diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c deleted file mode 100644 index 660a83c8287b..000000000000 --- a/arch/x86/oprofile/op_model_amd.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * @file op_model_amd.c - * athlon / K7 / K8 / Family 10h model-specific MSR operations - * - * @remark Copyright 2002-2009 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author Philippe Elie - * @author Graydon Hoare - * @author Robert Richter <robert.richter@amd.com> - * @author Barry Kasindorf <barry.kasindorf@amd.com> - * @author Jason Yeh <jason.yeh@amd.com> - * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> - */ - -#include <linux/oprofile.h> -#include <linux/device.h> -#include <linux/pci.h> -#include <linux/percpu.h> - -#include <asm/ptrace.h> -#include <asm/msr.h> -#include <asm/nmi.h> -#include <asm/apic.h> -#include <asm/processor.h> - -#include "op_x86_model.h" -#include "op_counter.h" - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -#define NUM_VIRT_COUNTERS 32 -#else -#define NUM_VIRT_COUNTERS 0 -#endif - -#define OP_EVENT_MASK 0x0FFF -#define OP_CTR_OVERFLOW (1ULL<<31) - -#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) - -static int num_counters; -static unsigned long reset_value[OP_MAX_COUNTER]; - -#define IBS_FETCH_SIZE 6 -#define IBS_OP_SIZE 12 - -static u32 ibs_caps; - -struct ibs_config { - unsigned long op_enabled; - unsigned long fetch_enabled; - unsigned long max_cnt_fetch; - unsigned long max_cnt_op; - unsigned long rand_en; - unsigned long dispatched_ops; - unsigned long branch_target; -}; - -struct ibs_state { - u64 ibs_op_ctl; - int branch_target; - unsigned long sample_size; -}; - -static struct ibs_config ibs_config; -static struct ibs_state ibs_state; - -/* - * IBS randomization macros - */ -#define IBS_RANDOM_BITS 12 -#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) -#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) - -/* - * 16-bit Linear Feedback Shift Register (LFSR) - * - * 16 14 13 11 - * Feedback polynomial = X + X + X + X + 1 - */ -static unsigned int lfsr_random(void) -{ - static unsigned int lfsr_value = 0xF00D; - unsigned int bit; - - /* Compute next bit to shift in */ - bit = ((lfsr_value >> 0) ^ - (lfsr_value >> 2) ^ - (lfsr_value >> 3) ^ - (lfsr_value >> 5)) & 0x0001; - - /* Advance to next register value */ - lfsr_value = (lfsr_value >> 1) | (bit << 15); - - return lfsr_value; -} - -/* - * IBS software randomization - * - * The IBS periodic op counter is randomized in software. The lower 12 - * bits of the 20 bit counter are randomized. IbsOpCurCnt is - * initialized with a 12 bit random value. - */ -static inline u64 op_amd_randomize_ibs_op(u64 val) -{ - unsigned int random = lfsr_random(); - - if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) - /* - * Work around if the hw can not write to IbsOpCurCnt - * - * Randomize the lower 8 bits of the 16 bit - * IbsOpMaxCnt [15:0] value in the range of -128 to - * +127 by adding/subtracting an offset to the - * maximum count (IbsOpMaxCnt). - * - * To avoid over or underflows and protect upper bits - * starting at bit 16, the initial value for - * IbsOpMaxCnt must fit in the range from 0x0081 to - * 0xff80. 
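Illustrative sketch (not part of the patch): in the removed randomization described above, the +/- offset is just bits [11:4] of the 16-bit LFSR value reinterpreted as a signed byte, and the 0x0081..0xff80 clamp is what keeps the adjusted maximum count from under- or overflowing its 16-bit field (the values below are arbitrary examples):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            unsigned int lfsr = 0xBEEF;             /* some LFSR output           */
            int8_t offset = (int8_t)(lfsr >> 4);    /* bits 11:4 as s8: -128..127 */
            uint64_t max_cnt = 0x8000;              /* a max count in the clamped
                                                     * 0x0081..0xff80 range       */

            max_cnt += offset;   /* 0x0081 - 128 >= 1 and 0xff80 + 127 <= 0xffff */
            printf("offset %d, randomized max count %#llx\n",
                   offset, (unsigned long long)max_cnt);
            return 0;
    }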
- */ - val += (s8)(random >> 4); - else - val |= (u64)(random & IBS_RANDOM_MASK) << 32; - - return val; -} - -static inline void -op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - u64 val, ctl; - struct op_entry entry; - - if (!ibs_caps) - return; - - if (ibs_config.fetch_enabled) { - rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); - if (ctl & IBS_FETCH_VAL) { - rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); - oprofile_write_reserve(&entry, regs, val, - IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data64(&entry, val); - oprofile_add_data64(&entry, ctl); - rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); - oprofile_add_data64(&entry, val); - oprofile_write_commit(&entry); - - /* reenable the IRQ */ - ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT); - ctl |= IBS_FETCH_ENABLE; - wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); - } - } - - if (ibs_config.op_enabled) { - rdmsrl(MSR_AMD64_IBSOPCTL, ctl); - if (ctl & IBS_OP_VAL) { - rdmsrl(MSR_AMD64_IBSOPRIP, val); - oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, - ibs_state.sample_size); - oprofile_add_data64(&entry, val); - rdmsrl(MSR_AMD64_IBSOPDATA, val); - oprofile_add_data64(&entry, val); - rdmsrl(MSR_AMD64_IBSOPDATA2, val); - oprofile_add_data64(&entry, val); - rdmsrl(MSR_AMD64_IBSOPDATA3, val); - oprofile_add_data64(&entry, val); - rdmsrl(MSR_AMD64_IBSDCLINAD, val); - oprofile_add_data64(&entry, val); - rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); - oprofile_add_data64(&entry, val); - if (ibs_state.branch_target) { - rdmsrl(MSR_AMD64_IBSBRTARGET, val); - oprofile_add_data(&entry, (unsigned long)val); - } - oprofile_write_commit(&entry); - - /* reenable the IRQ */ - ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); - wrmsrl(MSR_AMD64_IBSOPCTL, ctl); - } - } -} - -static inline void op_amd_start_ibs(void) -{ - u64 val; - - if (!ibs_caps) - return; - - memset(&ibs_state, 0, sizeof(ibs_state)); - - /* - * Note: Since the max count settings may out of range we - * write back the actual used values so that userland can read - * it. - */ - - if (ibs_config.fetch_enabled) { - val = ibs_config.max_cnt_fetch >> 4; - val = min(val, IBS_FETCH_MAX_CNT); - ibs_config.max_cnt_fetch = val << 4; - val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; - val |= IBS_FETCH_ENABLE; - wrmsrl(MSR_AMD64_IBSFETCHCTL, val); - } - - if (ibs_config.op_enabled) { - val = ibs_config.max_cnt_op >> 4; - if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { - /* - * IbsOpCurCnt not supported. See - * op_amd_randomize_ibs_op() for details. - */ - val = clamp(val, 0x0081ULL, 0xFF80ULL); - ibs_config.max_cnt_op = val << 4; - } else { - /* - * The start value is randomized with a - * positive offset, we need to compensate it - * with the half of the randomized range. Also - * avoid underflows. - */ - val += IBS_RANDOM_MAXCNT_OFFSET; - if (ibs_caps & IBS_CAPS_OPCNTEXT) - val = min(val, IBS_OP_MAX_CNT_EXT); - else - val = min(val, IBS_OP_MAX_CNT); - ibs_config.max_cnt_op = - (val - IBS_RANDOM_MAXCNT_OFFSET) << 4; - } - val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT); - val |= ibs_config.dispatched_ops ? 
IBS_OP_CNT_CTL : 0; - val |= IBS_OP_ENABLE; - ibs_state.ibs_op_ctl = val; - ibs_state.sample_size = IBS_OP_SIZE; - if (ibs_config.branch_target) { - ibs_state.branch_target = 1; - ibs_state.sample_size++; - } - val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); - wrmsrl(MSR_AMD64_IBSOPCTL, val); - } -} - -static void op_amd_stop_ibs(void) -{ - if (!ibs_caps) - return; - - if (ibs_config.fetch_enabled) - /* clear max count and enable */ - wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); - - if (ibs_config.op_enabled) - /* clear max count and enable */ - wrmsrl(MSR_AMD64_IBSOPCTL, 0); -} - -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - -static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - /* enable active counters */ - for (i = 0; i < num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (!reset_value[virt]) - continue; - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[virt]); - wrmsrl(msrs->controls[i].addr, val); - } -} - -#endif - -/* functions for op_amd_spec */ - -static void op_amd_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0; i < num_counters; ++i) { - if (!msrs->counters[i].addr) - continue; - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } -} - -static int op_amd_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i = 0; i < num_counters; i++) { - if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - goto fail; - if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - goto fail; - } - /* both registers must be reserved */ - if (num_counters == AMD64_NUM_COUNTERS_CORE) { - msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); - msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); - } else { - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - } - continue; - fail: - if (!counter_config[i].enabled) - continue; - op_x86_warn_reserved(i); - op_amd_shutdown(msrs); - return -EBUSY; - } - - return 0; -} - -static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - /* setup reset_value */ - for (i = 0; i < OP_MAX_COUNTER; ++i) { - if (counter_config[i].enabled - && msrs->counters[op_x86_virt_to_phys(i)].addr) - reset_value[i] = counter_config[i].count; - else - reset_value[i] = 0; - } - - /* clear all counters */ - for (i = 0; i < num_counters; ++i) { - if (!msrs->controls[i].addr) - continue; - rdmsrl(msrs->controls[i].addr, val); - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - op_x86_warn_in_use(i); - val &= model->reserved; - wrmsrl(msrs->controls[i].addr, val); - /* - * avoid a false detection of ctr overflows in NMI - * handler - */ - wrmsrl(msrs->counters[i].addr, -1LL); - } - - /* enable active counters */ - for (i = 0; i < num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if (!reset_value[virt]) - continue; - - /* setup counter registers */ - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); - - /* setup control registers */ - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[virt]); - wrmsrl(msrs->controls[i].addr, val); - } -} - -static int op_amd_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - for (i = 0; i < num_counters; ++i) { - int virt = op_x86_phys_to_virt(i); - if 
(!reset_value[virt]) - continue; - rdmsrl(msrs->counters[i].addr, val); - /* bit is clear if overflowed: */ - if (val & OP_CTR_OVERFLOW) - continue; - oprofile_add_sample(regs, virt); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); - } - - op_amd_handle_ibs(regs, msrs); - - /* See op_model_ppro.c */ - return 1; -} - -static void op_amd_start(struct op_msrs const * const msrs) -{ - u64 val; - int i; - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[op_x86_phys_to_virt(i)]) - continue; - rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } - - op_amd_start_ibs(); -} - -static void op_amd_stop(struct op_msrs const * const msrs) -{ - u64 val; - int i; - - /* - * Subtle: stop on all counters to avoid race with setting our - * pm callback - */ - for (i = 0; i < num_counters; ++i) { - if (!reset_value[op_x86_phys_to_virt(i)]) - continue; - rdmsrl(msrs->controls[i].addr, val); - val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } - - op_amd_stop_ibs(); -} - -/* - * check and reserve APIC extended interrupt LVT offset for IBS if - * available - */ - -static void init_ibs(void) -{ - ibs_caps = get_ibs_caps(); - - if (!ibs_caps) - return; - - printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); -} - -static int (*create_arch_files)(struct dentry *root); - -static int setup_ibs_files(struct dentry *root) -{ - struct dentry *dir; - int ret = 0; - - /* architecture specific files */ - if (create_arch_files) - ret = create_arch_files(root); - - if (ret) - return ret; - - if (!ibs_caps) - return ret; - - /* model specific files */ - - /* setup some reasonable defaults */ - memset(&ibs_config, 0, sizeof(ibs_config)); - ibs_config.max_cnt_fetch = 250000; - ibs_config.max_cnt_op = 250000; - - if (ibs_caps & IBS_CAPS_FETCHSAM) { - dir = oprofilefs_mkdir(root, "ibs_fetch"); - oprofilefs_create_ulong(dir, "enable", - &ibs_config.fetch_enabled); - oprofilefs_create_ulong(dir, "max_count", - &ibs_config.max_cnt_fetch); - oprofilefs_create_ulong(dir, "rand_enable", - &ibs_config.rand_en); - } - - if (ibs_caps & IBS_CAPS_OPSAM) { - dir = oprofilefs_mkdir(root, "ibs_op"); - oprofilefs_create_ulong(dir, "enable", - &ibs_config.op_enabled); - oprofilefs_create_ulong(dir, "max_count", - &ibs_config.max_cnt_op); - if (ibs_caps & IBS_CAPS_OPCNT) - oprofilefs_create_ulong(dir, "dispatched_ops", - &ibs_config.dispatched_ops); - if (ibs_caps & IBS_CAPS_BRNTRGT) - oprofilefs_create_ulong(dir, "branch_target", - &ibs_config.branch_target); - } - - return 0; -} - -struct op_x86_model_spec op_amd_spec; - -static int op_amd_init(struct oprofile_operations *ops) -{ - init_ibs(); - create_arch_files = ops->create_files; - ops->create_files = setup_ibs_files; - - if (boot_cpu_data.x86 == 0x15) { - num_counters = AMD64_NUM_COUNTERS_CORE; - } else { - num_counters = AMD64_NUM_COUNTERS; - } - - op_amd_spec.num_counters = num_counters; - op_amd_spec.num_controls = num_counters; - op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); - - return 0; -} - -struct op_x86_model_spec op_amd_spec = { - /* num_counters/num_controls filled in at runtime */ - .reserved = MSR_AMD_EVENTSEL_RESERVED, - .event_mask = OP_EVENT_MASK, - .init = op_amd_init, - .fill_in_addresses = &op_amd_fill_in_addresses, - .setup_ctrs = &op_amd_setup_ctrs, - .check_ctrs = &op_amd_check_ctrs, - .start = &op_amd_start, - .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown, -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - 
.switch_ctrl = &op_mux_switch_ctrl, -#endif -}; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c deleted file mode 100644 index ad1d91f475ab..000000000000 --- a/arch/x86/oprofile/op_model_p4.c +++ /dev/null @@ -1,723 +0,0 @@ -/** - * @file op_model_p4.c - * P4 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author Graydon Hoare - */ - -#include <linux/oprofile.h> -#include <linux/smp.h> -#include <linux/ptrace.h> -#include <asm/nmi.h> -#include <asm/msr.h> -#include <asm/fixmap.h> -#include <asm/apic.h> - - -#include "op_x86_model.h" -#include "op_counter.h" - -#define NUM_EVENTS 39 - -#define NUM_COUNTERS_NON_HT 8 -#define NUM_ESCRS_NON_HT 45 -#define NUM_CCCRS_NON_HT 18 -#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) - -#define NUM_COUNTERS_HT2 4 -#define NUM_ESCRS_HT2 23 -#define NUM_CCCRS_HT2 9 -#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) - -#define OP_CTR_OVERFLOW (1ULL<<31) - -static unsigned int num_counters = NUM_COUNTERS_NON_HT; -static unsigned int num_controls = NUM_CONTROLS_NON_HT; - -/* this has to be checked dynamically since the - hyper-threadedness of a chip is discovered at - kernel boot-time. */ -static inline void setup_num_counters(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings == 2) { - num_counters = NUM_COUNTERS_HT2; - num_controls = NUM_CONTROLS_HT2; - } -#endif -} - -static inline int addr_increment(void) -{ -#ifdef CONFIG_SMP - return smp_num_siblings == 2 ? 2 : 1; -#else - return 1; -#endif -} - - -/* tables to simulate simplified hardware view of p4 registers */ -struct p4_counter_binding { - int virt_counter; - int counter_address; - int cccr_address; -}; - -struct p4_event_binding { - int escr_select; /* value to put in CCCR */ - int event_select; /* value to put in ESCR */ - struct { - int virt_counter; /* for this counter... */ - int escr_address; /* use this ESCR */ - } bindings[2]; -}; - -/* nb: these CTR_* defines are a duplicate of defines in - event/i386.p4*events. */ - - -#define CTR_BPU_0 (1 << 0) -#define CTR_MS_0 (1 << 1) -#define CTR_FLAME_0 (1 << 2) -#define CTR_IQ_4 (1 << 3) -#define CTR_BPU_2 (1 << 4) -#define CTR_MS_2 (1 << 5) -#define CTR_FLAME_2 (1 << 6) -#define CTR_IQ_5 (1 << 7) - -static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = { - { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, - { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, - { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, - { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, - { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, - { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, - { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, - { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } -}; - -#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT) - -/* p4 event codes in libop/op_event.h are indices into this table. 
*/ - -static struct p4_event_binding p4_events[NUM_EVENTS] = { - - { /* BRANCH_RETIRED */ - 0x05, 0x06, - { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, - {CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* MISPRED_BRANCH_RETIRED */ - 0x04, 0x03, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* TC_DELIVER_MODE */ - 0x01, 0x01, - { { CTR_MS_0, MSR_P4_TC_ESCR0}, - { CTR_MS_2, MSR_P4_TC_ESCR1} } - }, - - { /* BPU_FETCH_REQUEST */ - 0x00, 0x03, - { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, - { CTR_BPU_2, MSR_P4_BPU_ESCR1} } - }, - - { /* ITLB_REFERENCE */ - 0x03, 0x18, - { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, - { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } - }, - - { /* MEMORY_CANCEL */ - 0x05, 0x02, - { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, - { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } - }, - - { /* MEMORY_COMPLETE */ - 0x02, 0x08, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* LOAD_PORT_REPLAY */ - 0x02, 0x04, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* STORE_PORT_REPLAY */ - 0x02, 0x05, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* MOB_LOAD_REPLAY */ - 0x02, 0x03, - { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, - { CTR_BPU_2, MSR_P4_MOB_ESCR1} } - }, - - { /* PAGE_WALK_TYPE */ - 0x04, 0x01, - { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, - { CTR_BPU_2, MSR_P4_PMH_ESCR1} } - }, - - { /* BSQ_CACHE_REFERENCE */ - 0x07, 0x0c, - { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, - { CTR_BPU_2, MSR_P4_BSU_ESCR1} } - }, - - { /* IOQ_ALLOCATION */ - 0x06, 0x03, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { 0, 0 } } - }, - - { /* IOQ_ACTIVE_ENTRIES */ - 0x06, 0x1a, - { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, - { 0, 0 } } - }, - - { /* FSB_DATA_ACTIVITY */ - 0x06, 0x17, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { CTR_BPU_2, MSR_P4_FSB_ESCR1} } - }, - - { /* BSQ_ALLOCATION */ - 0x07, 0x05, - { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, - { 0, 0 } } - }, - - { /* BSQ_ACTIVE_ENTRIES */ - 0x07, 0x06, - { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, - { 0, 0 } } - }, - - { /* X87_ASSIST */ - 0x05, 0x03, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* SSE_INPUT_ASSIST */ - 0x01, 0x34, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* PACKED_SP_UOP */ - 0x01, 0x08, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* PACKED_DP_UOP */ - 0x01, 0x0c, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* SCALAR_SP_UOP */ - 0x01, 0x0a, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* SCALAR_DP_UOP */ - 0x01, 0x0e, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* 64BIT_MMX_UOP */ - 0x01, 0x02, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* 128BIT_MMX_UOP */ - 0x01, 0x1a, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* X87_FP_UOP */ - 0x01, 0x04, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* X87_SIMD_MOVES_UOP */ - 0x01, 0x2e, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* MACHINE_CLEAR */ - 0x05, 0x02, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* GLOBAL_POWER_EVENTS */ - 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { CTR_BPU_2, MSR_P4_FSB_ESCR1} } - }, - - { /* TC_MS_XFER */ - 0x00, 0x05, - 
{ { CTR_MS_0, MSR_P4_MS_ESCR0}, - { CTR_MS_2, MSR_P4_MS_ESCR1} } - }, - - { /* UOP_QUEUE_WRITES */ - 0x00, 0x09, - { { CTR_MS_0, MSR_P4_MS_ESCR0}, - { CTR_MS_2, MSR_P4_MS_ESCR1} } - }, - - { /* FRONT_END_EVENT */ - 0x05, 0x08, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* EXECUTION_EVENT */ - 0x05, 0x0c, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* REPLAY_EVENT */ - 0x05, 0x09, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* INSTR_RETIRED */ - 0x04, 0x02, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* UOPS_RETIRED */ - 0x04, 0x01, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* UOP_TYPE */ - 0x02, 0x02, - { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, - { CTR_IQ_5, MSR_P4_RAT_ESCR1} } - }, - - { /* RETIRED_MISPRED_BRANCH_TYPE */ - 0x02, 0x05, - { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, - { CTR_MS_2, MSR_P4_TBPU_ESCR1} } - }, - - { /* RETIRED_BRANCH_TYPE */ - 0x02, 0x04, - { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, - { CTR_MS_2, MSR_P4_TBPU_ESCR1} } - } -}; - - -#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) - -#define ESCR_RESERVED_BITS 0x80000003 -#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) -#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) -#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) -#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) -#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) -#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) -#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) - -#define CCCR_RESERVED_BITS 0x38030FFF -#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) -#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) -#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) -#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) -#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) -#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) -#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) -#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) - - -/* this assigns a "stagger" to the current CPU, which is used throughout - the code in this module as an extra array offset, to select the "even" - or "odd" part of all the divided resources. */ -static unsigned int get_stagger(void) -{ -#ifdef CONFIG_SMP - int cpu = smp_processor_id(); - return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); -#endif - return 0; -} - - -/* finally, mediate access to a real hardware counter - by passing a "virtual" counter numer to this macro, - along with your stagger setting. */ -#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) - -static unsigned long reset_value[NUM_COUNTERS_NON_HT]; - -static void p4_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0; i < num_counters; ++i) { - if (msrs->counters[i].addr) - release_perfctr_nmi(msrs->counters[i].addr); - } - /* - * some of the control registers are specially reserved in - * conjunction with the counter registers (hence the starting offset). - * This saves a few bits. 
- */ - for (i = num_counters; i < num_controls; ++i) { - if (msrs->controls[i].addr) - release_evntsel_nmi(msrs->controls[i].addr); - } -} - -static int p4_fill_in_addresses(struct op_msrs * const msrs) -{ - unsigned int i; - unsigned int addr, cccraddr, stag; - - setup_num_counters(); - stag = get_stagger(); - - /* the counter & cccr registers we pay attention to */ - for (i = 0; i < num_counters; ++i) { - addr = p4_counters[VIRT_CTR(stag, i)].counter_address; - cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; - if (reserve_perfctr_nmi(addr)) { - msrs->counters[i].addr = addr; - msrs->controls[i].addr = cccraddr; - } - } - - /* 43 ESCR registers in three or four discontiguous group */ - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 - * to avoid special case in nmi_{save|restore}_registers() */ - if (boot_cpu_data.x86_model >= 0x3) { - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - } else { - for (addr = MSR_P4_IQ_ESCR0 + stag; - addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - } - - for (addr = MSR_P4_RAT_ESCR0 + stag; - addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - /* there are 2 remaining non-contiguously located ESCRs */ - - if (num_counters == NUM_COUNTERS_NON_HT) { - /* standard non-HT CPUs handle both remaining ESCRs*/ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; - - } else if (stag == 0) { - /* HT CPUs give the first remainder to the even thread, as - the 32nd control register */ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; - - } else { - /* and two copies of the second to the odd thread, - for the 22st and 23nd control registers */ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - } - } - - for (i = 0; i < num_counters; ++i) { - if (!counter_config[i].enabled) - continue; - if (msrs->controls[i].addr) - continue; - op_x86_warn_reserved(i); - p4_shutdown(msrs); - return -EBUSY; - } - - return 0; -} - - -static void pmc_setup_one_p4_counter(unsigned int ctr) -{ - int i; - int const maxbind = 2; - unsigned int cccr = 0; - unsigned int escr = 0; - unsigned int high = 0; - unsigned int counter_bit; - struct p4_event_binding *ev = NULL; - unsigned int stag; - - stag = get_stagger(); - - /* convert from counter *number* to counter *bit* */ - counter_bit = 1 << VIRT_CTR(stag, ctr); - - /* find our event binding structure. 
*/ - if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { - printk(KERN_ERR - "oprofile: P4 event code 0x%lx out of range\n", - counter_config[ctr].event); - return; - } - - ev = &(p4_events[counter_config[ctr].event - 1]); - - for (i = 0; i < maxbind; i++) { - if (ev->bindings[i].virt_counter & counter_bit) { - - /* modify ESCR */ - rdmsr(ev->bindings[i].escr_address, escr, high); - ESCR_CLEAR(escr); - if (stag == 0) { - ESCR_SET_USR_0(escr, counter_config[ctr].user); - ESCR_SET_OS_0(escr, counter_config[ctr].kernel); - } else { - ESCR_SET_USR_1(escr, counter_config[ctr].user); - ESCR_SET_OS_1(escr, counter_config[ctr].kernel); - } - ESCR_SET_EVENT_SELECT(escr, ev->event_select); - ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); - wrmsr(ev->bindings[i].escr_address, escr, high); - - /* modify CCCR */ - rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, - cccr, high); - CCCR_CLEAR(cccr); - CCCR_SET_REQUIRED_BITS(cccr); - CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); - if (stag == 0) - CCCR_SET_PMI_OVF_0(cccr); - else - CCCR_SET_PMI_OVF_1(cccr); - wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, - cccr, high); - return; - } - } - - printk(KERN_ERR - "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", - counter_config[ctr].event, stag, ctr); -} - - -static void p4_setup_ctrs(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - unsigned int i; - unsigned int low, high; - unsigned int stag; - - stag = get_stagger(); - - rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (!MISC_PMC_ENABLED_P(low)) { - printk(KERN_ERR "oprofile: P4 PMC not available\n"); - return; - } - - /* clear the cccrs we will use */ - for (i = 0; i < num_counters; i++) { - if (unlikely(!msrs->controls[i].addr)) - continue; - rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - CCCR_CLEAR(low); - CCCR_SET_REQUIRED_BITS(low); - wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - } - - /* clear all escrs (including those outside our concern) */ - for (i = num_counters; i < num_controls; i++) { - if (unlikely(!msrs->controls[i].addr)) - continue; - wrmsr(msrs->controls[i].addr, 0, 0); - } - - /* setup all counters */ - for (i = 0; i < num_counters; ++i) { - if (counter_config[i].enabled && msrs->controls[i].addr) { - reset_value[i] = counter_config[i].count; - pmc_setup_one_p4_counter(i); - wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, - -(u64)counter_config[i].count); - } else { - reset_value[i] = 0; - } - } -} - - -static int p4_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned long ctr, low, high, stag, real; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - - if (!reset_value[i]) - continue; - - /* - * there is some eccentricity in the hardware which - * requires that we perform 2 extra corrections: - * - * - check both the CCCR:OVF flag for overflow and the - * counter high bit for un-flagged overflows. - * - * - write the counter back twice to ensure it gets - * updated properly. - * - * the former seems to be related to extra NMIs happening - * during the current NMI; the latter is reported as errata - * N15 in intel doc 249199-029, pentium 4 specification - * update, though their suggested work-around does not - * appear to solve the problem. 
- */ - - real = VIRT_CTR(stag, i); - - rdmsr(p4_counters[real].cccr_address, low, high); - rdmsr(p4_counters[real].counter_address, ctr, high); - if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { - oprofile_add_sample(regs, i); - wrmsrl(p4_counters[real].counter_address, - -(u64)reset_value[i]); - CCCR_CLEAR_OVF(low); - wrmsr(p4_counters[real].cccr_address, low, high); - wrmsrl(p4_counters[real].counter_address, - -(u64)reset_value[i]); - } - } - - /* P4 quirk: you have to re-unmask the apic vector */ - apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); - - /* See op_model_ppro.c */ - return 1; -} - - -static void p4_start(struct op_msrs const * const msrs) -{ - unsigned int low, high, stag; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - CCCR_SET_ENABLE(low); - wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - } -} - - -static void p4_stop(struct op_msrs const * const msrs) -{ - unsigned int low, high, stag; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - CCCR_SET_DISABLE(low); - wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - } -} - -#ifdef CONFIG_SMP -struct op_x86_model_spec op_p4_ht2_spec = { - .num_counters = NUM_COUNTERS_HT2, - .num_controls = NUM_CONTROLS_HT2, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown -}; -#endif - -struct op_x86_model_spec op_p4_spec = { - .num_counters = NUM_COUNTERS_NON_HT, - .num_controls = NUM_CONTROLS_NON_HT, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown -}; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c deleted file mode 100644 index 7913b6921959..000000000000 --- a/arch/x86/oprofile/op_model_ppro.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * @file op_model_ppro.h - * Family 6 perfmon and architectural perfmon MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Copyright 2008 Intel Corporation - * @remark Read the file COPYING - * - * @author John Levon - * @author Philippe Elie - * @author Graydon Hoare - * @author Andi Kleen - * @author Robert Richter <robert.richter@amd.com> - */ - -#include <linux/oprofile.h> -#include <linux/slab.h> -#include <asm/ptrace.h> -#include <asm/msr.h> -#include <asm/apic.h> -#include <asm/nmi.h> - -#include "op_x86_model.h" -#include "op_counter.h" - -static int num_counters = 2; -static int counter_width = 32; - -#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) - -static u64 reset_value[OP_MAX_COUNTER]; - -static void ppro_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0; i < num_counters; ++i) { - if (!msrs->counters[i].addr) - continue; - release_perfctr_nmi(MSR_P6_PERFCTR0 + i); - release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); - } -} - -static int ppro_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i = 0; i < num_counters; i++) { - if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) - goto fail; - if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { - release_perfctr_nmi(MSR_P6_PERFCTR0 + i); - goto fail; - } - /* both registers must be reserved */ - 
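/*
 * [Editorial note, not part of the patch] The deleted oprofile models in the
 * hunks above all follow the same NMI-profiling pattern: each performance
 * counter is armed with the two's-complement of the requested sample count so
 * that it overflows (and raises a PMI/NMI) after exactly that many events, and
 * the NMI handler records a sample and re-arms it the same way (see the
 * -(u64)counter_config[i].count and -reset_value[i] writes above). A minimal,
 * self-contained sketch of that idea, with a plain array standing in for the
 * perfctr MSRs; all names here are illustrative, not kernel API:
 */
#include <stdint.h>
#include <stdio.h>

#define NCTRS 2
static uint64_t fake_perfctr[NCTRS];   /* stand-in for MSR_P6_PERFCTR0 + i */
static uint64_t reset_count[NCTRS];

static void arm_counter(int i, uint64_t count)
{
	reset_count[i] = count;
	/* counter overflows after 'count' increments */
	fake_perfctr[i] = (uint64_t)-count;
}

static void on_overflow_nmi(int i)
{
	/* a real handler would log a sample here, then re-arm as above */
	fake_perfctr[i] = (uint64_t)-reset_count[i];
}

int main(void)
{
	arm_counter(0, 100000);
	on_overflow_nmi(0);
	printf("counter 0 re-armed to %#llx\n",
	       (unsigned long long)fake_perfctr[0]);
	return 0;
}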
msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; - msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; - continue; - fail: - if (!counter_config[i].enabled) - continue; - op_x86_warn_reserved(i); - ppro_shutdown(msrs); - return -EBUSY; - } - - return 0; -} - - -static void ppro_setup_ctrs(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) { - union cpuid10_eax eax; - eax.full = cpuid_eax(0xa); - - /* - * For Core2 (family 6, model 15), don't reset the - * counter width: - */ - if (!(eax.split.version_id == 0 && - __this_cpu_read(cpu_info.x86) == 6 && - __this_cpu_read(cpu_info.x86_model) == 15)) { - - if (counter_width < eax.split.bit_width) - counter_width = eax.split.bit_width; - } - } - - /* clear all counters */ - for (i = 0; i < num_counters; ++i) { - if (!msrs->controls[i].addr) - continue; - rdmsrl(msrs->controls[i].addr, val); - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - op_x86_warn_in_use(i); - val &= model->reserved; - wrmsrl(msrs->controls[i].addr, val); - /* - * avoid a false detection of ctr overflows in NMI * - * handler - */ - wrmsrl(msrs->counters[i].addr, -1LL); - } - - /* enable active counters */ - for (i = 0; i < num_counters; ++i) { - if (counter_config[i].enabled && msrs->counters[i].addr) { - reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - rdmsrl(msrs->controls[i].addr, val); - val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[i]); - wrmsrl(msrs->controls[i].addr, val); - } else { - reset_value[i] = 0; - } - } -} - - -static int ppro_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - u64 val; - int i; - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - rdmsrl(msrs->counters[i].addr, val); - if (val & (1ULL << (counter_width - 1))) - continue; - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - } - - /* Only P6 based Pentium M need to re-unmask the apic vector but it - * doesn't hurt other P6 variant */ - apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); - - /* We can't work out if we really handled an interrupt. We - * might have caught a *second* counter just after overflowing - * the interrupt for this counter then arrives - * and we don't find a counter that's overflowed, so we - * would return 0 and get dazed + confused. Instead we always - * assume we found an overflow. This sucks. - */ - return 1; -} - - -static void ppro_start(struct op_msrs const * const msrs) -{ - u64 val; - int i; - - for (i = 0; i < num_counters; ++i) { - if (reset_value[i]) { - rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } - } -} - - -static void ppro_stop(struct op_msrs const * const msrs) -{ - u64 val; - int i; - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - rdmsrl(msrs->controls[i].addr, val); - val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(msrs->controls[i].addr, val); - } -} - -struct op_x86_model_spec op_ppro_spec = { - .num_counters = 2, - .num_controls = 2, - .reserved = MSR_PPRO_EVENTSEL_RESERVED, - .fill_in_addresses = &ppro_fill_in_addresses, - .setup_ctrs = &ppro_setup_ctrs, - .check_ctrs = &ppro_check_ctrs, - .start = &ppro_start, - .stop = &ppro_stop, - .shutdown = &ppro_shutdown -}; - -/* - * Architectural performance monitoring. 
- * - * Newer Intel CPUs (Core1+) have support for architectural - * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. - * The advantage of this is that it can be done without knowing about - * the specific CPU. - */ - -static void arch_perfmon_setup_counters(void) -{ - union cpuid10_eax eax; - - eax.full = cpuid_eax(0xa); - - /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ - if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 15) { - eax.split.version_id = 2; - eax.split.num_counters = 2; - eax.split.bit_width = 40; - } - - num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); - - op_arch_perfmon_spec.num_counters = num_counters; - op_arch_perfmon_spec.num_controls = num_counters; -} - -static int arch_perfmon_init(struct oprofile_operations *ignore) -{ - arch_perfmon_setup_counters(); - return 0; -} - -struct op_x86_model_spec op_arch_perfmon_spec = { - .reserved = MSR_PPRO_EVENTSEL_RESERVED, - .init = &arch_perfmon_init, - /* num_counters/num_controls filled in at runtime */ - .fill_in_addresses = &ppro_fill_in_addresses, - /* user space does the cpuid check for available events */ - .setup_ctrs = &ppro_setup_ctrs, - .check_ctrs = &ppro_check_ctrs, - .start = &ppro_start, - .stop = &ppro_stop, - .shutdown = &ppro_shutdown -}; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h deleted file mode 100644 index 276cf79b5d24..000000000000 --- a/arch/x86/oprofile/op_x86_model.h +++ /dev/null @@ -1,90 +0,0 @@ -/** - * @file op_x86_model.h - * interface to x86 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author Graydon Hoare - * @author Robert Richter <robert.richter@amd.com> - */ - -#ifndef OP_X86_MODEL_H -#define OP_X86_MODEL_H - -#include <asm/types.h> -#include <asm/perf_event.h> - -struct op_msr { - unsigned long addr; - u64 saved; -}; - -struct op_msrs { - struct op_msr *counters; - struct op_msr *controls; - struct op_msr *multiplex; -}; - -struct pt_regs; - -struct oprofile_operations; - -/* The model vtable abstracts the differences between - * various x86 CPU models' perfctr support. - */ -struct op_x86_model_spec { - unsigned int num_counters; - unsigned int num_controls; - unsigned int num_virt_counters; - u64 reserved; - u16 event_mask; - int (*init)(struct oprofile_operations *ops); - int (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs); - int (*check_ctrs)(struct pt_regs * const regs, - struct op_msrs const * const msrs); - void (*start)(struct op_msrs const * const msrs); - void (*stop)(struct op_msrs const * const msrs); - void (*shutdown)(struct op_msrs const * const msrs); -#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX - void (*switch_ctrl)(struct op_x86_model_spec const *model, - struct op_msrs const * const msrs); -#endif -}; - -struct op_counter_config; - -static inline void op_x86_warn_in_use(int counter) -{ - /* - * The warning indicates an already running counter. If - * oprofile doesn't collect data, then try using a different - * performance counter on your platform to monitor the desired - * event. Delete counter #%d from the desired event by editing - * the /usr/share/oprofile/%s/<cpu>/events file. If the event - * cannot be monitored by any other counter, contact your - * hardware or BIOS vendor. 
- */ - pr_warn("oprofile: counter #%d on cpu #%d may already be used\n", - counter, smp_processor_id()); -} - -static inline void op_x86_warn_reserved(int counter) -{ - pr_warn("oprofile: counter #%d is already reserved\n", counter); -} - -extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, - struct op_counter_config *counter_config); -extern int op_x86_phys_to_virt(int phys); -extern int op_x86_virt_to_phys(int virt); - -extern struct op_x86_model_spec op_ppro_spec; -extern struct op_x86_model_spec op_p4_spec; -extern struct op_x86_model_spec op_p4_ht2_spec; -extern struct op_x86_model_spec op_amd_spec; -extern struct op_x86_model_spec op_arch_perfmon_spec; - -#endif /* OP_X86_MODEL_H */ diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index d0e835470d01..b2f90a1a89f1 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -4,7 +4,6 @@ obj-y += atom/ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ -obj-y += goldfish/ obj-y += iris/ obj-y += intel/ obj-y += intel-mid/ diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8efd003540ca..1b82d77019b1 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -54,10 +54,7 @@ * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. */ static u64 efi_va = EFI_VA_START; - -struct efi_scratch efi_scratch; - -EXPORT_SYMBOL_GPL(efi_mm); +static struct mm_struct *efi_prev_mm; /* * We need our own copy of the higher levels of the page tables @@ -237,7 +234,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } - efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ + efi_mixed_mode_stack_pa = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); @@ -462,11 +459,17 @@ void __init efi_dump_pagetable(void) * can not change under us. * It should be ensured that there are no concurent calls to this function. 
*/ -void efi_switch_mm(struct mm_struct *mm) +void efi_enter_mm(void) +{ + efi_prev_mm = current->active_mm; + current->active_mm = &efi_mm; + switch_mm(efi_prev_mm, &efi_mm, NULL); +} + +void efi_leave_mm(void) { - efi_scratch.prev_mm = current->active_mm; - current->active_mm = mm; - switch_mm(efi_scratch.prev_mm, mm, NULL); + current->active_mm = efi_prev_mm; + switch_mm(&efi_mm, efi_prev_mm, NULL); } static DEFINE_SPINLOCK(efi_runtime_lock); @@ -530,12 +533,12 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size, efi_sync_low_kernel_mappings(); local_irq_save(flags); - efi_switch_mm(&efi_mm); + efi_enter_mm(); status = __efi_thunk(set_virtual_address_map, memory_map_size, descriptor_size, descriptor_version, virtual_map); - efi_switch_mm(efi_scratch.prev_mm); + efi_leave_mm(); local_irq_restore(flags); return status; @@ -829,9 +832,9 @@ efi_set_virtual_address_map(unsigned long memory_map_size, descriptor_size, descriptor_version, virtual_map); - efi_switch_mm(&efi_mm); + efi_enter_mm(); - kernel_fpu_begin(); + efi_fpu_begin(); /* Disable interrupts around EFI calls: */ local_irq_save(flags); @@ -840,12 +843,12 @@ efi_set_virtual_address_map(unsigned long memory_map_size, descriptor_version, virtual_map); local_irq_restore(flags); - kernel_fpu_end(); + efi_fpu_end(); /* grab the virtually remapped EFI runtime services table pointer */ efi.runtime = READ_ONCE(systab->runtime); - efi_switch_mm(efi_scratch.prev_mm); + efi_leave_mm(); return status; } diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 26f0da238c1c..fd3dd1708eba 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -33,7 +33,7 @@ SYM_CODE_START(__efi64_thunk) * Switch to 1:1 mapped 32-bit stack pointer. */ movq %rsp, %rax - movq efi_scratch(%rip), %rsp + movq efi_mixed_mode_stack_pa(%rip), %rsp push %rax /* @@ -70,3 +70,7 @@ SYM_CODE_START(__efi64_thunk) pushl %ebp lret SYM_CODE_END(__efi64_thunk) + + .bss + .balign 8 +SYM_DATA(efi_mixed_mode_stack_pa, .quad 0) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 5a40fe411ebd..67d93a243c35 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -687,15 +687,25 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, * @return: Returns, if the page fault is not handled. This function * will never return if the page fault is handled successfully. */ -void efi_recover_from_page_fault(unsigned long phys_addr) +void efi_crash_gracefully_on_page_fault(unsigned long phys_addr) { if (!IS_ENABLED(CONFIG_X86_64)) return; /* + * If we get an interrupt/NMI while processing an EFI runtime service + * then this is a regular OOPS, not an EFI failure. + */ + if (in_interrupt()) + return; + + /* * Make sure that an efi runtime service caused the page fault. + * READ_ONCE() because we might be OOPSing in a different thread, + * and we don't want to trip KTSAN while trying to OOPS. 
*/ - if (efi_rts_work.efi_rts_id == EFI_NONE) + if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE || + current_work() != &efi_rts_work.work) return; /* @@ -747,6 +757,4 @@ void efi_recover_from_page_fault(unsigned long phys_addr) set_current_state(TASK_IDLE); schedule(); } - - return; } diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index c33f744b5388..b39bf3b5e108 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -22,6 +22,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/gpio_keys.h> +#include <linux/gpio/machine.h> #include <linux/dmi.h> #include <asm/geode.h> @@ -69,21 +70,15 @@ static struct platform_device alix_buttons_dev = { static struct gpio_led alix_leds[] = { { .name = "alix:1", - .gpio = 6, .default_trigger = "default-on", - .active_low = 1, }, { .name = "alix:2", - .gpio = 25, .default_trigger = "default-off", - .active_low = 1, }, { .name = "alix:3", - .gpio = 27, .default_trigger = "default-off", - .active_low = 1, }, }; @@ -92,6 +87,17 @@ static struct gpio_led_platform_data alix_leds_data = { .leds = alix_leds, }; +static struct gpiod_lookup_table alix_leds_gpio_table = { + .dev_id = "leds-gpio", + .table = { + /* The Geode GPIOs should be on the CS5535 companion chip */ + GPIO_LOOKUP_IDX("cs5535-gpio", 6, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("cs5535-gpio", 25, NULL, 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("cs5535-gpio", 27, NULL, 2, GPIO_ACTIVE_LOW), + { } + }, +}; + static struct platform_device alix_leds_dev = { .name = "leds-gpio", .id = -1, @@ -106,6 +112,7 @@ static struct platform_device *alix_devs[] __initdata = { static void __init register_alix(void) { /* Setup LED control through leds-gpio driver */ + gpiod_add_lookup_table(&alix_leds_gpio_table); platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs)); } diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index 73a3f49b4eb6..d263528c90bb 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -20,6 +20,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/gpio_keys.h> +#include <linux/gpio/machine.h> #include <linux/dmi.h> #include <asm/geode.h> @@ -53,21 +54,15 @@ static struct platform_device geos_buttons_dev = { static struct gpio_led geos_leds[] = { { .name = "geos:1", - .gpio = 6, .default_trigger = "default-on", - .active_low = 1, }, { .name = "geos:2", - .gpio = 25, .default_trigger = "default-off", - .active_low = 1, }, { .name = "geos:3", - .gpio = 27, .default_trigger = "default-off", - .active_low = 1, }, }; @@ -76,6 +71,17 @@ static struct gpio_led_platform_data geos_leds_data = { .leds = geos_leds, }; +static struct gpiod_lookup_table geos_leds_gpio_table = { + .dev_id = "leds-gpio", + .table = { + /* The Geode GPIOs should be on the CS5535 companion chip */ + GPIO_LOOKUP_IDX("cs5535-gpio", 6, NULL, 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("cs5535-gpio", 25, NULL, 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("cs5535-gpio", 27, NULL, 2, GPIO_ACTIVE_LOW), + { } + }, +}; + static struct platform_device geos_leds_dev = { .name = "leds-gpio", .id = -1, @@ -90,6 +96,7 @@ static struct platform_device *geos_devs[] __initdata = { static void __init register_geos(void) { /* Setup LED control through leds-gpio driver */ + gpiod_add_lookup_table(&geos_leds_gpio_table); platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs)); } diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 
163e1b545517..558384acd777 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -20,6 +20,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/gpio_keys.h> +#include <linux/gpio/machine.h> #include <asm/geode.h> @@ -55,9 +56,7 @@ static struct platform_device net5501_buttons_dev = { static struct gpio_led net5501_leds[] = { { .name = "net5501:1", - .gpio = 6, .default_trigger = "default-on", - .active_low = 0, }, }; @@ -66,6 +65,15 @@ static struct gpio_led_platform_data net5501_leds_data = { .leds = net5501_leds, }; +static struct gpiod_lookup_table net5501_leds_gpio_table = { + .dev_id = "leds-gpio", + .table = { + /* The Geode GPIOs should be on the CS5535 companion chip */ + GPIO_LOOKUP_IDX("cs5535-gpio", 6, NULL, 0, GPIO_ACTIVE_HIGH), + { } + }, +}; + static struct platform_device net5501_leds_dev = { .name = "leds-gpio", .id = -1, @@ -80,6 +88,7 @@ static struct platform_device *net5501_devs[] __initdata = { static void __init register_net5501(void) { /* Setup LED control through leds-gpio driver */ + gpiod_add_lookup_table(&net5501_leds_gpio_table); platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs)); } diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile deleted file mode 100644 index 072c395379ac..000000000000 --- a/arch/x86/platform/goldfish/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_GOLDFISH) += goldfish.o diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c deleted file mode 100644 index 6b6f8b4360dd..000000000000 --- a/arch/x86/platform/goldfish/goldfish.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2007 Google, Inc. - * Copyright (C) 2011 Intel, Inc. - * Copyright (C) 2013 Intel, Inc. 
- */ - -#include <linux/kernel.h> -#include <linux/irq.h> -#include <linux/platform_device.h> - -/* - * Where in virtual device memory the IO devices (timers, system controllers - * and so on) - */ - -#define GOLDFISH_PDEV_BUS_BASE (0xff001000) -#define GOLDFISH_PDEV_BUS_END (0xff7fffff) -#define GOLDFISH_PDEV_BUS_IRQ (4) - -#define GOLDFISH_TTY_BASE (0x2000) - -static struct resource goldfish_pdev_bus_resources[] = { - { - .start = GOLDFISH_PDEV_BUS_BASE, - .end = GOLDFISH_PDEV_BUS_END, - .flags = IORESOURCE_MEM, - }, - { - .start = GOLDFISH_PDEV_BUS_IRQ, - .end = GOLDFISH_PDEV_BUS_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static bool goldfish_enable __initdata; - -static int __init goldfish_setup(char *str) -{ - goldfish_enable = true; - return 0; -} -__setup("goldfish", goldfish_setup); - -static int __init goldfish_init(void) -{ - if (!goldfish_enable) - return -ENODEV; - - platform_device_register_simple("goldfish_pdev_bus", -1, - goldfish_pdev_bus_resources, 2); - return 0; -} -device_initcall(goldfish_init); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c index 31dda18bb370..2930b6e9473e 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c @@ -88,8 +88,8 @@ static int __init bt_sfi_init(void) memset(&info, 0, sizeof(info)); info.fwnode = ddata->dev->fwnode; info.parent = ddata->dev; - info.name = ddata->name, - info.id = PLATFORM_DEVID_NONE, + info.name = ddata->name; + info.id = PLATFORM_DEVID_NONE; pdev = platform_device_register_full(&info); if (IS_ERR(pdev)) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index ce7188cbdae5..1c3a1962cade 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -867,9 +867,11 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; @@ -910,9 +912,11 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 9a5a50cdaab5..dc0a337f985b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -567,10 +567,16 @@ void noist_exc_debug(struct pt_regs *regs); DEFINE_IDTENTRY_RAW(xenpv_exc_nmi) { - /* On Xen PV, NMI doesn't use IST. The C part is the sane as native. */ + /* On Xen PV, NMI doesn't use IST. The C part is the same as native. */ exc_nmi(regs); } +DEFINE_IDTENTRY_RAW_ERRORCODE(xenpv_exc_double_fault) +{ + /* On Xen PV, DF doesn't use IST. The C part is the same as native. 
*/ + exc_double_fault(regs, error_code); +} + DEFINE_IDTENTRY_RAW(xenpv_exc_debug) { /* @@ -590,6 +596,20 @@ DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) BUG(); } +#ifdef CONFIG_X86_MCE +DEFINE_IDTENTRY_RAW(xenpv_exc_machine_check) +{ + /* + * There's no IST on Xen PV, but we still need to dispatch + * to the correct handler. + */ + if (user_mode(regs)) + noist_exc_machine_check(regs); + else + exc_machine_check(regs); +} +#endif + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -608,9 +628,9 @@ struct trap_array_entry { static struct trap_array_entry trap_array[] = { TRAP_ENTRY_REDIR(exc_debug, true ), - TRAP_ENTRY(exc_double_fault, true ), + TRAP_ENTRY_REDIR(exc_double_fault, true ), #ifdef CONFIG_X86_MCE - TRAP_ENTRY(exc_machine_check, true ), + TRAP_ENTRY_REDIR(exc_machine_check, true ), #endif TRAP_ENTRY_REDIR(exc_nmi, true ), TRAP_ENTRY(exc_int3, false ), @@ -1015,8 +1035,6 @@ void __init xen_setup_vcpu_info_placement(void) */ if (xen_have_vcpu_info_placement) { pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); - pv_ops.irq.restore_fl = - __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_ops.irq.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); pv_ops.irq.irq_enable = @@ -1053,7 +1071,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_pmc = xen_read_pmc, .iret = xen_iret, - .usergs_sysret64 = xen_sysret64, .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, @@ -1078,9 +1095,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - /* Xen takes care of %gs when switching to usermode for us */ - .swapgs = paravirt_nop, - .start_context_switch = paravirt_start_context_switch, .end_context_switch = xen_end_context_switch, }; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 850c93f346c7..dfa091d79c2e 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -42,28 +42,6 @@ asmlinkage __visible unsigned long xen_save_fl(void) } PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); -__visible void xen_restore_fl(unsigned long flags) -{ - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* See xen_irq_enable() for why preemption must be disabled. */ - preempt_disable(); - vcpu = this_cpu_read(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - - if (flags == 0) { - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - xen_force_evtchn_callback(); - preempt_enable(); - } else - preempt_enable_no_resched(); -} -PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); - asmlinkage __visible void xen_irq_disable(void) { /* There's a one instruction preempt window here. We need to @@ -118,7 +96,6 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initconst = { .save_fl = PV_CALLEE_SAVE(xen_save_fl), - .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 53cf8aa35032..02f31341e435 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -72,34 +72,6 @@ SYM_FUNC_START(xen_save_fl_direct) ret SYM_FUNC_END(xen_save_fl_direct) - -/* - * In principle the caller should be passing us a value return from - * xen_save_fl_direct, but for robustness sake we test only the - * X86_EFLAGS_IF flag rather than the whole byte. 
After setting the - * interrupt mask state, it checks for unmasked pending events and - * enters the hypervisor to get them delivered if so. - */ -SYM_FUNC_START(xen_restore_fl_direct) - FRAME_BEGIN - testw $X86_EFLAGS_IF, %di - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* - * Preempt here doesn't matter because that will deal with any - * pending interrupts. The pending check may end up being run - * on the wrong CPU, but that doesn't hurt. - */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jnz 1f - call check_events -1: - FRAME_END - ret -SYM_FUNC_END(xen_restore_fl_direct) - - /* * Force an event check by making a hypercall, but preserve regs * before making the call. @@ -161,7 +133,7 @@ xen_pv_trap asm_exc_overflow xen_pv_trap asm_exc_bounds xen_pv_trap asm_exc_invalid_op xen_pv_trap asm_exc_device_not_available -xen_pv_trap asm_exc_double_fault +xen_pv_trap asm_xenpv_exc_double_fault xen_pv_trap asm_exc_coproc_segment_overrun xen_pv_trap asm_exc_invalid_tss xen_pv_trap asm_exc_segment_not_present @@ -172,7 +144,7 @@ xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check #ifdef CONFIG_X86_MCE -xen_pv_trap asm_exc_machine_check +xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION @@ -215,26 +187,6 @@ SYM_CODE_START(xen_iret) jmp hypercall_iret SYM_CODE_END(xen_iret) -SYM_CODE_START(xen_sysret64) - /* - * We're already on the usermode stack at this point, but - * still with the kernel gs, so we can easily switch back. - * - * tss.sp2 is scratch space. - */ - movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq $__USER_DS - pushq PER_CPU_VAR(cpu_tss_rw + TSS_sp2) - pushq %r11 - pushq $__USER_CS - pushq %rcx - - pushq $VGCF_in_syscall - jmp hypercall_iret -SYM_CODE_END(xen_sysret64) - /* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9546c3384c75..8d7ec49a35fb 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -131,15 +131,12 @@ static inline void __init xen_efi_init(struct boot_params *boot_params) __visible void xen_irq_enable_direct(void); __visible void xen_irq_disable_direct(void); __visible unsigned long xen_save_fl_direct(void); -__visible void xen_restore_fl_direct(unsigned long); __visible unsigned long xen_read_cr2(void); __visible unsigned long xen_read_cr2_direct(void); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); -__visible void xen_sysret32(void); -__visible void xen_sysret64(void); extern int xen_panic_handler_init(void); |
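/*
 * [Editorial note, appended after the quoted portion of the patch] The
 * xen_restore_fl()/xen_restore_fl_direct() helpers removed in the xen/irq.c
 * and xen-asm.S hunks above implement one small idea: the Xen PV "interrupt
 * flag" is the inverse of the per-vCPU event-channel mask, and after unmasking
 * the code must re-check for events that arrived while masked and force a
 * callback if any are pending. A standalone model of that logic; the types,
 * names and the missing barrier/preemption handling are simplifications, not
 * the kernel's interfaces:
 */
#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL

struct fake_vcpu_info {
	unsigned char evtchn_upcall_mask;     /* 1 = events masked */
	unsigned char evtchn_upcall_pending;  /* 1 = event arrived while masked */
};

static void deliver_pending_events(void)
{
	/* stands in for xen_force_evtchn_callback() in the removed code */
	puts("hypercall: deliver pending event-channel upcalls");
}

static void restore_fl_model(struct fake_vcpu_info *vcpu, unsigned long flags)
{
	/* EFLAGS.IF set => unmask (mask = 0); IF clear => mask (mask = 1) */
	vcpu->evtchn_upcall_mask = !(flags & X86_EFLAGS_IF);

	/* unmask first, then check, so events raised in between are not lost */
	if (!vcpu->evtchn_upcall_mask && vcpu->evtchn_upcall_pending)
		deliver_pending_events();
}

int main(void)
{
	struct fake_vcpu_info v = { .evtchn_upcall_mask = 1,
				    .evtchn_upcall_pending = 1 };
	restore_fl_model(&v, X86_EFLAGS_IF);  /* "restore" with IF set */
	return 0;
}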