x86/hyperv: fix root partition faults when writing to VP assist page MSR

For the root partition, the VP assist pages are pre-determined by the hypervisor, and the root kernel is not allowed to move them to different locations. The current implementation nevertheless tries to write the VP assist page MSR on the root partition, which results in the following stack trace:

[ 2.778197] unchecked MSR access error: WRMSR to 0x40000073 (tried to write 0x0000000145ac5001) at rIP: 0xffffffff810c1084 (native_write_msr+0x4/0x30)
[ 2.784867] Call Trace:
[ 2.791507] hv_cpu_init+0xf1/0x1c0
[ 2.798144] ? hyperv_report_panic+0xd0/0xd0
[ 2.804806] cpuhp_invoke_callback+0x11a/0x440
[ 2.811465] ? hv_resume+0x90/0x90
[ 2.818137] cpuhp_issue_call+0x126/0x130
[ 2.824782] __cpuhp_setup_state_cpuslocked+0x102/0x2b0
[ 2.831427] ? hyperv_report_panic+0xd0/0xd0
[ 2.838075] ? hyperv_report_panic+0xd0/0xd0
[ 2.844723] ? hv_resume+0x90/0x90
[ 2.851375] __cpuhp_setup_state+0x3d/0x90
[ 2.858030] hyperv_init+0x14e/0x410
[ 2.864689] ? enable_IR_x2apic+0x190/0x1a0
[ 2.871349] apic_intr_mode_init+0x8b/0x100
[ 2.878017] x86_late_time_init+0x20/0x30
[ 2.884675] start_kernel+0x459/0x4fb
[ 2.891329] secondary_startup_64_no_verify+0xb0/0xbb

Since the hypervisor already provides the VP assist pages for the root partition, we need to memremap the memory from the hypervisor for the root kernel to use. The mapping is done in hv_cpu_init during bringup and is unmapped in hv_cpu_die during teardown.

Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
Reviewed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Link: https://lore.kernel.org/r/20210731120519.17154-1-kumarpraveen@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
author: Praveen Kumar <kumarpraveen@linux.microsoft.com> 2021-07-31 14:05:19 +0200
committer: Wei Liu <wei.liu@kernel.org> 2021-08-04 13:56:53 +0200
commit: e5d9b714fe40270222a7de9dcd1cf62dad63eeef (patch)
tree: 5a75292240f981d4494dd05a092c1a8217e80a9d /arch
parent: hv: hyperv.h: Remove unused inline functions (diff)
download: linux-e5d9b714fe40270222a7de9dcd1cf62dad63eeef.tar.xz
linux-e5d9b714fe40270222a7de9dcd1cf62dad63eeef.zip
2 files changed, 53 insertions, 20 deletions
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6f247e7e07eb..708a2712a516 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -44,6 +44,7 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
 
 static int hv_cpu_init(unsigned int cpu)
 {
+	union hv_vp_assist_msr_contents msr = { 0 };
 	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
 	int ret;
 
@@ -54,25 +55,34 @@ static int hv_cpu_init(unsigned int cpu)
 	if (!hv_vp_assist_page)
 		return 0;
 
-	/*
-	 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
-	 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
-	 * we always write the EOI MSR in hv_apic_eoi_write() *after* the
-	 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
-	 * not be stopped in the case of CPU offlining and the VM will hang.
-	 */
 	if (!*hvp) {
-		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
-	}
-
-	if (*hvp) {
-		u64 val;
-
-		val = vmalloc_to_pfn(*hvp);
-		val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
-			HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
+		if (hv_root_partition) {
+			/*
+			 * For root partition we get the hypervisor provided VP assist
+			 * page, instead of allocating a new page.
+			 */
+			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+			*hvp = memremap(msr.pfn <<
+					HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT,
+					PAGE_SIZE, MEMREMAP_WB);
+		} else {
+			/*
+			 * The VP assist page is an "overlay" page (see Hyper-V TLFS's
+			 * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed
+			 * out to make sure we always write the EOI MSR in
+			 * hv_apic_eoi_write() *after* the EOI optimization is disabled
+			 * in hv_cpu_die(), otherwise a CPU may not be stopped in the
+			 * case of CPU offlining and the VM will hang.
+			 */
+			*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO);
+			if (*hvp)
+				msr.pfn = vmalloc_to_pfn(*hvp);
+		}
+		WARN_ON(!(*hvp));
+		if (*hvp) {
+			msr.enable = 1;
+			wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+		}
 	}
 
 	return 0;
@@ -170,8 +180,22 @@ static int hv_cpu_die(unsigned int cpu)
 
 	hv_common_cpu_die(cpu);
 
-	if (hv_vp_assist_page && hv_vp_assist_page[cpu])
-		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
+	if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
+		union hv_vp_assist_msr_contents msr = { 0 };
+		if (hv_root_partition) {
+			/*
+			 * For root partition the VP assist page is mapped to
+			 * hypervisor provided page, and thus we unmap the
+			 * page here and nullify it, so that in future we have
+			 * correct page address mapped in hv_cpu_init.
+			 */
+			memunmap(hv_vp_assist_page[cpu]);
+			hv_vp_assist_page[cpu] = NULL;
+			rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+			msr.enable = 0;
+		}
+		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64);
+	}
 
 	if (hv_reenlightenment_cb == NULL)
 		return 0;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index f1366ce609e3..2322d6bd5883 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -288,6 +288,15 @@ union hv_x64_msr_hypercall_contents {
 	} __packed;
 };
 
+union hv_vp_assist_msr_contents {
+	u64 as_uint64;
+	struct {
+		u64 enable:1;
+		u64 reserved:11;
+		u64 pfn:52;
+	} __packed;
+};
+
 struct hv_reenlightenment_control {
 	__u64 vector:8;
 	__u64 reserved1:8;
author	Praveen Kumar <kumarpraveen@linux.microsoft.com>	2021-07-31 14:05:19 +0200
committer	Wei Liu <wei.liu@kernel.org>	2021-08-04 13:56:53 +0200
commit	e5d9b714fe40270222a7de9dcd1cf62dad63eeef (patch)
tree	5a75292240f981d4494dd05a092c1a8217e80a9d /arch
parent	hv: hyperv.h: Remove unused inline functions (diff)
download	linux-e5d9b714fe40270222a7de9dcd1cf62dad63eeef.tar.xz linux-e5d9b714fe40270222a7de9dcd1cf62dad63eeef.zip