From 193061ea0a50c13f72b907e6fa7befa6e15a4302 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 18 Aug 2023 06:29:14 -0400 Subject: drivers: hv: Mark percpu hvcall input arg page unencrypted in SEV-SNP enlightened guest Hypervisor needs to access input arg, VMBus synic event and message pages. Mark these pages unencrypted in the SEV-SNP guest and free them only if they have been marked encrypted successfully. Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230818102919.1318039-5-ltykernel@gmail.com --- drivers/hv/hv.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) (limited to 'drivers/hv/hv.c') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index de6708dbe0df..ec6e35a0d9bf 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" /* The one and only */ @@ -78,7 +79,7 @@ int hv_post_message(union hv_connection_id connection_id, int hv_synic_alloc(void) { - int cpu; + int cpu, ret = -ENOMEM; struct hv_per_cpu_context *hv_cpu; /* @@ -123,26 +124,76 @@ int hv_synic_alloc(void) goto err; } } + + if (hv_isolation_type_en_snp()) { + ret = set_memory_decrypted((unsigned long) + hv_cpu->synic_message_page, 1); + if (ret) { + pr_err("Failed to decrypt SYNIC msg page: %d\n", ret); + hv_cpu->synic_message_page = NULL; + + /* + * Free the event page here so that hv_synic_free() + * won't later try to re-encrypt it. + */ + free_page((unsigned long)hv_cpu->synic_event_page); + hv_cpu->synic_event_page = NULL; + goto err; + } + + ret = set_memory_decrypted((unsigned long) + hv_cpu->synic_event_page, 1); + if (ret) { + pr_err("Failed to decrypt SYNIC event page: %d\n", ret); + hv_cpu->synic_event_page = NULL; + goto err; + } + + memset(hv_cpu->synic_message_page, 0, PAGE_SIZE); + memset(hv_cpu->synic_event_page, 0, PAGE_SIZE); + } } return 0; + err: /* * Any memory allocations that succeeded will be freed when * the caller cleans up by calling hv_synic_free() */ - return -ENOMEM; + return ret; } void hv_synic_free(void) { - int cpu; + int cpu, ret; for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + /* It's better to leak the page if the encryption fails. */ + if (hv_isolation_type_en_snp()) { + if (hv_cpu->synic_message_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->synic_message_page, 1); + if (ret) { + pr_err("Failed to encrypt SYNIC msg page: %d\n", ret); + hv_cpu->synic_message_page = NULL; + } + } + + if (hv_cpu->synic_event_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->synic_event_page, 1); + if (ret) { + pr_err("Failed to encrypt SYNIC event page: %d\n", ret); + hv_cpu->synic_event_page = NULL; + } + } + } + free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); } -- cgit v1.2.3 From 68f2f2bc163d4427b04f0fb6421f091f948175fe Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:05 -0700 Subject: Drivers: hv: vmbus: Support fully enlightened TDX guests Add Hyper-V specific code so that a fully enlightened TDX guest (i.e. without the paravisor) can run on Hyper-V: Don't use hv_vp_assist_page. Use GHCI instead. Don't try to use the unsupported HV_REGISTER_CRASH_CTL. Don't trust (use) Hyper-V's TLB-flushing hypercalls. Don't use lazy EOI. Share the SynIC Event/Message pages with the hypervisor. Don't use the Hyper-V TSC page for now, because non-trivial work is required to share the page with the hypervisor. Reviewed-by: Michael Kelley Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-4-decui@microsoft.com --- arch/x86/hyperv/hv_apic.c | 15 ++++++++++++--- arch/x86/hyperv/hv_init.c | 19 +++++++++++++++---- arch/x86/kernel/cpu/mshyperv.c | 14 ++++++++++++++ drivers/hv/hv.c | 9 +++++++-- 4 files changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers/hv/hv.c') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 1fbda2f94184..cb7429046d18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -177,8 +177,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; - if (!hv_hypercall_pg) - return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; @@ -231,9 +234,15 @@ static bool __send_ipi_one(int cpu, int vector) trace_hyperv_send_ipi_one(cpu, vector); - if (!hv_hypercall_pg || (vp == VP_INVAL)) + if (vp == VP_INVAL) return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 255e02ec467e..c1c1b4e1502f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -80,7 +80,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + struct hv_vp_assist_page **hvp; int ret; ret = hv_common_cpu_init(cpu); @@ -90,6 +90,7 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; + hvp = &hv_vp_assist_page[cpu]; if (hv_root_partition) { /* * For root partition we get the hypervisor provided VP assist @@ -442,11 +443,21 @@ void __init hyperv_init(void) if (hv_common_init()) return; - hv_vp_assist_page = kcalloc(num_possible_cpus(), - sizeof(*hv_vp_assist_page), GFP_KERNEL); + /* + * The VP assist page is useless to a TDX guest: the only use we + * would have for it is lazy EOI, which can not be used with TDX. + */ + if (hv_isolation_type_tdx()) + hv_vp_assist_page = NULL; + else + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), + GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto common_free; + + if (!hv_isolation_type_tdx()) + goto common_free; } if (hv_isolation_type_snp()) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 63093870ec33..ff3d9c5de19c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -420,6 +420,20 @@ static void __init ms_hyperv_init_platform(void) static_branch_enable(&isolation_type_en_snp); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); + + /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; + + if (!ms_hyperv.paravisor_present) { + /* To be supported: more work is required. */ + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + + /* HV_REGISTER_CRASH_CTL is unsupported. */ + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + + /* Don't trust Hyper-V's TLB-flushing hypercalls. */ + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + } } } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ec6e35a0d9bf..d1064118a72f 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -121,11 +121,15 @@ int hv_synic_alloc(void) (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_event_page == NULL) { pr_err("Unable to allocate SYNIC event page\n"); + + free_page((unsigned long)hv_cpu->synic_message_page); + hv_cpu->synic_message_page = NULL; goto err; } } - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long) hv_cpu->synic_message_page, 1); if (ret) { @@ -174,7 +178,8 @@ void hv_synic_free(void) = per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { ret = set_memory_encrypted((unsigned long) hv_cpu->synic_message_page, 1); -- cgit v1.2.3 From d3a9d7e49d15316f68f4347f48adcd1665834980 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:08 -0700 Subject: x86/hyperv: Introduce a global variable hyperv_paravisor_present The new variable hyperv_paravisor_present is set only when the VM is a SNP/TDX VM with the paravisor running: see ms_hyperv_init_platform(). We introduce hyperv_paravisor_present because we can not use ms_hyperv.paravisor_present in arch/x86/include/asm/mshyperv.h: struct ms_hyperv_info is defined in include/asm-generic/mshyperv.h, which is included at the end of arch/x86/include/asm/mshyperv.h, but at the beginning of arch/x86/include/asm/mshyperv.h, we would already need to use struct ms_hyperv_info in hv_do_hypercall(). We use hyperv_paravisor_present only in include/asm-generic/mshyperv.h, and use ms_hyperv.paravisor_present elsewhere. In the future, we'll introduce a hypercall function structure for different VM types, and at boot time, the right function pointers would be written into the structure so that runtime testing of TDX vs. SNP vs. normal will be avoided and hyperv_paravisor_present will no longer be needed. Call hv_vtom_init() when it's a VBS VM or when ms_hyperv.paravisor_present is true, i.e. the VM is a SNP VM or TDX VM with the paravisor. Enhance hv_vtom_init() for a TDX VM with the paravisor. In hv_common_cpu_init(), don't decrypt the hyperv_pcpu_input_arg for a TDX VM with the paravisor, just like we don't decrypt the page for a SNP VM with the paravisor. Signed-off-by: Dexuan Cui Reviewed-by: Tianyu Lan Reviewed-by: Michael Kelley Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-7-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 4 ++-- arch/x86/hyperv/ivm.c | 38 +++++++++++++++++++++++++++++++++++--- arch/x86/include/asm/mshyperv.h | 15 ++++++++++----- arch/x86/kernel/cpu/mshyperv.c | 9 +++++++-- drivers/hv/connection.c | 15 +++++++++++---- drivers/hv/hv.c | 10 +++++----- drivers/hv/hv_common.c | 3 ++- 7 files changed, 72 insertions(+), 22 deletions(-) (limited to 'drivers/hv/hv.c') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index c1c1b4e1502f..eca5c4b7e3b5 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -658,8 +658,8 @@ bool hv_is_hyperv_initialized(void) if (x86_hyper_type != X86_HYPER_MS_HYPERV) return false; - /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ - if (hv_isolation_type_tdx()) + /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) return true; /* * Verify that earlier initialization succeeded by checking diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 6c7598d9e68a..7bd0359d5e38 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -248,6 +248,9 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) } EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * @@ -368,6 +371,10 @@ static bool hv_is_private_mmio(u64 addr) return false; } +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ + +#ifdef CONFIG_AMD_MEM_ENCRYPT + #define hv_populate_vmcb_seg(seg, gdtr_base) \ do { \ if (seg.selector) { \ @@ -495,15 +502,40 @@ int hv_snp_boot_ap(int cpu, unsigned long start_ip) return ret; } +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) + void __init hv_vtom_init(void) { + enum hv_isolation_type type = hv_get_isolation_type(); + + switch (type) { + case HV_ISOLATION_TYPE_VBS: + fallthrough; /* * By design, a VM using vTOM doesn't see the SEV setting, * so SEV initialization is bypassed and sev_status isn't set. * Set it here to indicate a vTOM VM. + * + * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is + * defined as 0ULL, to which we can't assigned a value. */ - sev_status = MSR_AMD64_SNP_VTOM; - cc_vendor = CC_VENDOR_AMD; +#ifdef CONFIG_AMD_MEM_ENCRYPT + case HV_ISOLATION_TYPE_SNP: + sev_status = MSR_AMD64_SNP_VTOM; + cc_vendor = CC_VENDOR_AMD; + break; +#endif + + case HV_ISOLATION_TYPE_TDX: + cc_vendor = CC_VENDOR_INTEL; + break; + + default: + panic("hv_vtom_init: unsupported isolation type %d\n", type); + } + cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -516,7 +548,7 @@ void __init hv_vtom_init(void) mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } -#endif /* CONFIG_AMD_MEM_ENCRYPT */ +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ enum hv_isolation_type hv_get_isolation_type(void) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6a9e00c4730b..a9f453c39371 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -42,6 +42,7 @@ static inline unsigned char hv_get_nmi_reason(void) #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; +extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; @@ -75,7 +76,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input_address, output_address); if (hv_isolation_type_en_snp()) { @@ -131,7 +132,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, 0); if (hv_isolation_type_en_snp()) { @@ -185,7 +186,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, input2); if (hv_isolation_type_en_snp()) { @@ -278,19 +279,23 @@ void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -void hv_vtom_init(void); int hv_snp_boot_ap(int cpu, unsigned long start_ip); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline void hv_vtom_init(void) {} static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif extern bool hv_isolation_type_snp(void); +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +#else +static inline void hv_vtom_init(void) {} +#endif + static inline bool hv_is_synic_reg(unsigned int reg) { return (reg >= HV_REGISTER_SCONTROL) && diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index fe5393d759d3..4c5a174935ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -40,6 +40,10 @@ bool hv_root_partition; bool hv_nested; struct ms_hyperv_info ms_hyperv; +/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ +bool hyperv_paravisor_present __ro_after_init; +EXPORT_SYMBOL_GPL(hyperv_paravisor_present); + #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_reg(unsigned int reg) { @@ -429,6 +433,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + hyperv_paravisor_present = !!ms_hyperv.paravisor_present; + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); @@ -526,8 +532,7 @@ static void __init ms_hyperv_init_platform(void) #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || - ((hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) && - ms_hyperv.paravisor_present)) + ms_hyperv.paravisor_present) hv_vtom_init(); /* * Setup the hook to get control post apic initialization. diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 02b54f85dc60..2dd972ca85dd 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -484,10 +484,17 @@ void vmbus_set_event(struct vmbus_channel *channel) ++channel->sig_events; - if (hv_isolation_type_snp()) - hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, - NULL, sizeof(channel->sig_event)); - else + if (ms_hyperv.paravisor_present) { + if (hv_isolation_type_snp()) + hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, + NULL, sizeof(channel->sig_event)); + else if (hv_isolation_type_tdx()) + hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, + channel->sig_event, 0); + else + WARN_ON_ONCE(1); + } else { hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); + } } EXPORT_SYMBOL_GPL(vmbus_set_event); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d1064118a72f..48b1623112f0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -109,7 +109,7 @@ int hv_synic_alloc(void) * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. */ - if (!hv_isolation_type_snp() && !hv_root_partition) { + if (!ms_hyperv.paravisor_present && !hv_root_partition) { hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_message_page == NULL) { @@ -226,7 +226,7 @@ void hv_synic_enable_regs(unsigned int cpu) simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); simp.simp_enabled = 1; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -245,7 +245,7 @@ void hv_synic_enable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 1; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -328,7 +328,7 @@ void hv_synic_disable_regs(unsigned int cpu) * addresses. */ simp.simp_enabled = 0; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { iounmap(hv_cpu->synic_message_page); hv_cpu->synic_message_page = NULL; } else { @@ -340,7 +340,7 @@ void hv_synic_disable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 0; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { iounmap(hv_cpu->synic_event_page); hv_cpu->synic_event_page = NULL; } else { diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4c858e1636da..e62d64753902 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -382,7 +382,8 @@ int hv_common_cpu_init(unsigned int cpu) *outputarg = (char *)mem + HV_HYP_PAGE_SIZE; } - if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long)mem, pgcount); if (ret) { /* It may be unsafe to free 'mem' */ -- cgit v1.2.3 From 23378295042a4bcaeec350733a4771678e7a1f3a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:09 -0700 Subject: Drivers: hv: vmbus: Bring the post_msg_page back for TDX VMs with the paravisor The post_msg_page was removed in commit 9a6b1a170ca8 ("Drivers: hv: vmbus: Remove the per-CPU post_msg_page") However, it turns out that we need to bring it back, but only for a TDX VM with the paravisor: in such a VM, the hyperv_pcpu_input_arg is not decrypted, but the HVCALL_POST_MESSAGE in such a VM needs a decrypted page as the hypercall input page: see the comments in hyperv_init() for a detailed explanation. Except for HVCALL_POST_MESSAGE and HVCALL_SIGNAL_EVENT, the other hypercalls in a TDX VM with the paravisor still use hv_hypercall_pg and must use the hyperv_pcpu_input_arg (which is encrypted in such a VM), when a hypercall input page is used. Signed-off-by: Dexuan Cui Reviewed-by: Tianyu Lan Reviewed-by: Michael Kelley Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-8-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 20 ++++++++++++++-- drivers/hv/hv.c | 59 ++++++++++++++++++++++++++++++++++++++++++----- drivers/hv/hyperv_vmbus.h | 11 +++++++++ 3 files changed, 82 insertions(+), 8 deletions(-) (limited to 'drivers/hv/hv.c') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index eca5c4b7e3b5..3729eee21e47 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -480,6 +480,22 @@ void __init hyperv_init(void) * Setup the hypercall page and enable hypercalls. * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page + * + * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: + * when the hypercall input is a page, such a VM must pass a decrypted + * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page + * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. + * + * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, + * which are handled by the paravisor and the VM must use an encrypted + * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and + * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and + * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: + * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). + * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. + * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; + * instead, hv_post_message() uses the post_msg_page, which is decrypted + * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); @@ -487,8 +503,8 @@ void __init hyperv_init(void) /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); - /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ - if (hv_isolation_type_tdx()) + /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) goto skip_hypercall_pg_init; hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 48b1623112f0..523c5d99f375 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -57,20 +57,37 @@ int hv_post_message(union hv_connection_id connection_id, local_irq_save(flags); - aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* + * A TDX VM with the paravisor must use the decrypted post_msg_page: see + * the comment in struct hv_per_cpu_context. A SNP VM with the paravisor + * can use the encrypted hyperv_pcpu_input_arg because it copies the + * input into the GHCB page, which has been decrypted by the paravisor. + */ + if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present) + aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page; + else + aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + aligned_msg->connectionid = connection_id; aligned_msg->reserved = 0; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void *)aligned_msg->payload, payload, payload_size); - if (hv_isolation_type_snp()) - status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, - (void *)aligned_msg, NULL, - sizeof(*aligned_msg)); - else + if (ms_hyperv.paravisor_present) { + if (hv_isolation_type_tdx()) + status = hv_tdx_hypercall(HVCALL_POST_MESSAGE, + virt_to_phys(aligned_msg), 0); + else if (hv_isolation_type_snp()) + status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, + aligned_msg, NULL, + sizeof(*aligned_msg)); + else + status = HV_STATUS_INVALID_PARAMETER; + } else { status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL); + } local_irq_restore(flags); @@ -105,6 +122,24 @@ int hv_synic_alloc(void) tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); + if (hv_cpu->post_msg_page == NULL) { + pr_err("Unable to allocate post msg page\n"); + goto err; + } + + ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to decrypt post msg page: %d\n", ret); + /* Just leak the page, as it's unsafe to free the page. */ + hv_cpu->post_msg_page = NULL; + goto err; + } + + memset(hv_cpu->post_msg_page, 0, PAGE_SIZE); + } + /* * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. @@ -178,6 +213,17 @@ void hv_synic_free(void) = per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + if (hv_cpu->post_msg_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to encrypt post msg page: %d\n", ret); + hv_cpu->post_msg_page = NULL; + } + } + } + if (!ms_hyperv.paravisor_present && (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { @@ -199,6 +245,7 @@ void hv_synic_free(void) } } + free_page((unsigned long)hv_cpu->post_msg_page); free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 55f2086841ae..f6b1e710f805 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -123,6 +123,17 @@ struct hv_per_cpu_context { void *synic_message_page; void *synic_event_page; + /* + * The page is only used in hv_post_message() for a TDX VM (with the + * paravisor) to post a messages to Hyper-V: when such a VM calls + * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which + * is encrypted in such a VM) as the hypercall input page, because + * the input page for HVCALL_POST_MESSAGE must be decrypted in such a + * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is + * introduced for this purpose. See hyperv_init() for more comments. + */ + void *post_msg_page; + /* * Starting with win8, we can take channel interrupts on any CPU; * we will manage the tasklet that handles events messages on a per CPU -- cgit v1.2.3 From e3131f1c81448a87e08dffd21867312a5ce563d9 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:11 -0700 Subject: x86/hyperv: Remove hv_isolation_type_en_snp In ms_hyperv_init_platform(), do not distinguish between a SNP VM with the paravisor and a SNP VM without the paravisor. Replace hv_isolation_type_en_snp() with !ms_hyperv.paravisor_present && hv_isolation_type_snp(). The hv_isolation_type_en_snp() in drivers/hv/hv.c and drivers/hv/hv_common.c can be changed to hv_isolation_type_snp() since we know !ms_hyperv.paravisor_present is true there. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-10-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 8 ++++---- arch/x86/hyperv/ivm.c | 12 +----------- arch/x86/include/asm/mshyperv.h | 11 ++++------- arch/x86/kernel/cpu/mshyperv.c | 10 ++++------ drivers/hv/hv.c | 4 ++-- drivers/hv/hv_common.c | 8 +------- include/asm-generic/mshyperv.h | 3 +-- 7 files changed, 17 insertions(+), 39 deletions(-) (limited to 'drivers/hv/hv.c') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index c4cffa3b1c3c..2b0124394e24 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -52,7 +52,7 @@ static int hyperv_init_ghcb(void) void *ghcb_va; void **ghcb_base; - if (!hv_isolation_type_snp()) + if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) return 0; if (!hv_ghcb_pg) @@ -117,7 +117,7 @@ static int hv_cpu_init(unsigned int cpu) * is blocked to run in Confidential VM. So only decrypt assist * page in non-root partition here. */ - if (*hvp && hv_isolation_type_en_snp()) { + if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) { WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1)); memset(*hvp, 0, PAGE_SIZE); } @@ -460,7 +460,7 @@ void __init hyperv_init(void) goto common_free; } - if (hv_isolation_type_snp()) { + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. */ if (!hv_ghcb_negotiate_protocol()) hv_ghcb_terminate(SEV_TERM_SET_GEN, @@ -583,7 +583,7 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (hv_isolation_type_en_snp()) + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) ms_hyperv.vtl = get_vtl(); return; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index fbc07493fcb4..3d48f823582c 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -637,7 +637,7 @@ bool hv_is_isolation_supported(void) DEFINE_STATIC_KEY_FALSE(isolation_type_snp); /* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based * isolation VM. */ bool hv_isolation_type_snp(void) @@ -645,16 +645,6 @@ bool hv_isolation_type_snp(void) return static_branch_unlikely(&isolation_type_snp); } -DEFINE_STATIC_KEY_FALSE(isolation_type_en_snp); -/* - * hv_isolation_type_en_snp - Check system runs in the AMD SEV-SNP based - * isolation enlightened VM. - */ -bool hv_isolation_type_en_snp(void) -{ - return static_branch_unlikely(&isolation_type_en_snp); -} - DEFINE_STATIC_KEY_FALSE(isolation_type_tdx); /* * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 101f71b85cfd..66ca641a164a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -26,7 +26,6 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); -DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp); DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( @@ -50,7 +49,7 @@ extern u64 hv_current_partition_id; extern union hv_ghcb * __percpu *hv_ghcb_pg; -extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); @@ -79,7 +78,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input_address, output_address); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -135,7 +134,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, 0); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__( "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -189,7 +188,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, input2); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -284,8 +283,6 @@ static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif -extern bool hv_isolation_type_snp(void); - #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) void hv_vtom_init(void); void hv_ivm_msr_write(u64 msr, u64 value); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4f51dac9eeb2..b63590ffc777 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -304,7 +304,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) * Override wakeup_secondary_cpu_64 callback for SEV-SNP * enlightened guest. */ - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; return; } @@ -440,10 +440,7 @@ static void __init ms_hyperv_init_platform(void) if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { - if (ms_hyperv.paravisor_present) - static_branch_enable(&isolation_type_snp); - else - static_branch_enable(&isolation_type_en_snp); + static_branch_enable(&isolation_type_snp); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); @@ -556,7 +553,8 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; - if (hv_root_partition || hv_isolation_type_en_snp()) + if (hv_root_partition || + (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; # endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 523c5d99f375..51e5018ac9b2 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -164,7 +164,7 @@ int hv_synic_alloc(void) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long) hv_cpu->synic_message_page, 1); if (ret) { @@ -225,7 +225,7 @@ void hv_synic_free(void) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { ret = set_memory_encrypted((unsigned long) hv_cpu->synic_message_page, 1); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index e62d64753902..81aa8be3e0df 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -383,7 +383,7 @@ int hv_common_cpu_init(unsigned int cpu) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long)mem, pgcount); if (ret) { /* It may be unsafe to free 'mem' */ @@ -532,12 +532,6 @@ bool __weak hv_isolation_type_snp(void) } EXPORT_SYMBOL_GPL(hv_isolation_type_snp); -bool __weak hv_isolation_type_en_snp(void) -{ - return false; -} -EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp); - bool __weak hv_isolation_type_tdx(void) { return false; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index f577eff58ea0..e7ecf03f675e 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -64,8 +64,7 @@ extern void * __percpu *hyperv_pcpu_output_arg; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); -extern bool hv_isolation_type_snp(void); -extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ -- cgit v1.2.3