From 0b5afe05377d7993f19292bf49dd13e959000790 Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Thu, 23 May 2024 17:40:55 +0000 Subject: KVM: arm64: Add early_param to control WFx trapping Add an early_params to control WFI and WFE trapping. This is to control the degree guests can wait for interrupts on their own without being trapped by KVM. Options for each param are trap and notrap. trap enables the trap. notrap disables the trap. Note that when enabled, traps are allowed but not guaranteed by the CPU architecture. Absent an explicitly set policy, default to current behavior: disabling the trap if only a single task is running and enabling otherwise. Signed-off-by: Colton Lewis Reviewed-by: Jing Zhang Link: https://lore.kernel.org/r/20240523174056.1565133-1-coltonlewis@google.com [ oliver: rework kvm_vcpu_should_clear_tw*() for readability ] Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 59716789fe0f..53e23528d2cf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -48,6 +48,15 @@ static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; +enum kvm_wfx_trap_policy { + KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */ + KVM_WFX_NOTRAP, + KVM_WFX_TRAP, +}; + +static enum kvm_wfx_trap_policy kvm_wfi_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; +static enum kvm_wfx_trap_policy kvm_wfe_trap_policy __read_mostly = KVM_WFX_NOTRAP_SINGLE_TASK; + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); @@ -546,6 +555,24 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) } } +static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu) +{ + if (unlikely(kvm_wfi_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK)) + return kvm_wfi_trap_policy == KVM_WFX_NOTRAP; + + return single_task_running() && + (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || + vcpu->kvm->arch.vgic.nassgireq); +} + +static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu) +{ + if (unlikely(kvm_wfe_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK)) + return kvm_wfe_trap_policy == KVM_WFX_NOTRAP; + + return single_task_running(); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_s2_mmu *mmu; @@ -579,10 +606,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); - if (single_task_running()) - vcpu_clear_wfx_traps(vcpu); + if (kvm_vcpu_should_clear_twe(vcpu)) + vcpu->arch.hcr_el2 &= ~HCR_TWE; + else + vcpu->arch.hcr_el2 |= HCR_TWE; + + if (kvm_vcpu_should_clear_twi(vcpu)) + vcpu->arch.hcr_el2 &= ~HCR_TWI; else - vcpu_set_wfx_traps(vcpu); + vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); @@ -2858,6 +2890,36 @@ static int __init early_kvm_mode_cfg(char *arg) } early_param("kvm-arm.mode", early_kvm_mode_cfg); +static int __init early_kvm_wfx_trap_policy_cfg(char *arg, enum kvm_wfx_trap_policy *p) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "trap") == 0) { + *p = KVM_WFX_TRAP; + return 0; + } + + if (strcmp(arg, "notrap") == 0) { + *p = KVM_WFX_NOTRAP; + return 0; + } + + return -EINVAL; +} + +static int __init early_kvm_wfi_trap_policy_cfg(char *arg) +{ + return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfi_trap_policy); +} +early_param("kvm-arm.wfi_trap_policy", early_kvm_wfi_trap_policy_cfg); + +static int __init 
early_kvm_wfe_trap_policy_cfg(char *arg) +{ + return early_kvm_wfx_trap_policy_cfg(arg, &kvm_wfe_trap_policy); +} +early_param("kvm-arm.wfe_trap_policy", early_kvm_wfe_trap_policy_cfg); + enum kvm_mode kvm_get_mode(void) { return kvm_mode; -- cgit v1.2.3 From eb9d53d4a949c6d6d7c9f130e537f6b5687fedf9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 13:58:58 +0100 Subject: KVM: arm64: nv: Fix RESx behaviour of disabled FGTs with negative polarity The Fine Grained Trap extension is pretty messy as it doesn't consistently use the same polarity for all trap bits. A bunch of them, added later in the life of the architecture, have a *negative* priority. So if these bits are disabled, they must be RES1 and not RES0. But that's not what the code implements, making the traps for these negative trap bits being always on instead of disabled. Fix the relevant bits, and stick a brown paper bag on my head for the rest of the day... Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614125858.78361-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index bae8536cbf00..0acb60273482 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -328,21 +328,21 @@ int kvm_init_nv_sysregs(struct kvm *kvm) HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 | HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1); if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA)) - res0 |= HFGxTR_EL2_nACCDATA_EL1; + res1 |= HFGxTR_EL2_nACCDATA_EL1; if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) - res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1); + res1 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP)) - res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); + res1 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) - res0 |= HFGxTR_EL2_nRCWMASK_EL1; + res1 |= HFGxTR_EL2_nRCWMASK_EL1; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) - res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); + res1 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) - res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); + res1 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - res0 |= HFGxTR_EL2_nS2POR_EL1; + res1 |= HFGxTR_EL2_nS2POR_EL1; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) - res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1); + res1 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1); set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1); set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1); @@ -378,10 +378,10 @@ int kvm_init_nv_sysregs(struct kvm *kvm) HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 | HDFGRTR_EL2_TRBTRG_EL1); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) - res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL | + res1 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL | HDFGRTR_EL2_nBRBDATA); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2)) - res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1; + res1 |= HDFGRTR_EL2_nPMSNEVFR_EL1; set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1); /* Reuse the bits from the read-side and add the write-specific stuff */ @@ -417,9 +417,9 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= 
(HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX | HFGITR_EL2_CPPRCTX); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) - res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL); + res1 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) - res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 | + res1 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 | HFGITR_EL2_nGCSEPP); if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX)) res0 |= HFGITR_EL2_COSPRCTX; -- cgit v1.2.3 From c9c012625e12699a4c6c4d4cdd17cbe600e01cd2 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:32 +0000 Subject: KVM: arm64: Trap FFA_VERSION host call in pKVM The pKVM hypervisor initializes with FF-A version 1.0. The spec requires that no other FF-A calls to be issued before the version negotiation phase is complete. Split the hypervisor proxy initialization code in two parts so that we can move the later one after the host negotiates its version. Without trapping the call, the host drivers can negotiate a higher version number with TEE which can result in a different memory layout described during the memory sharing calls. Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-2-sebastianene@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 120 +++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 30 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 02746f9d0980..c8ab51c331f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -67,6 +67,9 @@ struct kvm_ffa_buffers { */ static struct kvm_ffa_buffers hyp_buffers; static struct kvm_ffa_buffers host_buffers; +static u32 hyp_ffa_version; +static bool has_version_negotiated; +static hyp_spinlock_t version_lock; static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) { @@ -639,6 +642,83 @@ out_handled: return true; } +static int hyp_ffa_post_init(void) +{ + size_t min_rxtx_sz; + struct arm_smccc_res res; + + arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + if (res.a2 != HOST_FFA_ID) + return -EINVAL; + + arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, + 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + switch (res.a2) { + case FFA_FEAT_RXTX_MIN_SZ_4K: + min_rxtx_sz = SZ_4K; + break; + case FFA_FEAT_RXTX_MIN_SZ_16K: + min_rxtx_sz = SZ_16K; + break; + case FFA_FEAT_RXTX_MIN_SZ_64K: + min_rxtx_sz = SZ_64K; + break; + default: + return -EINVAL; + } + + if (min_rxtx_sz > PAGE_SIZE) + return -EOPNOTSUPP; + + return 0; +} + +static void do_ffa_version(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, ffa_req_version, ctxt, 1); + + if (FFA_MAJOR_VERSION(ffa_req_version) != 1) { + res->a0 = FFA_RET_NOT_SUPPORTED; + return; + } + + hyp_spin_lock(&version_lock); + if (has_version_negotiated) { + res->a0 = hyp_ffa_version; + goto unlock; + } + + /* + * If the client driver tries to downgrade the version, we need to ask + * first if TEE supports it. 
+ */ + if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) { + arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0, + 0, 0, 0, 0, 0, + res); + if (res->a0 == FFA_RET_NOT_SUPPORTED) + goto unlock; + + hyp_ffa_version = ffa_req_version; + } + + if (hyp_ffa_post_init()) + res->a0 = FFA_RET_NOT_SUPPORTED; + else { + has_version_negotiated = true; + res->a0 = hyp_ffa_version; + } +unlock: + hyp_spin_unlock(&version_lock); +} + bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { struct arm_smccc_res res; @@ -659,6 +739,11 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; + if (!has_version_negotiated && func_id != FFA_VERSION) { + ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); + goto out_handled; + } + switch (func_id) { case FFA_FEATURES: if (!do_ffa_features(&res, host_ctxt)) @@ -685,6 +770,9 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) case FFA_MEM_FRAG_TX: do_ffa_mem_frag_tx(&res, host_ctxt); goto out_handled; + case FFA_VERSION: + do_ffa_version(&res, host_ctxt); + goto out_handled; } if (ffa_call_supported(func_id)) @@ -699,7 +787,6 @@ out_handled: int hyp_ffa_init(void *pages) { struct arm_smccc_res res; - size_t min_rxtx_sz; void *tx, *rx; if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) @@ -725,35 +812,7 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != FFA_SUCCESS) - return -EOPNOTSUPP; - - if (res.a2 != HOST_FFA_ID) - return -EINVAL; - - arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, - 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != FFA_SUCCESS) - return -EOPNOTSUPP; - - switch (res.a2) { - case FFA_FEAT_RXTX_MIN_SZ_4K: - min_rxtx_sz = SZ_4K; - break; - case FFA_FEAT_RXTX_MIN_SZ_16K: - min_rxtx_sz = SZ_16K; - break; - case FFA_FEAT_RXTX_MIN_SZ_64K: - min_rxtx_sz = SZ_64K; - break; - default: - return -EINVAL; - } - - if (min_rxtx_sz > PAGE_SIZE) - return -EOPNOTSUPP; - + hyp_ffa_version = FFA_VERSION_1_0; tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; rx = pages; @@ -775,5 +834,6 @@ int hyp_ffa_init(void *pages) .lock = __HYP_SPIN_LOCK_UNLOCKED, }; + version_lock = __HYP_SPIN_LOCK_UNLOCKED; return 0; } -- cgit v1.2.3 From 894376385a2d80a96816449e4991587a9a5ef0dd Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:33 +0000 Subject: KVM: arm64: Add support for FFA_PARTITION_INFO_GET Handle the FFA_PARTITION_INFO_GET host call inside the pKVM hypervisor and copy the response message back to the host buffers. 
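In outline, the new handler proxies the query to the TEE and bounds-checks the response before filling the host RX buffer. A condensed sketch of the flow (error paths trimmed; see do_ffa_part_get() in the diff below for the full version):

	hyp_spin_lock(&host_buffers.lock);
	arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1,
			  uuid2, uuid3, flags, 0, 0, res);
	count = res->a2;
	/* FF-A 1.1+ reports the descriptor size in a3; 1.0 uses the fixed 8-byte layout */
	partition_sz = (hyp_ffa_version > FFA_VERSION_1_0) ? res->a3
							   : FFA_1_0_PARTITON_INFO_SZ;
	copy_sz = partition_sz * count;
	if (copy_sz <= KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)
		memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
	hyp_spin_unlock(&host_buffers.lock);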
Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-3-sebastianene@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 52 +++++++++++++++++++++++++++++++++++++++++++ include/linux/arm_ffa.h | 3 +++ 2 files changed, 55 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index c8ab51c331f0..4eaef673e98d 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -719,6 +719,55 @@ unlock: hyp_spin_unlock(&version_lock); } +static void do_ffa_part_get(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, uuid0, ctxt, 1); + DECLARE_REG(u32, uuid1, ctxt, 2); + DECLARE_REG(u32, uuid2, ctxt, 3); + DECLARE_REG(u32, uuid3, ctxt, 4); + DECLARE_REG(u32, flags, ctxt, 5); + u32 count, partition_sz, copy_sz; + + hyp_spin_lock(&host_buffers.lock); + if (!host_buffers.rx) { + ffa_to_smccc_res(res, FFA_RET_BUSY); + goto out_unlock; + } + + arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, + uuid2, uuid3, flags, 0, 0, + res); + + if (res->a0 != FFA_SUCCESS) + goto out_unlock; + + count = res->a2; + if (!count) + goto out_unlock; + + if (hyp_ffa_version > FFA_VERSION_1_0) { + /* Get the number of partitions deployed in the system */ + if (flags & 0x1) + goto out_unlock; + + partition_sz = res->a3; + } else { + /* FFA_VERSION_1_0 lacks the size in the response */ + partition_sz = FFA_1_0_PARTITON_INFO_SZ; + } + + copy_sz = partition_sz * count; + if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { + ffa_to_smccc_res(res, FFA_RET_ABORTED); + goto out_unlock; + } + + memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz); +out_unlock: + hyp_spin_unlock(&host_buffers.lock); +} + bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { struct arm_smccc_res res; @@ -773,6 +822,9 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) case FFA_VERSION: do_ffa_version(&res, host_ctxt); goto out_handled; + case FFA_PARTITION_INFO_GET: + do_ffa_part_get(&res, host_ctxt); + goto out_handled; } if (ffa_call_supported(func_id)) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index c82d56768101..c6d18f50f671 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -212,6 +212,9 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } extern const struct bus_type ffa_bus_type; +/* The FF-A 1.0 partition structure lacks the uuid[4] */ +#define FFA_1_0_PARTITON_INFO_SZ (8) + /* FFA transport related */ struct ffa_partition_info { u16 id; -- cgit v1.2.3 From 0dd60c4632a1f517e384c318c7b33f4b3915c0a7 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:34 +0000 Subject: KVM: arm64: Update the identification range for the FF-A smcs The FF-A spec 1.2 reserves the following ranges for identifying FF-A calls: 0x84000060-0x840000FF: FF-A 32-bit calls 0xC4000060-0xC40000FF: FF-A 64-bit calls. Use the range identification according to the spec and allow calls that are currently out of the range(eg. FFA_MSG_SEND_DIRECT_REQ2) to be identified correctly. 
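The widened range is consumed by the existing is_ffa_call() helper in the nVHE ffa.c (not part of this hunk), which gates on roughly the following, so bumping FFA_MAX_FUNC_NUM is all that is needed for the newer IDs such as FFA_MSG_SEND_DIRECT_REQ2 to be claimed by the hypervisor:

	static bool is_ffa_call(u64 func_id)
	{
		return ARM_SMCCC_IS_FAST_CALL(func_id) &&
		       ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD &&
		       ARM_SMCCC_FUNC_NUM(func_id) >= FFA_MIN_FUNC_NUM &&
		       ARM_SMCCC_FUNC_NUM(func_id) <= FFA_MAX_FUNC_NUM;
	}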
Acked-by: Will Deacon Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-4-sebastianene@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/nvhe/ffa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h index d9fd5e6c7d3c..146e0aebfa1c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/ffa.h +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -9,7 +9,7 @@ #include #define FFA_MIN_FUNC_NUM 0x60 -#define FFA_MAX_FUNC_NUM 0x7F +#define FFA_MAX_FUNC_NUM 0xFF int hyp_ffa_init(void *pages); bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id); -- cgit v1.2.3 From 42fb33dde42b826d5505ec8d391ec473932d1694 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:35 +0000 Subject: KVM: arm64: Use FF-A 1.1 with pKVM Now that the layout of the structures is compatible with 1.1 it is time to probe the 1.1 version of the FF-A protocol inside the hypervisor. If the TEE doesn't support it, it should return the minimum supported version. Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-5-sebastianene@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4eaef673e98d..fdb63b7857ec 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -457,7 +457,7 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id, memcpy(buf, host_buffers.tx, fraglen); ep_mem_access = (void *)buf + - ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + ffa_mem_desc_offset(buf, 0, hyp_ffa_version); offset = ep_mem_access->composite_off; if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) { ret = FFA_RET_INVALID_PARAMETERS; @@ -536,7 +536,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a2; ep_mem_access = (void *)buf + - ffa_mem_desc_offset(buf, 0, FFA_VERSION_1_0); + ffa_mem_desc_offset(buf, 0, hyp_ffa_version); offset = ep_mem_access->composite_off; /* * We can trust the SPMD to get this right, but let's at least @@ -844,7 +844,7 @@ int hyp_ffa_init(void *pages) if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2) return 0; - arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res); + arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res); if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; @@ -864,7 +864,11 @@ int hyp_ffa_init(void *pages) if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; - hyp_ffa_version = FFA_VERSION_1_0; + if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1)) + hyp_ffa_version = res.a0; + else + hyp_ffa_version = FFA_VERSION_1_1; + tx = pages; pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; rx = pages; -- cgit v1.2.3 From 4f128f8e1aaac189f83d0f828bcdb2986d8d2e51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:37 +0100 Subject: KVM: arm64: nv: Support multiple nested Stage-2 mmu structures Add Stage-2 mmu data structures for virtual EL2 and for nested guests. We don't yet populate shadow Stage-2 page tables, but we now have a framework for getting to a shadow Stage-2 pgd. 
We allocate twice the number of vcpus as Stage-2 mmu structures because that's sufficient for each vcpu running two translation regimes without having to flush the Stage-2 page tables. Co-developed-by: Christoffer Dall Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 36 ++++++ arch/arm64/include/asm/kvm_mmu.h | 24 ++++ arch/arm64/include/asm/kvm_nested.h | 6 + arch/arm64/kvm/arm.c | 11 ++ arch/arm64/kvm/mmu.c | 69 ++++++++---- arch/arm64/kvm/nested.c | 218 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/reset.c | 6 + 7 files changed, 349 insertions(+), 21 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 36b8e97bf49e..d486b7fc55a0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -189,6 +189,33 @@ struct kvm_s2_mmu { uint64_t split_page_chunk_size; struct kvm_arch *arch; + + /* + * For a shadow stage-2 MMU, the virtual vttbr used by the + * host to parse the guest S2. + * This either contains: + * - the virtual VTTBR programmed by the guest hypervisor with + * CnP cleared + * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid + * + * We also cache the full VTCR which gets used for TLB invalidation, + * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted + * to be cached in a TLB" to the letter. + */ + u64 tlb_vttbr; + u64 tlb_vtcr; + + /* + * true when this represents a nested context where virtual + * HCR_EL2.VM == 1 + */ + bool nested_stage2_enabled; + + /* + * 0: Nobody is currently using this, check vttbr for validity + * >0: Somebody is actively using this. + */ + atomic_t refcnt; }; struct kvm_arch_memory_slot { @@ -256,6 +283,14 @@ struct kvm_arch { */ u64 fgu[__NR_FGT_GROUP_IDS__]; + /* + * Stage 2 paging state for VMs with nested S2 using a virtual + * VMID. 
+ */ + struct kvm_s2_mmu *nested_mmus; + size_t nested_mmus_size; + int nested_mmus_next; + /* Interrupt controller */ struct vgic_dist vgic; @@ -1306,6 +1341,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu); int __init kvm_set_ipa_limit(void); +u32 kvm_get_pa_bits(struct kvm *kvm); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d5e48d870461..87cc941cfd15 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -98,6 +98,7 @@ alternative_cb_end #include #include #include +#include void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); @@ -165,6 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size); + void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_uninit_stage2_mmu(struct kvm *kvm); @@ -326,5 +329,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); } + +static inline u64 get_vmid(u64 vttbr) +{ + return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >> + VTTBR_VMID_SHIFT; +} + +static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu) +{ + return !(mmu->tlb_vttbr & VTTBR_CNP_BIT); +} + +static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +{ + /* + * Be careful, mmu may not be fully initialised so do look at + * *any* of its fields. + */ + return &kvm->arch.mmu != mmu; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 5e0ab0596246..a69faee31342 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -61,6 +61,12 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0) } extern bool forward_smc_trap(struct kvm_vcpu *vcpu); +extern void kvm_init_nested(struct kvm *kvm); +extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); +extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); +extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); +extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); +extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 59716789fe0f..11b42af84f6f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -170,6 +170,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_unlock(&kvm->lock); #endif + kvm_init_nested(kvm); + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; @@ -551,6 +553,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) struct kvm_s2_mmu *mmu; int *last_ran; + if (vcpu_has_nv(vcpu)) + kvm_vcpu_load_hw_mmu(vcpu); + mmu = vcpu->arch.hw_mmu; last_ran = this_cpu_ptr(mmu->last_vcpu_ran); @@ -601,6 +606,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); kvm_vcpu_pmu_restore_host(vcpu); + if (vcpu_has_nv(vcpu)) + kvm_vcpu_put_hw_mmu(vcpu); kvm_arm_vmid_clear_active(); vcpu_clear_on_unsupported_cpu(vcpu); @@ -1459,6 +1466,10 @@ static int kvm_setup_vcpu(struct kvm_vcpu *vcpu) if 
(kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu) ret = kvm_arm_set_default_pmu(kvm); + /* Prepare for nested if required */ + if (!ret && vcpu_has_nv(vcpu)) + ret = kvm_vcpu_init_nested(vcpu); + return ret; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8bcab0cc3fe9..8984b7c213e1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -328,7 +328,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 may_block)); } -static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) +void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) { __unmap_stage2_range(mmu, start, size, true); } @@ -855,21 +855,9 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .icache_inval_pou = invalidate_icache_guest_page, }; -/** - * kvm_init_stage2_mmu - Initialise a S2 MMU structure - * @kvm: The pointer to the KVM structure - * @mmu: The pointer to the s2 MMU structure - * @type: The machine type of the virtual machine - * - * Allocates only the stage-2 HW PGD level table(s). - * Note we don't need locking here as this is only called when the VM is - * created, which can only be done once. - */ -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) +static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type) { u32 kvm_ipa_limit = get_kvm_ipa_limit(); - int cpu, err; - struct kvm_pgtable *pgt; u64 mmfr0, mmfr1; u32 phys_shift; @@ -896,11 +884,51 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + return 0; +} + +/** + * kvm_init_stage2_mmu - Initialise a S2 MMU structure + * @kvm: The pointer to the KVM structure + * @mmu: The pointer to the s2 MMU structure + * @type: The machine type of the virtual machine + * + * Allocates only the stage-2 HW PGD level table(s). + * Note we don't need locking here as this is only called in two cases: + * + * - when the VM is created, which can't race against anything + * + * - when secondary kvm_s2_mmu structures are initialised for NV + * guests, and the caller must hold kvm->lock as this is called on a + * per-vcpu basis. + */ +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) +{ + int cpu, err; + struct kvm_pgtable *pgt; + + /* + * If we already have our page tables in place, and that the + * MMU context is the canonical one, we have a bug somewhere, + * as this is only supposed to ever happen once per VM. + * + * Otherwise, we're building nested page tables, and that's + * probably because userspace called KVM_ARM_VCPU_INIT more + * than once on the same vcpu. Since that's actually legal, + * don't kick a fuss and leave gracefully. 
+ */ if (mmu->pgt != NULL) { + if (kvm_is_nested_s2_mmu(kvm, mmu)) + return 0; + kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } + err = kvm_init_ipa_range(mmu, type); + if (err) + return err; + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT); if (!pgt) return -ENOMEM; @@ -925,6 +953,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); + + if (kvm_is_nested_s2_mmu(kvm, mmu)) + kvm_init_nested_s2_mmu(mmu); + return 0; out_destroy_pgtable: @@ -976,7 +1008,7 @@ static void stage2_unmap_memslot(struct kvm *kvm, if (!(vma->vm_flags & VM_PFNMAP)) { gpa_t gpa = addr + (vm_start - memslot->userspace_addr); - unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start); + kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start); } hva = vm_end; } while (hva < reg_end); @@ -2022,11 +2054,6 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ - kvm_uninit_stage2_mmu(kvm); -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -2034,7 +2061,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, phys_addr_t size = slot->npages << PAGE_SHIFT; write_lock(&kvm->mmu_lock); - unmap_stage2_range(&kvm->arch.mmu, gpa, size); + kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size); write_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index bae8536cbf00..45d2975071e3 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -7,7 +7,9 @@ #include #include +#include #include +#include #include #include @@ -16,6 +18,222 @@ /* Protection against the sysreg repainting madness... */ #define NV_FTR(r, f) ID_AA64##r##_EL1_##f +/* + * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between + * memory usage and potential number of different sets of S2 PTs in + * the guests. Running out of S2 MMUs only affects performance (we + * will invalidate them more often). + */ +#define S2_MMU_PER_VCPU 2 + +void kvm_init_nested(struct kvm *kvm) +{ + kvm->arch.nested_mmus = NULL; + kvm->arch.nested_mmus_size = 0; +} + +static int init_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +{ + /* + * We only initialise the IPA range on the canonical MMU, which + * defines the contract between KVM and userspace on where the + * "hardware" is in the IPA space. This affects the validity of MMIO + * exits forwarded to userspace, for example. + * + * For nested S2s, we use the PARange as exposed to the guest, as it + * is allowed to use it at will to expose whatever memory map it + * wants to its own guests as it would be on real HW. + */ + return kvm_init_stage2_mmu(kvm, mmu, kvm_get_pa_bits(kvm)); +} + +int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_s2_mmu *tmp; + int num_mmus, ret = 0; + + /* + * Let's treat memory allocation failures as benign: If we fail to + * allocate anything, return an error and keep the allocated array + * alive. Userspace may try to recover by intializing the vcpu + * again, and there is no reason to affect the whole VM for this. 
+ */ + num_mmus = atomic_read(&kvm->online_vcpus) * S2_MMU_PER_VCPU; + tmp = kvrealloc(kvm->arch.nested_mmus, + size_mul(sizeof(*kvm->arch.nested_mmus), kvm->arch.nested_mmus_size), + size_mul(sizeof(*kvm->arch.nested_mmus), num_mmus), + GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!tmp) + return -ENOMEM; + + /* + * If we went through a realocation, adjust the MMU back-pointers in + * the previously initialised kvm_pgtable structures. + */ + if (kvm->arch.nested_mmus != tmp) + for (int i = 0; i < kvm->arch.nested_mmus_size; i++) + tmp[i].pgt->mmu = &tmp[i]; + + for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) + ret = init_nested_s2_mmu(kvm, &tmp[i]); + + if (ret) { + for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++) + kvm_free_stage2_pgd(&tmp[i]); + + return ret; + } + + kvm->arch.nested_mmus_size = num_mmus; + kvm->arch.nested_mmus = tmp; + + return 0; +} + +struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + bool nested_stage2_enabled; + u64 vttbr, vtcr, hcr; + + lockdep_assert_held_write(&kvm->mmu_lock); + + vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2); + hcr = vcpu_read_sys_reg(vcpu, HCR_EL2); + + nested_stage2_enabled = hcr & HCR_VM; + + /* Don't consider the CnP bit for the vttbr match */ + vttbr &= ~VTTBR_CNP_BIT; + + /* + * Two possibilities when looking up a S2 MMU context: + * + * - either S2 is enabled in the guest, and we need a context that is + * S2-enabled and matches the full VTTBR (VMID+BADDR) and VTCR, + * which makes it safe from a TLB conflict perspective (a broken + * guest won't be able to generate them), + * + * - or S2 is disabled, and we need a context that is S2-disabled + * and matches the VMID only, as all TLBs are tagged by VMID even + * if S2 translation is disabled. + */ + for (int i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (!kvm_s2_mmu_valid(mmu)) + continue; + + if (nested_stage2_enabled && + mmu->nested_stage2_enabled && + vttbr == mmu->tlb_vttbr && + vtcr == mmu->tlb_vtcr) + return mmu; + + if (!nested_stage2_enabled && + !mmu->nested_stage2_enabled && + get_vmid(vttbr) == get_vmid(mmu->tlb_vttbr)) + return mmu; + } + return NULL; +} + +static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_s2_mmu *s2_mmu; + int i; + + lockdep_assert_held_write(&vcpu->kvm->mmu_lock); + + s2_mmu = lookup_s2_mmu(vcpu); + if (s2_mmu) + goto out; + + /* + * Make sure we don't always search from the same point, or we + * will always reuse a potentially active context, leaving + * free contexts unused. + */ + for (i = kvm->arch.nested_mmus_next; + i < (kvm->arch.nested_mmus_size + kvm->arch.nested_mmus_next); + i++) { + s2_mmu = &kvm->arch.nested_mmus[i % kvm->arch.nested_mmus_size]; + + if (atomic_read(&s2_mmu->refcnt) == 0) + break; + } + BUG_ON(atomic_read(&s2_mmu->refcnt)); /* We have struct MMUs to spare */ + + /* Set the scene for the next search */ + kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size; + + /* Clear the old state */ + if (kvm_s2_mmu_valid(s2_mmu)) + kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu)); + + /* + * The virtual VMID (modulo CnP) will be used as a key when matching + * an existing kvm_s2_mmu. + * + * We cache VTCR at allocation time, once and for all. It'd be great + * if the guest didn't screw that one up, as this is not very + * forgiving... 
+ */ + s2_mmu->tlb_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2) & ~VTTBR_CNP_BIT; + s2_mmu->tlb_vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2); + s2_mmu->nested_stage2_enabled = vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_VM; + +out: + atomic_inc(&s2_mmu->refcnt); + return s2_mmu; +} + +void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu) +{ + /* CnP being set denotes an invalid entry */ + mmu->tlb_vttbr = VTTBR_CNP_BIT; + mmu->nested_stage2_enabled = false; + atomic_set(&mmu->refcnt, 0); +} + +void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu) +{ + if (is_hyp_ctxt(vcpu)) { + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu; + } else { + write_lock(&vcpu->kvm->mmu_lock); + vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu); + write_unlock(&vcpu->kvm->mmu_lock); + } +} + +void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) +{ + if (kvm_is_nested_s2_mmu(vcpu->kvm, vcpu->arch.hw_mmu)) { + atomic_dec(&vcpu->arch.hw_mmu->refcnt); + vcpu->arch.hw_mmu = NULL; + } +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + int i; + + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (!WARN_ON(atomic_read(&mmu->refcnt))) + kvm_free_stage2_pgd(mmu); + } + kfree(kvm->arch.nested_mmus); + kvm->arch.nested_mmus = NULL; + kvm->arch.nested_mmus_size = 0; + kvm_uninit_stage2_mmu(kvm); +} + /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3fc8ca164dbe..0b0ae5ae7bc2 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -268,6 +268,12 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu) preempt_enable(); } +u32 kvm_get_pa_bits(struct kvm *kvm) +{ + /* Fixed limit until we can configure ID_AA64MMFR0.PARange */ + return kvm_ipa_limit; +} + u32 get_kvm_ipa_limit(void) { return kvm_ipa_limit; -- cgit v1.2.3 From 61e30b9eef7ffc7f88ffd95e969cfb662e41bb05 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 14 Jun 2024 15:45:38 +0100 Subject: KVM: arm64: nv: Implement nested Stage-2 page table walk logic Based on the pseudo-code in the ARM ARM, implement a stage 2 software page table walker. 
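As an illustration of the per-level bit slicing done in walk_nested_s2_pgd() below: with a 4KB granule, pgshift = 12 and stride = pgshift - 3 = 9, so (assuming a 48-bit input range) the descriptor index at each level is taken from

	level 0: IPA[47:39]
	level 1: IPA[38:30]
	level 2: IPA[29:21]
	level 3: IPA[20:12]

which is what addr_bottom = (3 - level) * stride + pgshift computes, the extracted index then being scaled by 8 (the '>> (addr_bottom - 3)') to form the byte offset of the descriptor within the table at base_addr.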
Co-developed-by: Jintack Lim Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/kvm_nested.h | 13 ++ arch/arm64/kvm/nested.c | 264 ++++++++++++++++++++++++++++++++++++ 3 files changed, 278 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7abf09df7033..15a4be765cad 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -152,6 +152,7 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index a69faee31342..5404b7b843cf 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -68,6 +68,19 @@ extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); +struct kvm_s2_trans { + phys_addr_t output; + unsigned long block_size; + bool writable; + bool readable; + int level; + u32 esr; + u64 upper_attr; +}; + +extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, + struct kvm_s2_trans *result); + int kvm_init_nv_sysregs(struct kvm *kvm); #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 45d2975071e3..8c325d0a5636 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -91,6 +91,270 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) return 0; } +struct s2_walk_info { + int (*read_desc)(phys_addr_t pa, u64 *desc, void *data); + void *data; + u64 baddr; + unsigned int max_oa_bits; + unsigned int pgshift; + unsigned int sl; + unsigned int t0sz; + bool be; +}; + +static unsigned int ps_to_output_size(unsigned int ps) +{ + switch (ps) { + case 0: return 32; + case 1: return 36; + case 2: return 40; + case 3: return 42; + case 4: return 44; + case 5: + default: + return 48; + } +} + +static u32 compute_fsc(int level, u32 fsc) +{ + return fsc | (level & 0x3); +} + +static int get_ia_size(struct s2_walk_info *wi) +{ + return 64 - wi->t0sz; +} + +static int check_base_s2_limits(struct s2_walk_info *wi, + int level, int input_size, int stride) +{ + int start_size, ia_size; + + ia_size = get_ia_size(wi); + + /* Check translation limits */ + switch (BIT(wi->pgshift)) { + case SZ_64K: + if (level == 0 || (level == 1 && ia_size <= 42)) + return -EFAULT; + break; + case SZ_16K: + if (level == 0 || (level == 1 && ia_size <= 40)) + return -EFAULT; + break; + case SZ_4K: + if (level < 0 || (level == 0 && ia_size <= 42)) + return -EFAULT; + break; + } + + /* Check input size limits */ + if (input_size > ia_size) + return -EFAULT; + + /* Check number of entries in starting level table */ + start_size = input_size - ((3 - level) * stride + wi->pgshift); + if (start_size < 1 || start_size > stride + 4) + return -EFAULT; + + return 0; +} + +/* Check if output is within boundaries */ +static int check_output_size(struct s2_walk_info *wi, phys_addr_t output) +{ + unsigned int output_size = wi->max_oa_bits; + + if (output_size != 48 && (output & GENMASK_ULL(47, output_size))) + return -1; + + return 0; +} + +/* + * 
This is essentially a C-version of the pseudo code from the ARM ARM + * AArch64.TranslationTableWalk function. I strongly recommend looking at + * that pseudocode in trying to understand this. + * + * Must be called with the kvm->srcu read lock held + */ +static int walk_nested_s2_pgd(phys_addr_t ipa, + struct s2_walk_info *wi, struct kvm_s2_trans *out) +{ + int first_block_level, level, stride, input_size, base_lower_bound; + phys_addr_t base_addr; + unsigned int addr_top, addr_bottom; + u64 desc; /* page table entry */ + int ret; + phys_addr_t paddr; + + switch (BIT(wi->pgshift)) { + default: + case SZ_64K: + case SZ_16K: + level = 3 - wi->sl; + first_block_level = 2; + break; + case SZ_4K: + level = 2 - wi->sl; + first_block_level = 1; + break; + } + + stride = wi->pgshift - 3; + input_size = get_ia_size(wi); + if (input_size > 48 || input_size < 25) + return -EFAULT; + + ret = check_base_s2_limits(wi, level, input_size, stride); + if (WARN_ON(ret)) + return ret; + + base_lower_bound = 3 + input_size - ((3 - level) * stride + + wi->pgshift); + base_addr = wi->baddr & GENMASK_ULL(47, base_lower_bound); + + if (check_output_size(wi, base_addr)) { + out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); + return 1; + } + + addr_top = input_size - 1; + + while (1) { + phys_addr_t index; + + addr_bottom = (3 - level) * stride + wi->pgshift; + index = (ipa & GENMASK_ULL(addr_top, addr_bottom)) + >> (addr_bottom - 3); + + paddr = base_addr | index; + ret = wi->read_desc(paddr, &desc, wi->data); + if (ret < 0) + return ret; + + /* + * Handle reversedescriptors if endianness differs between the + * host and the guest hypervisor. + */ + if (wi->be) + desc = be64_to_cpu((__force __be64)desc); + else + desc = le64_to_cpu((__force __le64)desc); + + /* Check for valid descriptor at this point */ + if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) { + out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); + out->upper_attr = desc; + return 1; + } + + /* We're at the final level or block translation level */ + if ((desc & 3) == 1 || level == 3) + break; + + if (check_output_size(wi, desc)) { + out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); + out->upper_attr = desc; + return 1; + } + + base_addr = desc & GENMASK_ULL(47, wi->pgshift); + + level += 1; + addr_top = addr_bottom - 1; + } + + if (level < first_block_level) { + out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); + out->upper_attr = desc; + return 1; + } + + /* + * We don't use the contiguous bit in the stage-2 ptes, so skip check + * for misprogramming of the contiguous bit. 
+ */ + + if (check_output_size(wi, desc)) { + out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); + out->upper_attr = desc; + return 1; + } + + if (!(desc & BIT(10))) { + out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS); + out->upper_attr = desc; + return 1; + } + + /* Calculate and return the result */ + paddr = (desc & GENMASK_ULL(47, addr_bottom)) | + (ipa & GENMASK_ULL(addr_bottom - 1, 0)); + out->output = paddr; + out->block_size = 1UL << ((3 - level) * stride + wi->pgshift); + out->readable = desc & (0b01 << 6); + out->writable = desc & (0b10 << 6); + out->level = level; + out->upper_attr = desc & GENMASK_ULL(63, 52); + return 0; +} + +static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data) +{ + struct kvm_vcpu *vcpu = data; + + return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc)); +} + +static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi) +{ + wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK; + + switch (vtcr & VTCR_EL2_TG0_MASK) { + case VTCR_EL2_TG0_4K: + wi->pgshift = 12; break; + case VTCR_EL2_TG0_16K: + wi->pgshift = 14; break; + case VTCR_EL2_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + + wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + /* Global limit for now, should eventually be per-VM */ + wi->max_oa_bits = min(get_kvm_ipa_limit(), + ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr))); +} + +int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, + struct kvm_s2_trans *result) +{ + u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2); + struct s2_walk_info wi; + int ret; + + result->esr = 0; + + if (!vcpu_has_nv(vcpu)) + return 0; + + wi.read_desc = read_guest_s2_desc; + wi.data = vcpu; + wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + + vtcr_to_walk_info(vtcr, &wi); + + wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE; + + ret = walk_nested_s2_pgd(gipa, &wi, result); + if (ret) + result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC); + + return ret; +} + struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; -- cgit v1.2.3 From fd276e71d1e7b7f729050f2da235a1e6fe4f328a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:39 +0100 Subject: KVM: arm64: nv: Handle shadow stage 2 page faults If we are faulting on a shadow stage 2 translation, we first walk the guest hypervisor's stage 2 page table to see if it has a mapping. If not, we inject a stage 2 page fault to the virtual EL2. Otherwise, we create a mapping in the shadow stage 2 page table. Note that we have to deal with two IPAs when we got a shadow stage 2 page fault. One is the address we faulted on, and is in the L2 guest phys space. The other is from the guest stage-2 page table walk, and is in the L1 guest phys space. To differentiate them, we rename variables so that fault_ipa is used for the former and ipa is used for the latter. When mapping a page in a shadow stage-2, special care must be taken not to be more permissive than the guest is. 
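Concretely, the permissions installed in the shadow stage-2 end up being the intersection of what the host would normally grant and what the guest hypervisor's own stage-2 grants; the clamping added to user_mem_abort() (see the diff below) boils down to:

	writable &= kvm_s2_trans_writable(nested);
	if (!kvm_s2_trans_readable(nested))
		prot &= ~KVM_PGTABLE_PROT_R;
	/* and X is only granted when kvm_s2_trans_executable(nested) is true */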
Co-developed-by: Christoffer Dall Co-developed-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 33 +++++++++++++ arch/arm64/kvm/mmu.c | 97 +++++++++++++++++++++++++++++++++---- arch/arm64/kvm/nested.c | 45 +++++++++++++++++ 3 files changed, 166 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 5404b7b843cf..82e0484ca26b 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -78,8 +78,41 @@ struct kvm_s2_trans { u64 upper_attr; }; +static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans) +{ + return trans->output; +} + +static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans) +{ + return trans->block_size; +} + +static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans) +{ + return trans->esr; +} + +static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans) +{ + return trans->readable; +} + +static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) +{ + return trans->writable; +} + +static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) +{ + return !(trans->upper_attr & BIT(54)); +} + extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, struct kvm_s2_trans *result); +extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, + struct kvm_s2_trans *trans); +extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 8984b7c213e1..5aed2e9d380d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1407,6 +1407,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_s2_trans *nested, struct kvm_memory_slot *memslot, unsigned long hva, bool fault_is_perm) { @@ -1415,6 +1416,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool exec_fault, mte_allowed; bool device = false, vfio_allow_any_uc = false; unsigned long mmu_seq; + phys_addr_t ipa = fault_ipa; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; @@ -1498,10 +1500,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } vma_pagesize = 1UL << vma_shift; + + if (nested) { + unsigned long max_map_size; + + max_map_size = force_pte ? PAGE_SIZE : PUD_SIZE; + + ipa = kvm_s2_trans_output(nested); + + /* + * If we're about to create a shadow stage 2 entry, then we + * can only create a block mapping if the guest stage 2 page + * table uses at least as big a mapping. + */ + max_map_size = min(kvm_s2_trans_size(nested), max_map_size); + + /* + * Be careful that if the mapping size falls between + * two host sizes, take the smallest of the two. 
+ */ + if (max_map_size >= PMD_SIZE && max_map_size < PUD_SIZE) + max_map_size = PMD_SIZE; + else if (max_map_size >= PAGE_SIZE && max_map_size < PMD_SIZE) + max_map_size = PAGE_SIZE; + + force_pte = (max_map_size == PAGE_SIZE); + vma_pagesize = min(vma_pagesize, (long)max_map_size); + } + if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) fault_ipa &= ~(vma_pagesize - 1); - gfn = fault_ipa >> PAGE_SHIFT; + gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED; @@ -1552,6 +1582,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; + /* + * Potentially reduce shadow S2 permissions to match the guest's own + * S2. For exec faults, we'd only reach this point if the guest + * actually allowed it (see kvm_s2_handle_perm_fault). + */ + if (nested) { + writable &= kvm_s2_trans_writable(nested); + if (!kvm_s2_trans_readable(nested)) + prot &= ~KVM_PGTABLE_PROT_R; + } + read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) { @@ -1598,7 +1639,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, prot |= KVM_PGTABLE_PROT_NORMAL_NC; else prot |= KVM_PGTABLE_PROT_DEVICE; - } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) { + } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) && + (!nested || kvm_s2_trans_executable(nested))) { prot |= KVM_PGTABLE_PROT_X; } @@ -1658,8 +1700,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) */ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) { + struct kvm_s2_trans nested_trans, *nested = NULL; unsigned long esr; - phys_addr_t fault_ipa; + phys_addr_t fault_ipa; /* The address we faulted on */ + phys_addr_t ipa; /* Always the IPA in the L1 guest phys space */ struct kvm_memory_slot *memslot; unsigned long hva; bool is_iabt, write_fault, writable; @@ -1668,7 +1712,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) esr = kvm_vcpu_get_esr(vcpu); - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1718,7 +1762,42 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) idx = srcu_read_lock(&vcpu->kvm->srcu); - gfn = fault_ipa >> PAGE_SHIFT; + /* + * We may have faulted on a shadow stage 2 page table if we are + * running a nested guest. In this case, we have to resolve the L2 + * IPA to the L1 IPA first, before knowing what kind of memory should + * back the L1 IPA. + * + * If the shadow stage 2 page table walk faults, then we simply inject + * this to the guest and carry on. + * + * If there are no shadow S2 PTs because S2 is disabled, there is + * nothing to walk and we treat it as a 1:1 before going through the + * canonical translation. 
+ */ + if (kvm_is_nested_s2_mmu(vcpu->kvm,vcpu->arch.hw_mmu) && + vcpu->arch.hw_mmu->nested_stage2_enabled) { + u32 esr; + + ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans); + if (ret) { + esr = kvm_s2_trans_esr(&nested_trans); + kvm_inject_s2_fault(vcpu, esr); + goto out_unlock; + } + + ret = kvm_s2_handle_perm_fault(vcpu, &nested_trans); + if (ret) { + esr = kvm_s2_trans_esr(&nested_trans); + kvm_inject_s2_fault(vcpu, esr); + goto out_unlock; + } + + ipa = kvm_s2_trans_output(&nested_trans); + nested = &nested_trans; + } + + gfn = ipa >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); write_fault = kvm_is_write_fault(vcpu); @@ -1762,13 +1841,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * faulting VA. This is always 12 bits, irrespective * of the page size. */ - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); - ret = io_mem_abort(vcpu, fault_ipa); + ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + ret = io_mem_abort(vcpu, ipa); goto out_unlock; } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu)); + VM_BUG_ON(ipa >= kvm_phys_size(vcpu->arch.hw_mmu)); if (esr_fsc_is_access_flag_fault(esr)) { handle_access_fault(vcpu, fault_ipa); @@ -1776,7 +1855,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out_unlock; } - ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, + ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva, esr_fsc_is_permission_fault(esr)); if (ret == 0) ret = 1; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 8c325d0a5636..2a8821674b10 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -121,6 +121,15 @@ static u32 compute_fsc(int level, u32 fsc) return fsc | (level & 0x3); } +static int esr_s2_fault(struct kvm_vcpu *vcpu, int level, u32 fsc) +{ + u32 esr; + + esr = kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC; + esr |= compute_fsc(level, fsc); + return esr; +} + static int get_ia_size(struct s2_walk_info *wi) { return 64 - wi->t0sz; @@ -482,6 +491,42 @@ void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu) } } +/* + * Returns non-zero if permission fault is handled by injecting it to the next + * level hypervisor. + */ +int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans) +{ + bool forward_fault = false; + + trans->esr = 0; + + if (!kvm_vcpu_trap_is_permission_fault(vcpu)) + return 0; + + if (kvm_vcpu_trap_is_iabt(vcpu)) { + forward_fault = !kvm_s2_trans_executable(trans); + } else { + bool write_fault = kvm_is_write_fault(vcpu); + + forward_fault = ((write_fault && !trans->writable) || + (!write_fault && !trans->readable)); + } + + if (forward_fault) + trans->esr = esr_s2_fault(vcpu, trans->level, ESR_ELx_FSC_PERM); + + return forward_fault; +} + +int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2) +{ + vcpu_write_sys_reg(vcpu, vcpu->arch.fault.far_el2, FAR_EL2); + vcpu_write_sys_reg(vcpu, vcpu->arch.fault.hpfar_el2, HPFAR_EL2); + + return kvm_inject_nested_sync(vcpu, esr_el2); +} + void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; -- cgit v1.2.3 From ec14c272408af43d392f65f55e66f3b94fc61921 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 14 Jun 2024 15:45:40 +0100 Subject: KVM: arm64: nv: Unmap/flush shadow stage 2 page tables Unmap/flush shadow stage 2 page tables for the nested VMs as well as the stage 2 page table for the guest hypervisor. 
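The pattern is the same for write-protect, unmap and flush: whenever a range-wide operation is applied to the canonical stage-2, iterate over the nested_mmus array and apply the equivalent operation to every valid shadow MMU across its whole IPA range, as in this sketch of kvm_nested_s2_unmap() from the diff below:

	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];

		if (kvm_s2_mmu_valid(mmu))
			kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu));
	}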
Note: A bunch of the code in mmu.c relating to MMU notifiers is currently dealt with in an extremely abrupt way, for example by clearing out an entire shadow stage-2 table. This will be handled in a more efficient way using the reverse mapping feature in a later version of the patch series. Signed-off-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/kvm_nested.h | 3 +++ arch/arm64/kvm/mmu.c | 28 ++++++++++++++++++++----- arch/arm64/kvm/nested.c | 42 +++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 87cc941cfd15..216ca424bb16 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -167,6 +167,8 @@ int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr); void __init free_hyp_pgds(void); void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size); +void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); +void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end); void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 82e0484ca26b..6f770405574f 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -113,6 +113,9 @@ extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans); extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2); +extern void kvm_nested_s2_wp(struct kvm *kvm); +extern void kvm_nested_s2_unmap(struct kvm *kvm); +extern void kvm_nested_s2_flush(struct kvm *kvm); int kvm_init_nv_sysregs(struct kvm *kvm); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 5aed2e9d380d..4ed93a384255 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -333,13 +333,18 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size) __unmap_stage2_range(mmu, start, size, true); } +void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) +{ + stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush); +} + static void stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = addr + PAGE_SIZE * memslot->npages; - stage2_apply_range_resched(&kvm->arch.mmu, addr, end, kvm_pgtable_stage2_flush); + kvm_stage2_flush_range(&kvm->arch.mmu, addr, end); } /** @@ -362,6 +367,8 @@ static void stage2_flush_vm(struct kvm *kvm) kvm_for_each_memslot(memslot, bkt, slots) stage2_flush_memslot(kvm, memslot); + kvm_nested_s2_flush(kvm); + write_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); } @@ -1035,6 +1042,8 @@ void stage2_unmap_vm(struct kvm *kvm) kvm_for_each_memslot(memslot, bkt, slots) stage2_unmap_memslot(kvm, memslot); + kvm_nested_s2_unmap(kvm); + write_unlock(&kvm->mmu_lock); mmap_read_unlock(current->mm); srcu_read_unlock(&kvm->srcu, idx); @@ -1134,12 +1143,12 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, } /** - * stage2_wp_range() - write 
protect stage2 memory region range + * kvm_stage2_wp_range() - write protect stage2 memory region range * @mmu: The KVM stage-2 MMU pointer * @addr: Start address of range * @end: End address of range */ -static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) +void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end) { stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect); } @@ -1170,7 +1179,8 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; write_lock(&kvm->mmu_lock); - stage2_wp_range(&kvm->arch.mmu, start, end); + kvm_stage2_wp_range(&kvm->arch.mmu, start, end); + kvm_nested_s2_wp(kvm); write_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs_memslot(kvm, memslot); } @@ -1224,7 +1234,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, lockdep_assert_held_write(&kvm->mmu_lock); - stage2_wp_range(&kvm->arch.mmu, start, end); + kvm_stage2_wp_range(&kvm->arch.mmu, start, end); /* * Eager-splitting is done when manual-protect is set. We @@ -1236,6 +1246,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, */ if (kvm_dirty_log_manual_protect_and_init_set(kvm)) kvm_mmu_split_huge_pages(kvm, start, end); + + kvm_nested_s2_wp(kvm); } static void kvm_send_hwpoison_signal(unsigned long address, short lsb) @@ -1878,6 +1890,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) (range->end - range->start) << PAGE_SHIFT, range->may_block); + kvm_nested_s2_unmap(kvm); return false; } @@ -1891,6 +1904,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, size, true); + /* + * TODO: Handle nested_mmu structures here using the reverse mapping in + * a later version of patch series. 
+ */ } bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -2141,6 +2158,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, write_lock(&kvm->mmu_lock); kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size); + kvm_nested_s2_unmap(kvm); write_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 2a8821674b10..114a3f59c28b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -527,6 +527,48 @@ int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2) return kvm_inject_nested_sync(vcpu, esr_el2); } +void kvm_nested_s2_wp(struct kvm *kvm) +{ + int i; + + lockdep_assert_held_write(&kvm->mmu_lock); + + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (kvm_s2_mmu_valid(mmu)) + kvm_stage2_wp_range(mmu, 0, kvm_phys_size(mmu)); + } +} + +void kvm_nested_s2_unmap(struct kvm *kvm) +{ + int i; + + lockdep_assert_held_write(&kvm->mmu_lock); + + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (kvm_s2_mmu_valid(mmu)) + kvm_stage2_unmap_range(mmu, 0, kvm_phys_size(mmu)); + } +} + +void kvm_nested_s2_flush(struct kvm *kvm) +{ + int i; + + lockdep_assert_held_write(&kvm->mmu_lock); + + for (i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (kvm_s2_mmu_valid(mmu)) + kvm_stage2_flush_range(mmu, 0, kvm_phys_size(mmu)); + } +} + void kvm_arch_flush_shadow_all(struct kvm *kvm) { int i; -- cgit v1.2.3 From 82e86326ec58e074883bfe27ee098cabe3a9beb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:41 +0100 Subject: KVM: arm64: nv: Add Stage-1 EL2 invalidation primitives Provide the primitives required to handle TLB invalidation for Stage-1 EL2 TLBs, which by definition do not require messing with the Stage-2 page tables. Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_asm.h | 2 ++ arch/arm64/kvm/hyp/vhe/tlb.c | 65 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index a6330460d9e5..2181a11b9d92 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -232,6 +232,8 @@ extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); +extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); + extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 5fa0359f3a87..75aa36465805 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -219,3 +219,68 @@ void __kvm_flush_vm_context(void) __tlbi(alle1is); dsb(ish); } + +/* + * TLB invalidation emulation for NV. 
For any given instruction, we + * perform the following transformtions: + * + * - a TLBI targeting EL2 S1 is remapped to EL1 S1 + * - a non-shareable TLBI is upgraded to being inner-shareable + */ +int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) +{ + struct tlb_inv_context cxt; + int ret = 0; + + /* + * The guest will have provided its own DSB ISHST before trapping. + * If it hasn't, that's its own problem, and we won't paper over it + * (plus, there is plenty of extra synchronisation before we even + * get here...). + */ + + if (mmu) + enter_vmid_context(mmu, &cxt); + + switch (sys_encoding) { + case OP_TLBI_ALLE2: + case OP_TLBI_ALLE2IS: + case OP_TLBI_VMALLE1: + case OP_TLBI_VMALLE1IS: + __tlbi(vmalle1is); + break; + case OP_TLBI_VAE2: + case OP_TLBI_VAE2IS: + case OP_TLBI_VAE1: + case OP_TLBI_VAE1IS: + __tlbi(vae1is, va); + break; + case OP_TLBI_VALE2: + case OP_TLBI_VALE2IS: + case OP_TLBI_VALE1: + case OP_TLBI_VALE1IS: + __tlbi(vale1is, va); + break; + case OP_TLBI_ASIDE1: + case OP_TLBI_ASIDE1IS: + __tlbi(aside1is, va); + break; + case OP_TLBI_VAAE1: + case OP_TLBI_VAAE1IS: + __tlbi(vaae1is, va); + break; + case OP_TLBI_VAALE1: + case OP_TLBI_VAALE1IS: + __tlbi(vaale1is, va); + break; + default: + ret = -EINVAL; + } + dsb(ish); + isb(); + + if (mmu) + exit_vmid_context(&cxt); + + return ret; +} -- cgit v1.2.3 From 67fda56e76da4c4be9a8502d7211dbba024576d2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:42 +0100 Subject: KVM: arm64: nv: Handle EL2 Stage-1 TLB invalidation Due to the way FEAT_NV2 suppresses traps when accessing EL2 system registers, we can't track when the guest changes its HCR_EL2.TGE setting. This means we always trap EL1 TLBIs, even if they don't affect any L2 guest. Given that invalidating the EL2 TLBs doesn't require any messing with the shadow stage-2 page-tables, we can simply emulate the instructions early and return directly to the guest. This is conditioned on the instruction being an EL1 one and the guest's HCR_EL2.{E2H,TGE} being {1,1} (indicating that the instruction targets the EL2 S1 TLBs), or the instruction being one of the EL2 ones (which are not ambiguous). EL1 TLBIs issued with HCR_EL2.{E2H,TGE}={1,0} are not handled here, and cause a full exit so that they can be handled in the context of a VMID. 
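As a rough sketch of the decision just described (not the code in the patch; the names are illustrative, and this only matters while the vCPU is running in a hyp context):

#include <stdbool.h>

static bool tlbi_hits_el2_s1(bool is_el2_tlbi, bool is_el1_tlbi,
			     bool guest_e2h, bool guest_tge)
{
	/* EL2 TLBIs are unambiguous: they always target the EL2 S1 regime. */
	if (is_el2_tlbi)
		return true;

	/*
	 * An EL1 TLBI issued while the virtual E2H and TGE bits are both
	 * set operates on the guest's EL2&0 regime, so it can be emulated
	 * inline without a VMID context.
	 */
	if (is_el1_tlbi && guest_e2h && guest_tge)
		return true;

	/*
	 * Anything else ({E2H,TGE} == {1,0}, or E2H == 0) affects an EL1
	 * translation regime and takes the full exit path instead.
	 */
	return false;
}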
Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 55 +++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 17 ++++++++++++ arch/arm64/kvm/hyp/vhe/switch.c | 51 +++++++++++++++++++++++++++++++++- 3 files changed, 122 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6f770405574f..76b88c640602 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -117,6 +117,61 @@ extern void kvm_nested_s2_wp(struct kvm *kvm); extern void kvm_nested_s2_unmap(struct kvm *kvm); extern void kvm_nested_s2_flush(struct kvm *kvm); +static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL1)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + +static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (!(sys_reg_Op0(instr) == TLBI_Op0 && + sys_reg_Op1(instr) == TLBI_Op1_EL2)) + return false; + + if (!(sys_reg_CRn(instr) == TLBI_CRn_XS || + (sys_reg_CRn(instr) == TLBI_CRn_nXS && + kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)))) + return false; + + if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS || + CRm == TLBI_CRm_RNS) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + int kvm_init_nv_sysregs(struct kvm *kvm); #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index af3b206fa423..be4152819456 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -654,6 +654,23 @@ #define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) /* TLBI instructions */ +#define TLBI_Op0 1 + +#define TLBI_Op1_EL1 0 /* Accessible from EL1 or higher */ +#define TLBI_Op1_EL2 4 /* Accessible from EL2 or higher */ + +#define TLBI_CRn_XS 8 /* Extra Slow (the common one) */ +#define TLBI_CRn_nXS 9 /* not Extra Slow (which nobody uses)*/ + +#define TLBI_CRm_IPAIS 0 /* S2 Inner-Shareable */ +#define TLBI_CRm_nROS 1 /* non-Range, Outer-Sharable */ +#define TLBI_CRm_RIS 2 /* Range, Inner-Sharable */ +#define TLBI_CRm_nRIS 3 /* non-Range, Inner-Sharable */ +#define TLBI_CRm_IPAONS 4 /* S2 Outer and Non-Shareable */ +#define TLBI_CRm_ROS 5 /* Range, Outer-Sharable */ +#define TLBI_CRm_RNS 6 /* Range, Non-Sharable */ +#define TLBI_CRm_nRNS 7 /* non-Range, Non-Sharable */ + #define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0) #define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1) #define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2) diff --git 
a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 8fbb6a2e0559..88409a135a6f 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -266,10 +266,59 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); } +static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + int ret = -EINVAL; + u32 instr; + u64 val; + + /* + * Ideally, we would never trap on EL2 S1 TLB invalidations using + * the EL1 instructions when the guest's HCR_EL2.{E2H,TGE}=={1,1}. + * But "thanks" to FEAT_NV2, we don't trap writes to HCR_EL2, + * meaning that we can't track changes to the virtual TGE bit. So we + * have to leave HCR_EL2.TTLB set on the host. Oopsie... + * + * Try and handle these invalidation as quickly as possible, without + * fully exiting. Note that we don't need to consider any forwarding + * here, as having E2H+TGE set is the very definition of being + * InHost. + * + * For the lesser hypervisors out there that have failed to get on + * with the VHE program, we can also handle the nVHE style of EL2 + * invalidation. + */ + if (!(is_hyp_ctxt(vcpu))) + return false; + + instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); + + if ((kvm_supported_tlbi_s1e1_op(vcpu, instr) && + vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) || + kvm_supported_tlbi_s1e2_op (vcpu, instr)) + ret = __kvm_tlbi_s1e2(NULL, val, instr); + + if (ret) + return false; + + __kvm_skip_instr(vcpu); + + return true; +} + +static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code)) + return true; + + return kvm_hyp_handle_sysreg(vcpu, exit_code); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, - [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg_vhe, [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, -- cgit v1.2.3 From 8e236efa4cd2df8b270784a33d7e334933789f1a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:43 +0100 Subject: KVM: arm64: nv: Handle TLB invalidation targeting L2 stage-1 While dealing with TLB invalidation targeting the guest hypervisor's own stage-1 was easy, doing the same thing for its own guests is a bit more involved. Since such an invalidation is scoped by VMID, it needs to apply to all s2_mmu contexts that have been tagged by that VMID, irrespective of the value of VTTBR_EL2.BADDR. So for each s2_mmu context matching that VMID, we invalidate the corresponding TLBs, each context having its own "physical" VMID. 
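For illustration only (not part of the patch), the scope of such an invalidation is the VMID field of the guest's VTTBR_EL2, assumed here to sit in bits [63:48] with 16-bit VMIDs; the BADDR field is deliberately ignored, since several shadow contexts may share one VMID while using different base addresses:

#include <stdint.h>

#define VTTBR_VMID_SHIFT	48
#define VTTBR_VMID_MASK		(0xffffULL << VTTBR_VMID_SHIFT)

/* Extract the tag that selects which shadow S2 contexts to invalidate. */
static inline uint16_t guest_vmid(uint64_t vttbr)
{
	return (vttbr & VTTBR_VMID_MASK) >> VTTBR_VMID_SHIFT;
}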
Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-8-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 7 ++++ arch/arm64/kvm/nested.c | 35 ++++++++++++++++ arch/arm64/kvm/sys_regs.c | 80 +++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 76b88c640602..9b7c92ab87cf 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -65,6 +65,13 @@ extern void kvm_init_nested(struct kvm *kvm); extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu); extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu); extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu); + +union tlbi_info; + +extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, + const union tlbi_info *info, + void (*)(struct kvm_s2_mmu *, + const union tlbi_info *)); extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu); extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 114a3f59c28b..a2734d135211 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -364,6 +364,41 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, return ret; } +/* + * We can have multiple *different* MMU contexts with the same VMID: + * + * - S2 being enabled or not, hence differing by the HCR_EL2.VM bit + * + * - Multiple vcpus using private S2s (huh huh...), hence differing by the + * VBBTR_EL2.BADDR address + * + * - A combination of the above... + * + * We can always identify which MMU context to pick at run-time. However, + * TLB invalidation involving a VMID must take action on all the TLBs using + * this particular VMID. This translates into applying the same invalidation + * operation to all the contexts that are using this VMID. Moar phun! 
+ */ +void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid, + const union tlbi_info *info, + void (*tlbi_callback)(struct kvm_s2_mmu *, + const union tlbi_info *)) +{ + write_lock(&kvm->mmu_lock); + + for (int i = 0; i < kvm->arch.nested_mmus_size; i++) { + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i]; + + if (!kvm_s2_mmu_valid(mmu)) + continue; + + if (vmid == get_vmid(mmu->tlb_vttbr)) + tlbi_callback(mmu, info); + } + + write_unlock(&kvm->mmu_lock); +} + struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22b45a15d068..b22309fca3a7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2741,6 +2741,73 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; +/* Only defined here as this is an internal "abstraction" */ +union tlbi_info { + struct { + u64 start; + u64 size; + } range; + + struct { + u64 addr; + } ipa; + + struct { + u64 addr; + u32 encoding; + } va; +}; + +static void s2_mmu_tlbi_s1e1(struct kvm_s2_mmu *mmu, + const union tlbi_info *info) +{ + WARN_ON(__kvm_tlbi_s1e2(mmu, info->va.addr, info->va.encoding)); +} + +static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + + /* + * If we're here, this is because we've trapped on a EL1 TLBI + * instruction that affects the EL1 translation regime while + * we're running in a context that doesn't allow us to let the + * HW do its thing (aka vEL2): + * + * - HCR_EL2.E2H == 0 : a non-VHE guest + * - HCR_EL2.{E2H,TGE} == { 1, 0 } : a VHE guest in guest mode + * + * We don't expect these helpers to ever be called when running + * in a vEL1 context. 
+ */ + + WARN_ON(!vcpu_is_el2(vcpu)); + + if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding)) { + kvm_inject_undefined(vcpu); + return false; + } + + kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), + &(union tlbi_info) { + .va = { + .addr = p->regval, + .encoding = sys_encoding, + }, + }, + s2_mmu_tlbi_s1e1); + + return true; +} + +#define SYS_INSN(insn, access_fn) \ + { \ + SYS_DESC(OP_##insn), \ + .access = (access_fn), \ + } + static struct sys_reg_desc sys_insn_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, @@ -2751,6 +2818,19 @@ static struct sys_reg_desc sys_insn_descs[] = { { SYS_DESC(SYS_DC_CISW), access_dcsw }, { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, + + SYS_INSN(TLBI_VMALLE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VMALLE1, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), }; static const struct sys_reg_desc *first_idreg; -- cgit v1.2.3 From e6c9a3015ff21a76ef8ccab54568f5fe630e6e3a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:44 +0100 Subject: KVM: arm64: nv: Handle TLBI VMALLS12E1{,IS} operations Emulating TLBI VMALLS12E1* results in tearing down all the shadow S2 PTs that match the current VMID, since our shadow S2s are just some form of SW-managed TLBs. That teardown itself results in a full TLB invalidation for both S1 and S2. This can result in over-invalidation if two vcpus use the same VMID to tag private S2 PTs, but this is still correct from an architecture perspective. 
Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-9-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b22309fca3a7..22a3691ce248 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2741,6 +2741,22 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; +static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + + if (sys_reg_CRn(instr) == TLBI_CRn_nXS && + !kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)) + return false; + + if (CRm == TLBI_CRm_nROS && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + return true; +} + /* Only defined here as this is an internal "abstraction" */ union tlbi_info { struct { @@ -2758,6 +2774,38 @@ union tlbi_info { } va; }; +static void s2_mmu_unmap_range(struct kvm_s2_mmu *mmu, + const union tlbi_info *info) +{ + kvm_stage2_unmap_range(mmu, info->range.start, info->range.size); +} + +static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + u64 limit, vttbr; + + if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) { + kvm_inject_undefined(vcpu); + return false; + } + + vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + limit = BIT_ULL(kvm_get_pa_bits(vcpu->kvm)); + + kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), + &(union tlbi_info) { + .range = { + .start = 0, + .size = limit, + }, + }, + s2_mmu_unmap_range); + + return true; +} + static void s2_mmu_tlbi_s1e1(struct kvm_s2_mmu *mmu, const union tlbi_info *info) { @@ -2831,6 +2879,9 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VAAE1, handle_tlbi_el1), SYS_INSN(TLBI_VALE1, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), + + SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), + SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), }; static const struct sys_reg_desc *first_idreg; -- cgit v1.2.3 From 5cfb6cec62f2036c7391192c3fa2a0a8a8200286 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:45 +0100 Subject: KVM: arm64: nv: Handle TLBI ALLE1{,IS} operations TLBI ALLE1* is a pretty big hammer that invalides all S1/S2 TLBs. This translates into the unmapping of all our shadow S2 PTs, itself resulting in the corresponding TLB invalidations. 
Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-10-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22a3691ce248..d8d6380b7c66 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2757,6 +2757,29 @@ static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr) return true; } +static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + + if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) { + kvm_inject_undefined(vcpu); + return false; + } + + write_lock(&vcpu->kvm->mmu_lock); + + /* + * Drop all shadow S2s, resulting in S1/S2 TLBIs for each of the + * corresponding VMIDs. + */ + kvm_nested_s2_unmap(vcpu->kvm); + + write_unlock(&vcpu->kvm->mmu_lock); + + return true; +} + /* Only defined here as this is an internal "abstraction" */ union tlbi_info { struct { @@ -2880,7 +2903,9 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE1, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), + SYS_INSN(TLBI_ALLE1IS, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), + SYS_INSN(TLBI_ALLE1, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), }; -- cgit v1.2.3 From 70109bcd701e20d27a81bec6c19e03b8e0c06eba Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:46 +0100 Subject: KVM: arm64: nv: Handle TLBI IPAS2E1{,IS} operations TLBI IPAS2E1* are the last class of TLBI instructions we need to handle. For each matching S2 MMU context, we invalidate a range corresponding to the largest possible mapping for that context. At this stage, we don't handle TTL, which means we are likely over-invalidating. Further patches will aim at making this a bit better. 
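A small worked example of the over-invalidation described above, written as a standalone sketch rather than kernel code (the address and granule are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t xt = 0x40351;				/* Xt[35:0], i.e. IPA >> 12 */
	uint64_t ipa = (xt & ((1ULL << 36) - 1)) << 12;	/* 0x40351000 */
	uint64_t max_size = 1ULL << 30;			/* 1GB block for a 4K granule */
	uint64_t base = ipa & ~(max_size - 1);

	/* Prints: invalidate [0x40000000, 0x80000000) */
	printf("invalidate [%#llx, %#llx)\n",
	       (unsigned long long)base,
	       (unsigned long long)(base + max_size));
	return 0;
}

A single-page invalidation thus tears down a full 1GB worth of shadow mappings around it, which is exactly what the TTL handling in the following patches is meant to avoid.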
Co-developed-by: Jintack Lim Co-developed-by: Christoffer Dall Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-11-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d8d6380b7c66..06963f1d206e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2780,6 +2780,31 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static bool kvm_supported_tlbi_ipas2_op(struct kvm_vcpu *vpcu, u32 instr) +{ + struct kvm *kvm = vpcu->kvm; + u8 CRm = sys_reg_CRm(instr); + u8 Op2 = sys_reg_Op2(instr); + + if (sys_reg_CRn(instr) == TLBI_CRn_nXS && + !kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP)) + return false; + + if (CRm == TLBI_CRm_IPAIS && (Op2 == 2 || Op2 == 6) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + if (CRm == TLBI_CRm_IPAONS && (Op2 == 0 || Op2 == 4) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) + return false; + + if (CRm == TLBI_CRm_IPAONS && (Op2 == 3 || Op2 == 7) && + !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE)) + return false; + + return true; +} + /* Only defined here as this is an internal "abstraction" */ union tlbi_info { struct { @@ -2829,6 +2854,72 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu, + const union tlbi_info *info) +{ + unsigned long max_size; + u64 base_addr; + + /* + * We drop a number of things from the supplied value: + * + * - NS bit: we're non-secure only. + * + * - TTL field: We already have the granule size from the + * VTCR_EL2.TG0 field, and the level is only relevant to the + * guest's S2PT. + * + * - IPA[51:48]: We don't support 52bit IPA just yet... + * + * And of course, adjust the IPA to be on an actual address. + */ + base_addr = (info->ipa.addr & GENMASK_ULL(35, 0)) << 12; + + /* Compute the maximum extent of the invalidation */ + switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) { + case VTCR_EL2_TG0_4K: + max_size = SZ_1G; + break; + case VTCR_EL2_TG0_16K: + max_size = SZ_32M; + break; + case VTCR_EL2_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + /* + * No, we do not support 52bit IPA in nested yet. Once + * we do, this should be 4TB. 
+ */ + max_size = SZ_512M; + break; + } + + base_addr &= ~(max_size - 1); + + kvm_stage2_unmap_range(mmu, base_addr, max_size); +} + +static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + + if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) { + kvm_inject_undefined(vcpu); + return false; + } + + kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), + &(union tlbi_info) { + .ipa = { + .addr = p->regval, + }, + }, + s2_mmu_unmap_ipa); + + return true; +} + static void s2_mmu_tlbi_s1e1(struct kvm_s2_mmu *mmu, const union tlbi_info *info) { @@ -2903,8 +2994,13 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE1, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), + SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), + SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), + SYS_INSN(TLBI_ALLE1IS, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), + SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is), + SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is), SYS_INSN(TLBI_ALLE1, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), }; -- cgit v1.2.3 From d1de1576dc2178efcc5536edb0ea2b1cf022bd3e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:47 +0100 Subject: KVM: arm64: nv: Handle FEAT_TTL hinted TLB operations Support guest-provided information information to size the range of required invalidation. This helps with reducing over-invalidation, provided that the guest actually provides accurate information. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-12-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 2 + arch/arm64/kvm/nested.c | 89 +++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 24 +--------- 3 files changed, 92 insertions(+), 23 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 9b7c92ab87cf..fcb0de3a93fe 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -124,6 +124,8 @@ extern void kvm_nested_s2_wp(struct kvm *kvm); extern void kvm_nested_s2_unmap(struct kvm *kvm); extern void kvm_nested_s2_flush(struct kvm *kvm); +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val); + static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr) { struct kvm *kvm = vpcu->kvm; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index a2734d135211..592241de60e3 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -364,6 +364,95 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, return ret; } +static unsigned int ttl_to_size(u8 ttl) +{ + int level = ttl & 3; + int gran = (ttl >> 2) & 3; + unsigned int max_size = 0; + + switch (gran) { + case TLBI_TTL_TG_4K: + switch (level) { + case 0: + break; + case 1: + max_size = SZ_1G; + break; + case 2: + max_size = SZ_2M; + break; + case 3: + max_size = SZ_4K; + break; + } + break; + case TLBI_TTL_TG_16K: + switch (level) { + case 0: + case 1: + break; + case 2: + max_size = SZ_32M; + break; + case 3: + max_size = SZ_16K; + break; + } + break; + case TLBI_TTL_TG_64K: + switch (level) { + case 0: + case 1: + /* No 52bit IPA support */ + break; + case 2: + max_size = SZ_512M; + break; + case 3: + max_size = SZ_64K; + break; + } + break; + default: /* No 
size information */ + break; + } + + return max_size; +} + +unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val) +{ + unsigned long max_size; + u8 ttl; + + ttl = FIELD_GET(GENMASK_ULL(47, 44), val); + + max_size = ttl_to_size(ttl); + + if (!max_size) { + /* Compute the maximum extent of the invalidation */ + switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) { + case VTCR_EL2_TG0_4K: + max_size = SZ_1G; + break; + case VTCR_EL2_TG0_16K: + max_size = SZ_32M; + break; + case VTCR_EL2_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + /* + * No, we do not support 52bit IPA in nested yet. Once + * we do, this should be 4TB. + */ + max_size = SZ_512M; + break; + } + } + + WARN_ON(!max_size); + return max_size; +} + /* * We can have multiple *different* MMU contexts with the same VMID: * diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 06963f1d206e..5bed362f80d3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2865,34 +2865,12 @@ static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu, * * - NS bit: we're non-secure only. * - * - TTL field: We already have the granule size from the - * VTCR_EL2.TG0 field, and the level is only relevant to the - * guest's S2PT. - * * - IPA[51:48]: We don't support 52bit IPA just yet... * * And of course, adjust the IPA to be on an actual address. */ base_addr = (info->ipa.addr & GENMASK_ULL(35, 0)) << 12; - - /* Compute the maximum extent of the invalidation */ - switch (mmu->tlb_vtcr & VTCR_EL2_TG0_MASK) { - case VTCR_EL2_TG0_4K: - max_size = SZ_1G; - break; - case VTCR_EL2_TG0_16K: - max_size = SZ_32M; - break; - case VTCR_EL2_TG0_64K: - default: /* IMPDEF: treat any other value as 64k */ - /* - * No, we do not support 52bit IPA in nested yet. Once - * we do, this should be 4TB. - */ - max_size = SZ_512M; - break; - } - + max_size = compute_tlb_inval_range(mmu, info->ipa.addr); base_addr &= ~(max_size - 1); kvm_stage2_unmap_range(mmu, base_addr, max_size); -- cgit v1.2.3 From b1a3a94812b95fb8ae410d1ca04a4cc3d61a7503 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:48 +0100 Subject: KVM: arm64: nv: Tag shadow S2 entries with guest's leaf S2 level Populate bits [56:55] of the leaf entry with the level provided by the guest's S2 translation. This will allow us to better scope the invalidation by remembering the mapping size. Of course, this assume that the guest will issue an invalidation with an address that falls into the same leaf. If the guest doesn't, we'll over-invalidate. 
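As a sketch of the tagging scheme (illustrative; the patch reuses the existing KVM_PGTABLE_PROT_SW1/SW0 software bits rather than open-coded masks), the guest's S2 level is stashed in bits [56:55] of the shadow leaf descriptor and recovered at invalidation time:

#include <stdint.h>

#define NV_GUEST_MAP_SZ_SHIFT	55
#define NV_GUEST_MAP_SZ_MASK	(0x3ULL << NV_GUEST_MAP_SZ_SHIFT)

static inline uint64_t encode_guest_level(uint64_t desc, unsigned int level)
{
	return (desc & ~NV_GUEST_MAP_SZ_MASK) |
	       ((uint64_t)(level & 0x3) << NV_GUEST_MAP_SZ_SHIFT);
}

static inline unsigned int decode_guest_level(uint64_t desc)
{
	return (desc & NV_GUEST_MAP_SZ_MASK) >> NV_GUEST_MAP_SZ_SHIFT;
}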
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-13-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_nested.h | 8 ++++++++ arch/arm64/kvm/mmu.c | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index fcb0de3a93fe..971dbe533730 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -5,6 +5,7 @@ #include #include #include +#include static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu) { @@ -195,4 +196,11 @@ static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr) } #endif +#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0) + +static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) +{ + return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level); +} + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 4ed93a384255..6981b1bc0946 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1598,11 +1598,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * Potentially reduce shadow S2 permissions to match the guest's own * S2. For exec faults, we'd only reach this point if the guest * actually allowed it (see kvm_s2_handle_perm_fault). + * + * Also encode the level of the original translation in the SW bits + * of the leaf entry as a proxy for the span of that translation. + * This will be retrieved on TLB invalidation from the guest and + * used to limit the invalidation scope if a TTL hint or a range + * isn't provided. */ if (nested) { writable &= kvm_s2_trans_writable(nested); if (!kvm_s2_trans_readable(nested)) prot &= ~KVM_PGTABLE_PROT_R; + + prot |= kvm_encode_nested_level(nested); } read_lock(&kvm->mmu_lock); @@ -1661,14 +1669,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_is_perm && vma_pagesize == fault_granule) + if (fault_is_perm && vma_pagesize == fault_granule) { + /* + * Drop the SW bits in favour of those stored in the + * PTE, which will be preserved. + */ + prot &= ~KVM_NV_GUEST_MAP_SZ; ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); - else + } else { ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, memcache, KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); + } + out_unlock: read_unlock(&kvm->mmu_lock); -- cgit v1.2.3 From 809b2e6013a51352e407c3071219f12ecceed47f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:49 +0100 Subject: KVM: arm64: nv: Invalidate TLBs based on shadow S2 TTL-like information In order to be able to make S2 TLB invalidations more performant on NV, let's use a scheme derived from the FEAT_TTL extension. If bits [56:55] in the leaf descriptor translating the address in the corresponding shadow S2 are non-zero, they indicate a level which can be used as an invalidation range. This allows further reduction of the systematic over-invalidation that takes place otherwise. 
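For reference, a worked illustration of the TTL-style hint being reconstructed here (a sketch; the encoding is assumed to follow the usual TTL layout, granule in bits [3:2] and level in bits [1:0]):

#include <stdint.h>

/* Compose a TTL hint as consumed by ttl_to_size(). */
static inline uint8_t make_ttl(uint8_t tg, uint8_t level)
{
	return ((tg & 0x3) << 2) | (level & 0x3);
}

/*
 * make_ttl(1, 2) == 0b0110  ->  2MB block   (4K granule, level 2)
 * make_ttl(1, 3) == 0b0111  ->  4KB page    (4K granule, level 3)
 * make_ttl(3, 2) == 0b1110  ->  512MB block (64K granule, level 2)
 *
 * A TTL of 0 means "no usable information": the shadow S2 walk
 * introduced by this patch is the fallback, and after that the
 * worst-case size for the configured granule.
 */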
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-14-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 592241de60e3..1860ed4a4c00 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -4,6 +4,7 @@ * Author: Jintack Lim */ +#include #include #include @@ -420,12 +421,94 @@ static unsigned int ttl_to_size(u8 ttl) return max_size; } +/* + * Compute the equivalent of the TTL field by parsing the shadow PT. The + * granule size is extracted from the cached VTCR_EL2.TG0 while the level is + * retrieved from first entry carrying the level as a tag. + */ +static u8 get_guest_mapping_ttl(struct kvm_s2_mmu *mmu, u64 addr) +{ + u64 tmp, sz = 0, vtcr = mmu->tlb_vtcr; + kvm_pte_t pte; + u8 ttl, level; + + lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(mmu)->mmu_lock); + + switch (vtcr & VTCR_EL2_TG0_MASK) { + case VTCR_EL2_TG0_4K: + ttl = (TLBI_TTL_TG_4K << 2); + break; + case VTCR_EL2_TG0_16K: + ttl = (TLBI_TTL_TG_16K << 2); + break; + case VTCR_EL2_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + ttl = (TLBI_TTL_TG_64K << 2); + break; + } + + tmp = addr; + +again: + /* Iteratively compute the block sizes for a particular granule size */ + switch (vtcr & VTCR_EL2_TG0_MASK) { + case VTCR_EL2_TG0_4K: + if (sz < SZ_4K) sz = SZ_4K; + else if (sz < SZ_2M) sz = SZ_2M; + else if (sz < SZ_1G) sz = SZ_1G; + else sz = 0; + break; + case VTCR_EL2_TG0_16K: + if (sz < SZ_16K) sz = SZ_16K; + else if (sz < SZ_32M) sz = SZ_32M; + else sz = 0; + break; + case VTCR_EL2_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + if (sz < SZ_64K) sz = SZ_64K; + else if (sz < SZ_512M) sz = SZ_512M; + else sz = 0; + break; + } + + if (sz == 0) + return 0; + + tmp &= ~(sz - 1); + if (kvm_pgtable_get_leaf(mmu->pgt, tmp, &pte, NULL)) + goto again; + if (!(pte & PTE_VALID)) + goto again; + level = FIELD_GET(KVM_NV_GUEST_MAP_SZ, pte); + if (!level) + goto again; + + ttl |= level; + + /* + * We now have found some level information in the shadow S2. Check + * that the resulting range is actually including the original IPA. + */ + sz = ttl_to_size(ttl); + if (addr < (tmp + sz)) + return ttl; + + return 0; +} + unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val) { + struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); unsigned long max_size; u8 ttl; - ttl = FIELD_GET(GENMASK_ULL(47, 44), val); + ttl = FIELD_GET(TLBI_TTL_MASK, val); + + if (!ttl || !kvm_has_feat(kvm, ID_AA64MMFR2_EL1, TTL, IMP)) { + /* No TTL, check the shadow S2 for a hint */ + u64 addr = (val & GENMASK_ULL(35, 0)) << 12; + ttl = get_guest_mapping_ttl(mmu, addr); + } max_size = ttl_to_size(ttl); -- cgit v1.2.3 From 0cb8aae2267687a13e22cf906d1ee1e9840bbe27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:50 +0100 Subject: KVM: arm64: nv: Add handling of outer-shareable TLBI operations Our handling of outer-shareable TLBIs is pretty basic: we just map them to the existing inner-shareable ones, because we really don't have anything else. The only significant change is that we can now advertise FEAT_TLBIOS support if the host supports it. 
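As background for the ID register clamping in this patch (illustration only): the ID_AA64ISAR0_EL1.TLB field advertises 0 for no outer-shareable or range TLBIs, 1 for TLBIOS, and 2 for TLBIOS plus TLBIRANGE, so capping the guest-visible value at 1 exposes the outer-shareable forms while still hiding the range forms:

#include <stdint.h>

/* min(host value, TLB_OS): keep TLBIOS, drop TLBIRANGE for now. */
static inline uint64_t clamp_tlb_to_tlbios(uint64_t host_tlb_field)
{
	return host_tlb_field > 1 ? 1 : host_tlb_field;
}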
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-15-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/tlb.c | 10 ++++++++++ arch/arm64/kvm/nested.c | 5 ++++- arch/arm64/kvm/sys_regs.c | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 75aa36465805..85db6ffd9d9d 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -226,6 +226,7 @@ void __kvm_flush_vm_context(void) * * - a TLBI targeting EL2 S1 is remapped to EL1 S1 * - a non-shareable TLBI is upgraded to being inner-shareable + * - an outer-shareable TLBI is also mapped to inner-shareable */ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) { @@ -245,32 +246,41 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) switch (sys_encoding) { case OP_TLBI_ALLE2: case OP_TLBI_ALLE2IS: + case OP_TLBI_ALLE2OS: case OP_TLBI_VMALLE1: case OP_TLBI_VMALLE1IS: + case OP_TLBI_VMALLE1OS: __tlbi(vmalle1is); break; case OP_TLBI_VAE2: case OP_TLBI_VAE2IS: + case OP_TLBI_VAE2OS: case OP_TLBI_VAE1: case OP_TLBI_VAE1IS: + case OP_TLBI_VAE1OS: __tlbi(vae1is, va); break; case OP_TLBI_VALE2: case OP_TLBI_VALE2IS: + case OP_TLBI_VALE2OS: case OP_TLBI_VALE1: case OP_TLBI_VALE1IS: + case OP_TLBI_VALE1OS: __tlbi(vale1is, va); break; case OP_TLBI_ASIDE1: case OP_TLBI_ASIDE1IS: + case OP_TLBI_ASIDE1OS: __tlbi(aside1is, va); break; case OP_TLBI_VAAE1: case OP_TLBI_VAAE1IS: + case OP_TLBI_VAAE1OS: __tlbi(vaae1is, va); break; case OP_TLBI_VAALE1: case OP_TLBI_VAALE1IS: + case OP_TLBI_VAALE1OS: __tlbi(vaale1is, va); break; default: diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 1860ed4a4c00..63bc8d5022fe 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -805,9 +805,12 @@ static u64 limit_nv_id_reg(u32 id, u64 val) switch (id) { case SYS_ID_AA64ISAR0_EL1: - /* Support everything but TME, O.S. 
and Range TLBIs */ + /* Support everything but TME and Range TLBIs */ + tmp = FIELD_GET(NV_FTR(ISAR0, TLB), val); + tmp = min(tmp, ID_AA64ISAR0_EL1_TLB_OS); val &= ~(NV_FTR(ISAR0, TLB) | NV_FTR(ISAR0, TME)); + val |= FIELD_PREP(NV_FTR(ISAR0, TLB), tmp); break; case SYS_ID_AA64ISAR1_EL1: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5bed362f80d3..7dec7da167f6 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2959,6 +2959,13 @@ static struct sys_reg_desc sys_insn_descs[] = { { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, + SYS_INSN(TLBI_VMALLE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_VMALLE1IS, handle_tlbi_el1), SYS_INSN(TLBI_VAE1IS, handle_tlbi_el1), SYS_INSN(TLBI_ASIDE1IS, handle_tlbi_el1), @@ -2975,9 +2982,17 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), + SYS_INSN(TLBI_ALLE2OS, trap_undef), + SYS_INSN(TLBI_VAE2OS, trap_undef), + SYS_INSN(TLBI_ALLE1OS, handle_alle1is), + SYS_INSN(TLBI_VALE2OS, trap_undef), + SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is), + SYS_INSN(TLBI_ALLE1IS, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), + SYS_INSN(TLBI_IPAS2E1OS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is), + SYS_INSN(TLBI_IPAS2LE1OS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is), SYS_INSN(TLBI_ALLE1, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), -- cgit v1.2.3 From 5d476ca57d7d1fb6a5a39e46747bb2034190ee4a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:51 +0100 Subject: KVM: arm64: nv: Add handling of range-based TLBI operations We already support some form of range operation by handling FEAT_TTL, but so far the "arbitrary" range operations are unsupported. Let's fix that. For EL2 S1, this is simple enough: we just map both NSH, ISH and OSH instructions onto the ISH version for EL1. For TLBI instructions affecting EL1 S1, we use the same model as their non-range counterpart to invalidate in the context of the correct VMID. For TLBI instructions affecting S2, we interpret the data passed by the guest to compute the range and use that to tear-down part of the shadow S2 range and invalidate the TLBs. Finally, we advertise FEAT_TLBIRANGE if the host supports it. 
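A worked example of the range decoding (standalone sketch; the patch itself relies on the kernel's __TLBI_RANGE_PAGES() helper): the Xt operand of a range TLBI packs TG in bits [47:46], SCALE in [45:44], NUM in [43:39] and BASEADDR in [36:0], and the span covers (NUM + 1) * 2^(5 * SCALE + 1) granules starting at BASEADDR:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t xt = (1ULL << 46) |	/* TG    = 1, 4K granule */
		      (0ULL << 44) |	/* SCALE = 0             */
		      (7ULL << 39) |	/* NUM   = 7             */
		      0x40000;		/* BASEADDR = IPA >> 12  */
	unsigned int shift = 12;	/* 4K granule            */
	uint64_t base  = (xt & ((1ULL << 37) - 1)) << shift;
	uint64_t num   = (xt >> 39) & 0x1f;
	uint64_t scale = (xt >> 44) & 0x3;
	uint64_t pages = (num + 1) << (5 * scale + 1);

	/* (7 + 1) * 2 = 16 pages of 4K: prints unmap [0x40000000, 0x40010000) */
	printf("unmap [%#llx, %#llx)\n",
	       (unsigned long long)base,
	       (unsigned long long)(base + (pages << shift)));
	return 0;
}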
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-16-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/tlb.c | 26 ++++++++++++++ arch/arm64/kvm/nested.c | 8 ++--- arch/arm64/kvm/sys_regs.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 85db6ffd9d9d..18e30f03f3f5 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -283,6 +283,32 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_VAALE1OS: __tlbi(vaale1is, va); break; + case OP_TLBI_RVAE2: + case OP_TLBI_RVAE2IS: + case OP_TLBI_RVAE2OS: + case OP_TLBI_RVAE1: + case OP_TLBI_RVAE1IS: + case OP_TLBI_RVAE1OS: + __tlbi(rvae1is, va); + break; + case OP_TLBI_RVALE2: + case OP_TLBI_RVALE2IS: + case OP_TLBI_RVALE2OS: + case OP_TLBI_RVALE1: + case OP_TLBI_RVALE1IS: + case OP_TLBI_RVALE1OS: + __tlbi(rvale1is, va); + break; + case OP_TLBI_RVAAE1: + case OP_TLBI_RVAAE1IS: + case OP_TLBI_RVAAE1OS: + __tlbi(rvaae1is, va); + break; + case OP_TLBI_RVAALE1: + case OP_TLBI_RVAALE1IS: + case OP_TLBI_RVAALE1OS: + __tlbi(rvaale1is, va); + break; default: ret = -EINVAL; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 63bc8d5022fe..451926cb6c5d 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -805,12 +805,8 @@ static u64 limit_nv_id_reg(u32 id, u64 val) switch (id) { case SYS_ID_AA64ISAR0_EL1: - /* Support everything but TME and Range TLBIs */ - tmp = FIELD_GET(NV_FTR(ISAR0, TLB), val); - tmp = min(tmp, ID_AA64ISAR0_EL1_TLB_OS); - val &= ~(NV_FTR(ISAR0, TLB) | - NV_FTR(ISAR0, TME)); - val |= FIELD_PREP(NV_FTR(ISAR0, TLB), tmp); + /* Support everything but TME */ + val &= ~NV_FTR(ISAR0, TME); break; case SYS_ID_AA64ISAR1_EL1: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7dec7da167f6..f6edcb863577 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2854,6 +2854,57 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2); + u64 base, range, tg, num, scale; + int shift; + + if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) { + kvm_inject_undefined(vcpu); + return false; + } + + /* + * Because the shadow S2 structure doesn't necessarily reflect that + * of the guest's S2 (different base granule size, for example), we + * decide to ignore TTL and only use the described range. 
+ */ + tg = FIELD_GET(GENMASK(47, 46), p->regval); + scale = FIELD_GET(GENMASK(45, 44), p->regval); + num = FIELD_GET(GENMASK(43, 39), p->regval); + base = p->regval & GENMASK(36, 0); + + switch(tg) { + case 1: + shift = 12; + break; + case 2: + shift = 14; + break; + case 3: + default: /* IMPDEF: handle tg==0 as 64k */ + shift = 16; + break; + } + + base <<= shift; + range = __TLBI_RANGE_PAGES(num, scale) << shift; + + kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr), + &(union tlbi_info) { + .range = { + .start = base, + .size = range, + }, + }, + s2_mmu_unmap_range); + + return true; +} + static void s2_mmu_unmap_ipa(struct kvm_s2_mmu *mmu, const union tlbi_info *info) { @@ -2966,12 +3017,28 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE1OS, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1IS, handle_tlbi_el1), + SYS_INSN(TLBI_VMALLE1IS, handle_tlbi_el1), SYS_INSN(TLBI_VAE1IS, handle_tlbi_el1), SYS_INSN(TLBI_ASIDE1IS, handle_tlbi_el1), SYS_INSN(TLBI_VAAE1IS, handle_tlbi_el1), SYS_INSN(TLBI_VALE1IS, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1IS, handle_tlbi_el1), + + SYS_INSN(TLBI_RVAE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1OS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1OS, handle_tlbi_el1), + + SYS_INSN(TLBI_RVAE1, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1, handle_tlbi_el1), + SYS_INSN(TLBI_VMALLE1, handle_tlbi_el1), SYS_INSN(TLBI_VAE1, handle_tlbi_el1), SYS_INSN(TLBI_ASIDE1, handle_tlbi_el1), @@ -2980,7 +3047,9 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is), SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is), SYS_INSN(TLBI_ALLE2OS, trap_undef), SYS_INSN(TLBI_VAE2OS, trap_undef), @@ -2988,12 +3057,23 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE2OS, trap_undef), SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is), + SYS_INSN(TLBI_RVAE2IS, trap_undef), + SYS_INSN(TLBI_RVALE2IS, trap_undef), + SYS_INSN(TLBI_ALLE1IS, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is), SYS_INSN(TLBI_IPAS2E1OS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2E1, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2E1, handle_ripas2e1is), + SYS_INSN(TLBI_RIPAS2E1OS, handle_ripas2e1is), SYS_INSN(TLBI_IPAS2LE1OS, handle_ipas2e1is), SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is), + SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is), + SYS_INSN(TLBI_RVAE2OS, trap_undef), + SYS_INSN(TLBI_RVALE2OS, trap_undef), + SYS_INSN(TLBI_RVAE2, trap_undef), + SYS_INSN(TLBI_RVALE2, trap_undef), SYS_INSN(TLBI_ALLE1, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), }; -- cgit v1.2.3 From 0feec7769a63ef15401a9820c2039e26f0391825 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Jun 2024 15:45:52 +0100 Subject: KVM: arm64: nv: Add handling of NXS-flavoured TLBI operations Latest kid on the block: NXS (Non-eXtra-Slow) TLBI operations. Let's add those in bulk (NSH, ISH, OSH, both normal and range) as they directly map to their XS (the standard ones) counterparts. Not a lot to say about them, they are basically useless. 
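Conceptually (this sketch is not part of the patch, which simply routes the nXS encodings to the existing handlers), an nXS TLBI only differs from its XS counterpart by using CRn 9 instead of CRn 8, so "upgrading" one amounts to treating the two CRn values the same:

#include <stdint.h>

/* Map TLBI_CRn_nXS (9) onto TLBI_CRn_XS (8); everything else is unchanged. */
static inline uint8_t tlbi_effective_crn(uint8_t crn)
{
	return (crn == 9) ? 8 : crn;
}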
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240614144552.2773592-17-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/tlb.c | 46 ++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 18e30f03f3f5..3d50a1bd2bdb 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -227,6 +227,7 @@ void __kvm_flush_vm_context(void) * - a TLBI targeting EL2 S1 is remapped to EL1 S1 * - a non-shareable TLBI is upgraded to being inner-shareable * - an outer-shareable TLBI is also mapped to inner-shareable + * - an nXS TLBI is upgraded to XS */ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) { @@ -250,6 +251,12 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_VMALLE1: case OP_TLBI_VMALLE1IS: case OP_TLBI_VMALLE1OS: + case OP_TLBI_ALLE2NXS: + case OP_TLBI_ALLE2ISNXS: + case OP_TLBI_ALLE2OSNXS: + case OP_TLBI_VMALLE1NXS: + case OP_TLBI_VMALLE1ISNXS: + case OP_TLBI_VMALLE1OSNXS: __tlbi(vmalle1is); break; case OP_TLBI_VAE2: @@ -258,6 +265,12 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_VAE1: case OP_TLBI_VAE1IS: case OP_TLBI_VAE1OS: + case OP_TLBI_VAE2NXS: + case OP_TLBI_VAE2ISNXS: + case OP_TLBI_VAE2OSNXS: + case OP_TLBI_VAE1NXS: + case OP_TLBI_VAE1ISNXS: + case OP_TLBI_VAE1OSNXS: __tlbi(vae1is, va); break; case OP_TLBI_VALE2: @@ -266,21 +279,36 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_VALE1: case OP_TLBI_VALE1IS: case OP_TLBI_VALE1OS: + case OP_TLBI_VALE2NXS: + case OP_TLBI_VALE2ISNXS: + case OP_TLBI_VALE2OSNXS: + case OP_TLBI_VALE1NXS: + case OP_TLBI_VALE1ISNXS: + case OP_TLBI_VALE1OSNXS: __tlbi(vale1is, va); break; case OP_TLBI_ASIDE1: case OP_TLBI_ASIDE1IS: case OP_TLBI_ASIDE1OS: + case OP_TLBI_ASIDE1NXS: + case OP_TLBI_ASIDE1ISNXS: + case OP_TLBI_ASIDE1OSNXS: __tlbi(aside1is, va); break; case OP_TLBI_VAAE1: case OP_TLBI_VAAE1IS: case OP_TLBI_VAAE1OS: + case OP_TLBI_VAAE1NXS: + case OP_TLBI_VAAE1ISNXS: + case OP_TLBI_VAAE1OSNXS: __tlbi(vaae1is, va); break; case OP_TLBI_VAALE1: case OP_TLBI_VAALE1IS: case OP_TLBI_VAALE1OS: + case OP_TLBI_VAALE1NXS: + case OP_TLBI_VAALE1ISNXS: + case OP_TLBI_VAALE1OSNXS: __tlbi(vaale1is, va); break; case OP_TLBI_RVAE2: @@ -289,6 +317,12 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_RVAE1: case OP_TLBI_RVAE1IS: case OP_TLBI_RVAE1OS: + case OP_TLBI_RVAE2NXS: + case OP_TLBI_RVAE2ISNXS: + case OP_TLBI_RVAE2OSNXS: + case OP_TLBI_RVAE1NXS: + case OP_TLBI_RVAE1ISNXS: + case OP_TLBI_RVAE1OSNXS: __tlbi(rvae1is, va); break; case OP_TLBI_RVALE2: @@ -297,16 +331,28 @@ int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding) case OP_TLBI_RVALE1: case OP_TLBI_RVALE1IS: case OP_TLBI_RVALE1OS: + case OP_TLBI_RVALE2NXS: + case OP_TLBI_RVALE2ISNXS: + case OP_TLBI_RVALE2OSNXS: + case OP_TLBI_RVALE1NXS: + case OP_TLBI_RVALE1ISNXS: + case OP_TLBI_RVALE1OSNXS: __tlbi(rvale1is, va); break; case OP_TLBI_RVAAE1: case OP_TLBI_RVAAE1IS: case OP_TLBI_RVAAE1OS: + case OP_TLBI_RVAAE1NXS: + case OP_TLBI_RVAAE1ISNXS: + case OP_TLBI_RVAAE1OSNXS: __tlbi(rvaae1is, va); break; case OP_TLBI_RVAALE1: case OP_TLBI_RVAALE1IS: case OP_TLBI_RVAALE1OS: + case OP_TLBI_RVAALE1NXS: + case OP_TLBI_RVAALE1ISNXS: + case OP_TLBI_RVAALE1OSNXS: __tlbi(rvaale1is, va); break; default: 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6edcb863577..803cd5f16e43 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3046,6 +3046,42 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE1, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1, handle_tlbi_el1), + SYS_INSN(TLBI_VMALLE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1OSNXS, handle_tlbi_el1), + + SYS_INSN(TLBI_RVAE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1ISNXS, handle_tlbi_el1), + + SYS_INSN(TLBI_VMALLE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1ISNXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1ISNXS, handle_tlbi_el1), + + SYS_INSN(TLBI_RVAE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1OSNXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1OSNXS, handle_tlbi_el1), + + SYS_INSN(TLBI_RVAE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAAE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVALE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_RVAALE1NXS, handle_tlbi_el1), + + SYS_INSN(TLBI_VMALLE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_ASIDE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAAE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_VALE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_VAALE1NXS, handle_tlbi_el1), + SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is), SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), @@ -3076,6 +3112,43 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_RVALE2, trap_undef), SYS_INSN(TLBI_ALLE1, handle_alle1is), SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is), + + SYS_INSN(TLBI_IPAS2E1ISNXS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2E1ISNXS, handle_ripas2e1is), + SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is), + + SYS_INSN(TLBI_ALLE2OSNXS, trap_undef), + SYS_INSN(TLBI_VAE2OSNXS, trap_undef), + SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is), + SYS_INSN(TLBI_VALE2OSNXS, trap_undef), + SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is), + + SYS_INSN(TLBI_RVAE2ISNXS, trap_undef), + SYS_INSN(TLBI_RVALE2ISNXS, trap_undef), + SYS_INSN(TLBI_ALLE2ISNXS, trap_undef), + SYS_INSN(TLBI_VAE2ISNXS, trap_undef), + + SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is), + SYS_INSN(TLBI_VALE2ISNXS, trap_undef), + SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is), + SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is), + SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2E1NXS, handle_ripas2e1is), + SYS_INSN(TLBI_RIPAS2E1OSNXS, handle_ripas2e1is), + SYS_INSN(TLBI_IPAS2LE1OSNXS, handle_ipas2e1is), + SYS_INSN(TLBI_IPAS2LE1NXS, handle_ipas2e1is), + SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is), + SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is), + SYS_INSN(TLBI_RVAE2OSNXS, trap_undef), + SYS_INSN(TLBI_RVALE2OSNXS, trap_undef), + SYS_INSN(TLBI_RVAE2NXS, trap_undef), + SYS_INSN(TLBI_RVALE2NXS, trap_undef), + SYS_INSN(TLBI_ALLE2NXS, trap_undef), + SYS_INSN(TLBI_VAE2NXS, trap_undef), + SYS_INSN(TLBI_ALLE1NXS, handle_alle1is), + SYS_INSN(TLBI_VALE2NXS, trap_undef), + SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is), }; 
static const struct sys_reg_desc *first_idreg; -- cgit v1.2.3 From 3dc14eefa504d2fbe8e75113c7bb164a20bc39b0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 17 Jun 2024 18:10:18 +0000 Subject: KVM: arm64: nv: Use GFP_KERNEL_ACCOUNT for sysreg_masks allocation Of course, userspace is in the driver's seat for struct kvm and associated allocations. Make sure the sysreg_masks allocation participates in kmem accounting. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617181018.2054332-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 0acb60273482..913018e4cdae 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -198,7 +198,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) goto out; kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!kvm->arch.sysreg_masks) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 4e8ff73eb7ae3f7a7ec1d59f4d54935ae28f4795 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Jun 2024 17:40:27 +0000 Subject: KVM: arm64: Get sys_reg encoding from descriptor in idregs_debug_show() KVM is about to add support for more VM-scoped feature ID regs that live outside of the id_regs[] array, which means the index of the debugfs iterator may not actually be an index into the array. Prepare by getting the sys_reg encoding from the descriptor itself. Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20240619174036.483943-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22b45a15d068..ad453c7ad6cc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3502,7 +3502,7 @@ static int idregs_debug_show(struct seq_file *s, void *v) return 0; seq_printf(s, "%20s:\t%016llx\n", - desc->name, IDREG(kvm, IDX_IDREG(kvm->arch.idreg_debugfs_iter))); + desc->name, IDREG(kvm, reg_to_encoding(desc))); return 0; } -- cgit v1.2.3 From 410db103f6ebc68a505ef541291ec327e385205a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Jun 2024 17:40:28 +0000 Subject: KVM: arm64: Make idregs debugfs iterator search sysreg table directly CTR_EL0 complicates the existing scheme for iterating feature ID registers, as it is not in the contiguous range that we presently support. Just search the sysreg table for the Nth feature ID register in anticipation of this. Yes, the debugfs interface has quadratic time completixy now. Boo hoo. 
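As a side note, the lookup this switches to is simply "scan the whole descriptor table and count matching entries until the Nth one". A stripped-down, standalone sketch of that pattern in plain C (the toy table and names are invented here, not KVM's):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for a sys_reg_desc-like table entry. */
struct toy_desc {
    const char *name;
    bool is_ftr_id_reg;             /* would be is_vm_ftr_id_reg() in KVM */
};

static const struct toy_desc toy_table[] = {
    { "SCTLR_EL1",        false },
    { "ID_AA64PFR0_EL1",  true  },
    { "ID_AA64PFR1_EL1",  true  },
    { "TTBR0_EL1",        false },
    { "ID_AA64MMFR0_EL1", true  },
};

/* Return the pos'th feature ID register, or NULL: O(n) per lookup,
 * hence the quadratic iteration cost the commit message shrugs off. */
static const struct toy_desc *toy_find(size_t pos)
{
    size_t i, idx = 0;

    for (i = 0; i < sizeof(toy_table) / sizeof(toy_table[0]); i++) {
        if (!toy_table[i].is_ftr_id_reg)
            continue;
        if (idx++ == pos)
            return &toy_table[i];
    }
    return NULL;
}

int main(void)
{
    for (size_t pos = 0; toy_find(pos); pos++)
        printf("%zu: %s\n", pos, toy_find(pos)->name);
    return 0;
}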
Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20240619174036.483943-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ad453c7ad6cc..1036f865c826 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2753,8 +2753,6 @@ static struct sys_reg_desc sys_insn_descs[] = { { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, }; -static const struct sys_reg_desc *first_idreg; - static bool trap_dbgdidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3440,6 +3438,25 @@ static bool emulate_sys_reg(struct kvm_vcpu *vcpu, return false; } +static const struct sys_reg_desc *idregs_debug_find(struct kvm *kvm, u8 pos) +{ + unsigned long i, idreg_idx = 0; + + for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) { + const struct sys_reg_desc *r = &sys_reg_descs[i]; + + if (!is_vm_ftr_id_reg(reg_to_encoding(r))) + continue; + + if (idreg_idx == pos) + return r; + + idreg_idx++; + } + + return NULL; +} + static void *idregs_debug_start(struct seq_file *s, loff_t *pos) { struct kvm *kvm = s->private; @@ -3451,7 +3468,7 @@ static void *idregs_debug_start(struct seq_file *s, loff_t *pos) if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) && *iter == (u8)~0) { *iter = *pos; - if (*iter >= KVM_ARM_ID_REG_NUM) + if (!idregs_debug_find(kvm, *iter)) iter = NULL; } else { iter = ERR_PTR(-EBUSY); @@ -3468,7 +3485,7 @@ static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos) (*pos)++; - if ((kvm->arch.idreg_debugfs_iter + 1) < KVM_ARM_ID_REG_NUM) { + if (idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter + 1)) { kvm->arch.idreg_debugfs_iter++; return &kvm->arch.idreg_debugfs_iter; @@ -3493,10 +3510,10 @@ static void idregs_debug_stop(struct seq_file *s, void *v) static int idregs_debug_show(struct seq_file *s, void *v) { - struct kvm *kvm = s->private; const struct sys_reg_desc *desc; + struct kvm *kvm = s->private; - desc = first_idreg + kvm->arch.idreg_debugfs_iter; + desc = idregs_debug_find(kvm, kvm->arch.idreg_debugfs_iter); if (!desc->name) return 0; @@ -4115,7 +4132,6 @@ out: int __init kvm_sys_reg_table_init(void) { - struct sys_reg_params params; bool valid = true; unsigned int i; int ret = 0; @@ -4136,12 +4152,6 @@ int __init kvm_sys_reg_table_init(void) for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); - /* Find the first idreg (SYS_ID_PFR0_EL1) in sys_reg_descs. */ - params = encoding_to_params(SYS_ID_PFR0_EL1); - first_idreg = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); - if (!first_idreg) - return -EINVAL; - ret = populate_nv_trap_config(); for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++) -- cgit v1.2.3 From 97ca3fcc15cc0b19ccacb56d25545f1df080fbc0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Jun 2024 17:40:29 +0000 Subject: KVM: arm64: Use read-only helper for reading VM ID registers IDREG() expands to the storage of a particular ID reg, which can be useful for handling both reads and writes. However, outside of a select few situations, the ID registers should be considered read only. Replace current readers with a new macro that expands to the value of the field rather than the field itself. 
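The distinction being enforced here is essentially "macro that expands to storage" versus "macro that expands to a value". A tiny standalone sketch of the difference (GNU C statement expression, matching the kernel's dialect; names invented for illustration):

#include <stdio.h>

static unsigned long long toy_regs[8];

/* Expands to the storage itself: can be written, accidentally or not. */
#define TOY_IDREG(i)            (toy_regs[i])

/* Expands to a value: read-only by construction. */
#define toy_read_id_reg(i)      ({ unsigned long long __v = toy_regs[i]; __v; })

int main(void)
{
    TOY_IDREG(0) = 0x1234;                  /* lvalue: writable */
    printf("%llx\n", toy_read_id_reg(0));   /* rvalue: value only */
    /* toy_read_id_reg(0) = 1;  would not compile */
    return 0;
}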
Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20240619174036.483943-4-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 16 +++++++++++++++- arch/arm64/kvm/pmu-emul.c | 2 +- arch/arm64/kvm/sys_regs.c | 6 +++--- 3 files changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8170c04fde91..1201af636551 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1332,6 +1332,20 @@ static inline void kvm_hyp_reserve(void) { } void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) +{ + switch (reg) { + case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7): + return &ka->id_regs[IDREG_IDX(reg)]; + default: + WARN_ON_ONCE(1); + return NULL; + } +} + +#define kvm_read_vm_id_reg(kvm, reg) \ + ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) + #define __expand_field_sign_unsigned(id, fld, val) \ ((u64)SYS_FIELD_VALUE(id, fld, val)) @@ -1348,7 +1362,7 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); #define get_idreg_field_unsigned(kvm, id, fld) \ ({ \ - u64 __val = IDREG((kvm), SYS_##id); \ + u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \ FIELD_GET(id##_##fld##_MASK, __val); \ }) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index a35ce10e0a9f..7848daeafd03 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -54,7 +54,7 @@ static u32 __kvm_pmu_event_mask(unsigned int pmuver) static u32 kvm_pmu_event_mask(struct kvm *kvm) { - u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1); + u64 dfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1); u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0); return __kvm_pmu_event_mask(pmuver); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1036f865c826..0692a109fd4d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1565,7 +1565,7 @@ static u64 kvm_read_sanitised_id_reg(struct kvm_vcpu *vcpu, static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - return IDREG(vcpu->kvm, reg_to_encoding(r)); + return kvm_read_vm_id_reg(vcpu->kvm, reg_to_encoding(r)); } static bool is_feature_id_reg(u32 encoding) @@ -2760,7 +2760,7 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1); + u64 dfr = kvm_read_vm_id_reg(vcpu->kvm, SYS_ID_AA64DFR0_EL1); u32 el3 = kvm_has_feat(vcpu->kvm, ID_AA64PFR0_EL1, EL3, IMP); p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) | @@ -3519,7 +3519,7 @@ static int idregs_debug_show(struct seq_file *s, void *v) return 0; seq_printf(s, "%20s:\t%016llx\n", - desc->name, IDREG(kvm, reg_to_encoding(desc))); + desc->name, kvm_read_vm_id_reg(kvm, reg_to_encoding(desc))); return 0; } -- cgit v1.2.3 From d7508d27dd8878eb09e470855a546d96e0cfd4d3 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Jun 2024 17:40:30 +0000 Subject: KVM: arm64: Add helper for writing ID regs Replace the remaining usage of IDREG() with a new helper for setting the value of a feature ID register, with the benefit of cramming in some extra sanity checks. 
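Conceptually the new helper is "check the preconditions, then store". A hedged standalone sketch, with assert() standing in for lockdep_assert_held()/KVM_BUG_ON() and a toy register file replacing the real storage:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_TOY_ID_REGS 8

struct toy_vm {
    bool has_run_once;
    bool config_lock_held;
    unsigned long long id_regs[NR_TOY_ID_REGS];
};

/* Refuse writes once the VM has started running or if the register is
 * not one we know how to store. */
static void toy_set_id_reg(struct toy_vm *vm, unsigned int reg,
                           unsigned long long val)
{
    assert(vm->config_lock_held);       /* ~ lockdep_assert_held() */
    assert(!vm->has_run_once);          /* ~ KVM_BUG_ON(kvm_vm_has_ran_once()) */
    assert(reg < NR_TOY_ID_REGS);       /* ~ NULL check on __vm_id_reg() */

    vm->id_regs[reg] = val;
}

int main(void)
{
    struct toy_vm vm = { .config_lock_held = true };

    toy_set_id_reg(&vm, 0, 0xabcd);
    printf("%llx\n", vm.id_regs[0]);
    return 0;
}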
Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20240619174036.483943-5-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/arm64/kvm/nested.c | 4 ++-- arch/arm64/kvm/sys_regs.c | 17 ++++++++++++++--- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1201af636551..74e7c29364ee 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -327,7 +327,6 @@ struct kvm_arch { */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) #define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) -#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)]) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; @@ -1346,6 +1345,8 @@ static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) #define kvm_read_vm_id_reg(kvm, reg) \ ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); + #define __expand_field_sign_unsigned(id, fld, val) \ ((u64)SYS_FIELD_VALUE(id, fld, val)) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 6813c7c7f00a..5db5bc9dd290 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -203,8 +203,8 @@ int kvm_init_nv_sysregs(struct kvm *kvm) } for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++) - kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i), - kvm->arch.id_regs[i]); + kvm_set_vm_id_reg(kvm, IDX_IDREG(i), limit_nv_id_reg(IDX_IDREG(i), + kvm->arch.id_regs[i])); /* VTTBR_EL2 */ res0 = res1 = 0; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0692a109fd4d..8e3358905371 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1851,7 +1851,7 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, ret = arm64_check_features(vcpu, rd, val); if (!ret) - IDREG(vcpu->kvm, id) = val; + kvm_set_vm_id_reg(vcpu->kvm, id, val); mutex_unlock(&vcpu->kvm->arch.config_lock); @@ -1867,6 +1867,18 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return ret; } +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val) +{ + u64 *p = __vm_id_reg(&kvm->arch, reg); + + lockdep_assert_held(&kvm->arch.config_lock); + + if (KVM_BUG_ON(kvm_vm_has_ran_once(kvm) || !p, kvm)) + return; + + *p = val; +} + static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 *val) { @@ -3549,8 +3561,7 @@ static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags)) return; - lockdep_assert_held(&kvm->arch.config_lock); - IDREG(kvm, id) = reg->reset(vcpu, reg); + kvm_set_vm_id_reg(kvm, id, reg->reset(vcpu, reg)); } static void reset_vcpu_ftr_id_reg(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 44241f34fac96d23cb8eac944815a1fdbf4ce523 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 19 Jun 2024 17:40:31 +0000 Subject: KVM: arm64: nv: Use accessors for modifying ID registers In the interest of abstracting away the underlying storage of feature ID registers, rework the nested code to go through the accessors instead of directly iterating the id_regs array. This means we now lose the property that ID registers unknown to the nested code get zeroed, but we really ought to be handling those explicitly going forward. 
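The resulting shape is a plain read-modify-write through the accessors: read the register, clear the feature fields that cannot be supported, write the result back. A minimal standalone sketch of that flow (field masks invented for illustration):

#include <stdio.h>

#define TOY_FTR_FOO     (0xfULL << 4)
#define TOY_FTR_BAR     (0xfULL << 8)

static unsigned long long toy_reg = 0xf73ULL;

static unsigned long long toy_read(void)      { return toy_reg; }
static void toy_write(unsigned long long v)   { toy_reg = v; }

int main(void)
{
    unsigned long long val = toy_read();

    val &= ~(TOY_FTR_FOO | TOY_FTR_BAR);    /* hide unsupported features */
    toy_write(val);

    printf("%016llx\n", toy_read());
    return 0;
}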
Link: https://lore.kernel.org/r/20240619174036.483943-6-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/nested.c | 256 ++++++++++++++++++-------------------- 2 files changed, 122 insertions(+), 135 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 74e7c29364ee..294c78319f58 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -326,7 +326,6 @@ struct kvm_arch { * Atomic access to multiple idregs are guarded by kvm_arch.config_lock. */ #define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id)) -#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask) #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5db5bc9dd290..44085c13e673 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -23,141 +23,131 @@ * This list should get updated as new features get added to the NV * support, and new extension to the architecture. */ -static u64 limit_nv_id_reg(u32 id, u64 val) +static void limit_nv_id_regs(struct kvm *kvm) { - u64 tmp; - - switch (id) { - case SYS_ID_AA64ISAR0_EL1: - /* Support everything but TME, O.S. and Range TLBIs */ - val &= ~(NV_FTR(ISAR0, TLB) | - NV_FTR(ISAR0, TME)); - break; - - case SYS_ID_AA64ISAR1_EL1: - /* Support everything but Spec Invalidation */ - val &= ~(GENMASK_ULL(63, 56) | - NV_FTR(ISAR1, SPECRES)); - break; - - case SYS_ID_AA64PFR0_EL1: - /* No AMU, MPAM, S-EL2, RAS or SVE */ - val &= ~(GENMASK_ULL(55, 52) | - NV_FTR(PFR0, AMU) | - NV_FTR(PFR0, MPAM) | - NV_FTR(PFR0, SEL2) | - NV_FTR(PFR0, RAS) | - NV_FTR(PFR0, SVE) | - NV_FTR(PFR0, EL3) | - NV_FTR(PFR0, EL2) | - NV_FTR(PFR0, EL1)); - /* 64bit EL1/EL2/EL3 only */ - val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001); - val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001); - val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001); - break; - - case SYS_ID_AA64PFR1_EL1: - /* Only support SSBS */ - val &= NV_FTR(PFR1, SSBS); - break; - - case SYS_ID_AA64MMFR0_EL1: - /* Hide ECV, ExS, Secure Memory */ - val &= ~(NV_FTR(MMFR0, ECV) | - NV_FTR(MMFR0, EXS) | - NV_FTR(MMFR0, TGRAN4_2) | - NV_FTR(MMFR0, TGRAN16_2) | - NV_FTR(MMFR0, TGRAN64_2) | - NV_FTR(MMFR0, SNSMEM)); - - /* Disallow unsupported S2 page sizes */ - switch (PAGE_SIZE) { - case SZ_64K: - val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001); - fallthrough; - case SZ_16K: - val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001); - fallthrough; - case SZ_4K: - /* Support everything */ - break; - } - /* - * Since we can't support a guest S2 page size smaller than - * the host's own page size (due to KVM only populating its - * own S2 using the kernel's page size), advertise the - * limitation using FEAT_GTG. - */ - switch (PAGE_SIZE) { - case SZ_4K: - val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010); - fallthrough; - case SZ_16K: - val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010); - fallthrough; - case SZ_64K: - val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010); - break; - } - /* Cap PARange to 48bits */ - tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val); - if (tmp > 0b0101) { - val &= ~NV_FTR(MMFR0, PARANGE); - val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101); - } + u64 val, tmp; + + /* Support everything but TME, O.S. 
and Range TLBIs */ + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1); + val &= ~(NV_FTR(ISAR0, TLB) | + NV_FTR(ISAR0, TME)); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val); + + /* Support everything but Spec Invalidation */ + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1); + val &= ~(GENMASK_ULL(63, 56) | + NV_FTR(ISAR1, SPECRES)); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val); + + /* No AMU, MPAM, S-EL2, RAS or SVE */ + kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1); + val &= ~(GENMASK_ULL(55, 52) | + NV_FTR(PFR0, AMU) | + NV_FTR(PFR0, MPAM) | + NV_FTR(PFR0, SEL2) | + NV_FTR(PFR0, RAS) | + NV_FTR(PFR0, SVE) | + NV_FTR(PFR0, EL3) | + NV_FTR(PFR0, EL2) | + NV_FTR(PFR0, EL1)); + /* 64bit EL1/EL2/EL3 only */ + val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001); + val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001); + val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + + /* Only support SSBS */ + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1); + val &= NV_FTR(PFR1, SSBS); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val); + + /* Hide ECV, ExS, Secure Memory */ + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1); + val &= ~(NV_FTR(MMFR0, ECV) | + NV_FTR(MMFR0, EXS) | + NV_FTR(MMFR0, TGRAN4_2) | + NV_FTR(MMFR0, TGRAN16_2) | + NV_FTR(MMFR0, TGRAN64_2) | + NV_FTR(MMFR0, SNSMEM)); + + /* Disallow unsupported S2 page sizes */ + switch (PAGE_SIZE) { + case SZ_64K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001); + fallthrough; + case SZ_16K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001); + fallthrough; + case SZ_4K: + /* Support everything */ break; - - case SYS_ID_AA64MMFR1_EL1: - val &= (NV_FTR(MMFR1, HCX) | - NV_FTR(MMFR1, PAN) | - NV_FTR(MMFR1, LO) | - NV_FTR(MMFR1, HPDS) | - NV_FTR(MMFR1, VH) | - NV_FTR(MMFR1, VMIDBits)); - break; - - case SYS_ID_AA64MMFR2_EL1: - val &= ~(NV_FTR(MMFR2, BBM) | - NV_FTR(MMFR2, TTL) | - GENMASK_ULL(47, 44) | - NV_FTR(MMFR2, ST) | - NV_FTR(MMFR2, CCIDX) | - NV_FTR(MMFR2, VARange)); - - /* Force TTL support */ - val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001); - break; - - case SYS_ID_AA64MMFR4_EL1: - val = 0; - if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) - val |= FIELD_PREP(NV_FTR(MMFR4, E2H0), - ID_AA64MMFR4_EL1_E2H0_NI_NV1); - break; - - case SYS_ID_AA64DFR0_EL1: - /* Only limited support for PMU, Debug, BPs and WPs */ - val &= (NV_FTR(DFR0, PMUVer) | - NV_FTR(DFR0, WRPs) | - NV_FTR(DFR0, BRPs) | - NV_FTR(DFR0, DebugVer)); - - /* Cap Debug to ARMv8.1 */ - tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val); - if (tmp > 0b0111) { - val &= ~NV_FTR(DFR0, DebugVer); - val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111); - } - break; - - default: - /* Unknown register, just wipe it clean */ - val = 0; + } + /* + * Since we can't support a guest S2 page size smaller than + * the host's own page size (due to KVM only populating its + * own S2 using the kernel's page size), advertise the + * limitation using FEAT_GTG. 
+ */ + switch (PAGE_SIZE) { + case SZ_4K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010); + fallthrough; + case SZ_16K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010); + fallthrough; + case SZ_64K: + val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010); break; } - - return val; + /* Cap PARange to 48bits */ + tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val); + if (tmp > 0b0101) { + val &= ~NV_FTR(MMFR0, PARANGE); + val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101); + } + kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val); + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1); + val &= (NV_FTR(MMFR1, HCX) | + NV_FTR(MMFR1, PAN) | + NV_FTR(MMFR1, LO) | + NV_FTR(MMFR1, HPDS) | + NV_FTR(MMFR1, VH) | + NV_FTR(MMFR1, VMIDBits)); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val); + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1); + val &= ~(NV_FTR(MMFR2, BBM) | + NV_FTR(MMFR2, TTL) | + GENMASK_ULL(47, 44) | + NV_FTR(MMFR2, ST) | + NV_FTR(MMFR2, CCIDX) | + NV_FTR(MMFR2, VARange)); + + /* Force TTL support */ + val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val); + + val = 0; + if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) + val |= FIELD_PREP(NV_FTR(MMFR4, E2H0), + ID_AA64MMFR4_EL1_E2H0_NI_NV1); + kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val); + + /* Only limited support for PMU, Debug, BPs and WPs */ + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1); + val &= (NV_FTR(DFR0, PMUVer) | + NV_FTR(DFR0, WRPs) | + NV_FTR(DFR0, BRPs) | + NV_FTR(DFR0, DebugVer)); + + /* Cap Debug to ARMv8.1 */ + tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val); + if (tmp > 0b0111) { + val &= ~NV_FTR(DFR0, DebugVer); + val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111); + } + kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val); } u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr) @@ -202,9 +192,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm) goto out; } - for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++) - kvm_set_vm_id_reg(kvm, IDX_IDREG(i), limit_nv_id_reg(IDX_IDREG(i), - kvm->arch.id_regs[i])); + limit_nv_id_regs(kvm); /* VTTBR_EL2 */ res0 = res1 = 0; -- cgit v1.2.3 From f1ff3fc5209a1d63a4018bdb4231fbb073063c9a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 19 Jun 2024 17:40:32 +0000 Subject: KVM: arm64: unify code to prepare traps There are 2 functions to calculate traps via HCR_EL2: * kvm_init_sysreg() called via KVM_RUN (before the 1st run or when the pid changes) * vcpu_reset_hcr() called via KVM_ARM_VCPU_INIT To unify these 2 and to support traps that are dependent on the ID register configuration, move the code from vcpu_reset_hcr() to sys_regs.c and call it via kvm_init_sysreg(). We still have to keep the non-FWB handling stuff in vcpu_reset_hcr(). Also the initialization with HCR_GUEST_FLAGS is kept there but guarded by !vcpu_has_run_once() to ensure that previous calculated values don't get overwritten. While at it rename kvm_init_sysreg() to kvm_calculate_traps() to better reflect what it's doing. 
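The net effect is one function that turns feature queries into a trap configuration before the vCPU first runs. A rough standalone sketch of that shape (the bit values and feature flags below are placeholders, not the architectural ones):

#include <stdbool.h>
#include <stdio.h>

#define TOY_HCR_TVM     (1u << 0)       /* trap VM ops */
#define TOY_HCR_FWB     (1u << 1)
#define TOY_HCR_TEA     (1u << 2)       /* route SEAs to EL2 */

/* Compute the trap bits once, from host capabilities and VM configuration,
 * instead of sprinkling the logic across the reset paths. */
static unsigned int toy_calculate_traps(bool has_fwb, bool has_ras)
{
    unsigned int hcr = 0;

    if (has_fwb)
        hcr |= TOY_HCR_FWB;
    else
        hcr |= TOY_HCR_TVM;     /* trap until the guest MMU is on */

    if (has_ras)
        hcr |= TOY_HCR_TEA;

    return hcr;
}

int main(void)
{
    printf("hcr = %#x\n", toy_calculate_traps(true, false));
    return 0;
}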
Signed-off-by: Sebastian Ott Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240619174036.483943-7-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 40 ++++++++---------------------------- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/sys_regs.c | 34 ++++++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 35 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 501e3e019c93..84dc3fac9711 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -69,39 +69,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { - vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (has_vhe() || has_hvhe()) - vcpu->arch.hcr_el2 |= HCR_E2H; - if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) { - /* route synchronous external abort exceptions to EL2 */ - vcpu->arch.hcr_el2 |= HCR_TEA; - /* trap error record accesses */ - vcpu->arch.hcr_el2 |= HCR_TERR; - } + if (!vcpu_has_run_once(vcpu)) + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; - if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) { - vcpu->arch.hcr_el2 |= HCR_FWB; - } else { - /* - * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C - * get set in SCTLR_EL1 such that we can detect when the guest - * MMU gets turned on and do the necessary cache maintenance - * then. - */ + /* + * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C + * get set in SCTLR_EL1 such that we can detect when the guest + * MMU gets turned on and do the necessary cache maintenance + * then. + */ + if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) vcpu->arch.hcr_el2 |= HCR_TVM; - } - - if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) - vcpu->arch.hcr_el2 |= HCR_TID4; - else - vcpu->arch.hcr_el2 |= HCR_TID2; - - if (vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 &= ~HCR_RW; - - if (kvm_has_mte(vcpu->kvm)) - vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 294c78319f58..26042875d6fc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1120,7 +1120,7 @@ int __init populate_nv_trap_config(void); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); -void kvm_init_sysreg(struct kvm_vcpu *); +void kvm_calculate_traps(struct kvm_vcpu *vcpu); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9996a989b52e..6b217afb4e8e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -797,7 +797,7 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) * This needs to happen after NV has imposed its own restrictions on * the feature set */ - kvm_init_sysreg(vcpu); + kvm_calculate_traps(vcpu); ret = kvm_timer_enable(vcpu); if (ret) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8e3358905371..a467ff4290a7 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4069,11 +4069,33 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range * return 0; } -void kvm_init_sysreg(struct kvm_vcpu *vcpu) +static void vcpu_set_hcr(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - 
mutex_lock(&kvm->arch.config_lock); + if (has_vhe() || has_hvhe()) + vcpu->arch.hcr_el2 |= HCR_E2H; + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) { + /* route synchronous external abort exceptions to EL2 */ + vcpu->arch.hcr_el2 |= HCR_TEA; + /* trap error record accesses */ + vcpu->arch.hcr_el2 |= HCR_TERR; + } + + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) + vcpu->arch.hcr_el2 |= HCR_FWB; + + if (cpus_have_final_cap(ARM64_HAS_EVT) && + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) + vcpu->arch.hcr_el2 |= HCR_TID4; + else + vcpu->arch.hcr_el2 |= HCR_TID2; + + if (vcpu_el1_is_32bit(vcpu)) + vcpu->arch.hcr_el2 &= ~HCR_RW; + + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; /* * In the absence of FGT, we cannot independently trap TLBI @@ -4082,6 +4104,14 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu) */ if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS)) vcpu->arch.hcr_el2 |= HCR_TTLBOS; +} + +void kvm_calculate_traps(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + mutex_lock(&kvm->arch.config_lock); + vcpu_set_hcr(vcpu); if (cpus_have_final_cap(ARM64_HAS_HCX)) { vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS; -- cgit v1.2.3 From 2843cae26644fbc922e93c7c4c279f70fb3275f1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 19 Jun 2024 17:40:33 +0000 Subject: KVM: arm64: Treat CTR_EL0 as a VM feature ID register CTR_EL0 is currently handled as an invariant register, thus guests will be presented with the host value of that register. Add emulation for CTR_EL0 based on a per VM value. Userspace can switch off DIC and IDC bits and reduce DminLine and IminLine sizes. Naturally, ensure CTR_EL0 is trapped (HCR_EL2.TID2=1) any time that a VM's CTR_EL0 differs from hardware. Signed-off-by: Sebastian Ott Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240619174036.483943-8-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 4 ++++ arch/arm64/kvm/sys_regs.c | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 26042875d6fc..f6de08e81d49 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -329,6 +329,8 @@ struct kvm_arch { #define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1) u64 id_regs[KVM_ARM_ID_REG_NUM]; + u64 ctr_el0; + /* Masks for VNCR-baked sysregs */ struct kvm_sysreg_masks *sysreg_masks; @@ -1335,6 +1337,8 @@ static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) switch (reg) { case sys_reg(3, 0, 0, 1, 0) ... 
sys_reg(3, 0, 0, 7, 7): return &ka->id_regs[IDREG_IDX(reg)]; + case SYS_CTR_EL0: + return &ka->ctr_el0; default: WARN_ON_ONCE(1); return NULL; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a467ff4290a7..a12f3bdfb43d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1583,6 +1583,9 @@ static bool is_feature_id_reg(u32 encoding) */ static inline bool is_vm_ftr_id_reg(u32 id) { + if (id == SYS_CTR_EL0) + return true; + return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && sys_reg_CRm(id) < 8); @@ -1898,7 +1901,7 @@ static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) return write_to_read_only(vcpu, p, r); - p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0); + p->regval = kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0); return true; } @@ -2487,7 +2490,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, - { SYS_DESC(SYS_CTR_EL0), access_ctr }, + ID_WRITABLE(CTR_EL0, CTR_EL0_DIC_MASK | + CTR_EL0_IDC_MASK | + CTR_EL0_DminLine_MASK | + CTR_EL0_IminLine_MASK), { SYS_DESC(SYS_SVCR), undef_access }, { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr, @@ -3725,18 +3731,11 @@ FUNCTION_INVARIANT(midr_el1) FUNCTION_INVARIANT(revidr_el1) FUNCTION_INVARIANT(aidr_el1) -static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) -{ - ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0); - return ((struct sys_reg_desc *)r)->val; -} - /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = { { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, - { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 }, }; static int get_invariant_sys_reg(u64 id, u64 __user *uaddr) @@ -4086,7 +4085,8 @@ static void vcpu_set_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_FWB; if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE) && + kvm_read_vm_id_reg(kvm, SYS_CTR_EL0) == read_sanitised_ftr_reg(SYS_CTR_EL0)) vcpu->arch.hcr_el2 |= HCR_TID4; else vcpu->arch.hcr_el2 |= HCR_TID2; -- cgit v1.2.3 From bb4fa769dcdd0b6e47ecbf0363489be510498b1d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 19 Jun 2024 17:40:34 +0000 Subject: KVM: arm64: show writable masks for feature registers Instead of using ~0UL provide the actual writable mask for non-id feature registers in the output of the KVM_ARM_GET_REG_WRITABLE_MASKS ioctl. This changes the mask for the CTR_EL0 and CLIDR_EL1 registers. 
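In other words, the ioctl now reports each feature register's real writable mask and simply skips entries that have none. A small standalone sketch of that filtering (indices and masks are made up):

#include <stdio.h>

struct toy_reg {
    unsigned int idx;               /* ~ KVM_ARM_FEATURE_ID_RANGE_INDEX() */
    unsigned long long writable;    /* ~ sys_reg_desc::val */
};

int main(void)
{
    const struct toy_reg regs[] = {
        { 0, 0x000000000000f00fULL },   /* some writable fields */
        { 1, 0 },                       /* nothing writable: skipped */
        { 2, 0x0000ffff0000ffffULL },
    };
    unsigned long long masks[3] = { 0 };

    for (unsigned int i = 0; i < 3; i++) {
        if (!regs[i].writable)
            continue;
        masks[regs[i].idx] = regs[i].writable;  /* ~ put_user() */
    }

    for (unsigned int i = 0; i < 3; i++)
        printf("mask[%u] = %016llx\n", i, masks[i]);
    return 0;
}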
Signed-off-by: Sebastian Ott Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240619174036.483943-9-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a12f3bdfb43d..d8d2a7880576 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2486,7 +2486,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1, - .set_user = set_clidr }, + .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, @@ -4046,20 +4046,11 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range * if (!is_feature_id_reg(encoding) || !reg->set_user) continue; - /* - * For ID registers, we return the writable mask. Other feature - * registers return a full 64bit mask. That's not necessary - * compliant with a given revision of the architecture, but the - * RES0/RES1 definitions allow us to do that. - */ - if (is_vm_ftr_id_reg(encoding)) { - if (!reg->val || - (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0())) - continue; - val = reg->val; - } else { - val = ~0UL; + if (!reg->val || + (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0())) { + continue; } + val = reg->val; if (put_user(val, (masks + KVM_ARM_FEATURE_ID_RANGE_INDEX(encoding)))) return -EFAULT; -- cgit v1.2.3 From 76d36012276a328ac0a1e9c7415cafd092447ce7 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 19 Jun 2024 17:40:35 +0000 Subject: KVM: arm64: rename functions for invariant sys regs Invariant system id registers are populated with host values at initialization time using their .reset function cb. These are currently called get_* which is usually used by the functions implementing the .get_user callback. Change their function names to reset_* to reflect what they are used for. 
Signed-off-by: Sebastian Ott Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20240619174036.483943-10-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d8d2a7880576..71a4ed58f94b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3720,8 +3720,8 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id, */ #define FUNCTION_INVARIANT(reg) \ - static u64 get_##reg(struct kvm_vcpu *v, \ - const struct sys_reg_desc *r) \ + static u64 reset_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ { \ ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ return ((struct sys_reg_desc *)r)->val; \ @@ -3733,9 +3733,9 @@ FUNCTION_INVARIANT(aidr_el1) /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = { - { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, - { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, - { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, + { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 }, + { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 }, + { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 }, }; static int get_invariant_sys_reg(u64 id, u64 __user *uaddr) -- cgit v1.2.3 From a8f0655887cc86db9d65fd5fbaf99d62424eb9b4 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:30 +0100 Subject: KVM: arm64: Fix clobbered ELR in sync abort/SError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the hypervisor receives a SError or synchronous exception (EL2h) while running with the __kvm_hyp_vector and if ELR_EL2 doesn't point to an extable entry, it panics indirectly by overwriting ELR with the address of a panic handler in order for the asm routine it returns to to ERET into the handler. However, this clobbers ELR_EL2 for the handler itself. As a result, hyp_panic(), when retrieving what it believes to be the PC where the exception happened, actually ends up reading the address of the panic handler that called it! This results in an erroneous and confusing panic message where the source of any synchronous exception (e.g. BUG() or kCFI) appears to be __guest_exit_panic, making it hard to locate the actual BRK instruction. Therefore, store the original ELR_EL2 in the per-CPU kvm_hyp_ctxt and point the sysreg to a routine that first restores it to its previous value before running __guest_exit_panic. 
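The underlying problem is the classic "the only copy of the value is about to be overwritten" pattern. A minimal C model of the fix, with plain variables standing in for ELR_EL2 and the per-CPU context slot (this is an illustration, not the asm):

#include <stdio.h>

static unsigned long long elr;          /* ~ ELR_EL2 */
static unsigned long long saved_elr;    /* ~ kvm_hyp_ctxt sys_regs[ELR_EL2] */

static void exit_restore_elr_and_panic(void)
{
    elr = saved_elr;                        /* undo the reroute first... */
    printf("hyp panic at PC %#llx\n", elr); /* ...so the real PC is reported */
}

static void unexpected_el2_exception(unsigned long long faulting_pc)
{
    elr = faulting_pc;
    saved_elr = elr;                    /* stash the real PC */
    elr = 0xdead;                       /* pretend: address of the panic stub */
    exit_restore_elr_and_panic();       /* stand-in for the ERET into it */
}

int main(void)
{
    unexpected_el2_exception(0xffff800080001234ULL);
    return 0;
}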
Fixes: 7db21530479f ("KVM: arm64: Restore hyp when panicking in guest context") Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-2-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp/entry.S | 8 ++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 5 +++-- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 81496083c041..27de1dddb0ab 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -128,6 +128,7 @@ int main(void) DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); + DEFINE(CPU_ELR_EL2, offsetof(struct kvm_cpu_context, sys_regs[ELR_EL2])); DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1])); DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1])); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index f3aa7738b477..4433a234aa9b 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -83,6 +83,14 @@ alternative_else_nop_endif eret sb +SYM_INNER_LABEL(__guest_exit_restore_elr_and_panic, SYM_L_GLOBAL) + // x2-x29,lr: vcpu regs + // vcpu x0-x1 on the stack + + adr_this_cpu x0, kvm_hyp_ctxt, x1 + ldr x0, [x0, #CPU_ELR_EL2] + msr elr_el2, x0 + SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) // x2-x29,lr: vcpu regs // vcpu x0-x1 on the stack diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 0c4de44534b7..1f4b87a73445 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -693,7 +693,7 @@ guest: static inline void __kvm_unexpected_el2_exception(void) { - extern char __guest_exit_panic[]; + extern char __guest_exit_restore_elr_and_panic[]; unsigned long addr, fixup; struct kvm_exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); @@ -715,7 +715,8 @@ static inline void __kvm_unexpected_el2_exception(void) } /* Trigger a panic after restoring the hyp context. */ - write_sysreg(__guest_exit_panic, elr_el2); + this_cpu_ptr(&kvm_hyp_ctxt)->sys_regs[ELR_EL2] = elr_el2; + write_sysreg(__guest_exit_restore_elr_and_panic, elr_el2); } #endif /* __ARM64_KVM_HYP_SWITCH_H__ */ -- cgit v1.2.3 From ea9d7c83d14e332db9ae25eb2872b90a06ebc9e6 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:31 +0100 Subject: KVM: arm64: Fix __pkvm_init_switch_pgd call ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the mismatch between the (incorrect) C signature, C call site, and asm implementation by aligning all three on an API passing the parameters (pgd and SP) separately, instead of as a bundled struct. Remove the now unnecessary memory accesses while the MMU is off from the asm, which simplifies the C caller (as it does not need to convert a VA struct pointer to PA) and makes the code slightly more robust by offsetting the struct fields from C and properly expressing the call to the C compiler (e.g. type checker and kCFI). 
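The calling convention ends up as ordinary scalar arguments plus a typeof()-typed function pointer, so the compiler checks the call like any other. A toy standalone version of that idiom (GNU C typeof, as used in the patch; names invented):

#include <stdio.h>

static void toy_switch_pgd(unsigned long long pgd_pa, unsigned long long sp,
                           void (*cont)(void))
{
    printf("pgd=%#llx sp=%#llx\n", pgd_pa, sp);
    cont();                             /* continue on the new page tables */
}

static void toy_finalise(void)
{
    printf("finalise\n");
}

int main(void)
{
    /* Mirrors 'typeof(__pkvm_init_switch_pgd) *fn' in the patch: the
     * pointer carries the real signature, so the call is type-checked. */
    typeof(toy_switch_pgd) *fn = toy_switch_pgd;

    fn(0x40001000ULL, 0xffff0000ULL, toy_finalise);
    return 0;
}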
Fixes: f320bc742bc2 ("KVM: arm64: Prepare the creation of s1 mappings at EL2") Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-3-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_hyp.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 24 +++++++++++++----------- arch/arm64/kvm/hyp/nvhe/setup.c | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b05bceca3385..c838309e4ec4 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -124,8 +124,8 @@ void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, #endif #ifdef __KVM_NVHE_HYPERVISOR__ -void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size, - phys_addr_t pgd, void *sp, void *cont_fn); +void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + void (*fn)(void)); int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, unsigned long *per_cpu_base, u32 hyp_va_bits); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 2994878d68ea..3a2836a52e85 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -265,33 +265,35 @@ alternative_else_nop_endif SYM_CODE_END(__kvm_handle_stub_hvc) +/* + * void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, + * void (*fn)(void)); + */ SYM_FUNC_START(__pkvm_init_switch_pgd) /* Turn the MMU off */ pre_disable_mmu_workaround - mrs x2, sctlr_el2 - bic x3, x2, #SCTLR_ELx_M - msr sctlr_el2, x3 + mrs x3, sctlr_el2 + bic x4, x3, #SCTLR_ELx_M + msr sctlr_el2, x4 isb tlbi alle2 /* Install the new pgtables */ - ldr x3, [x0, #NVHE_INIT_PGD_PA] - phys_to_ttbr x4, x3 + phys_to_ttbr x5, x0 alternative_if ARM64_HAS_CNP - orr x4, x4, #TTBR_CNP_BIT + orr x5, x5, #TTBR_CNP_BIT alternative_else_nop_endif - msr ttbr0_el2, x4 + msr ttbr0_el2, x5 /* Set the new stack pointer */ - ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA] - mov sp, x0 + mov sp, x1 /* And turn the MMU back on! 
*/ dsb nsh isb - set_sctlr_el2 x2 - ret x1 + set_sctlr_el2 x3 + ret x2 SYM_FUNC_END(__pkvm_init_switch_pgd) .popsection diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index f4350ba07b0b..174007f3fadd 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -339,7 +339,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, { struct kvm_nvhe_init_params *params; void *virt = hyp_phys_to_virt(phys); - void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); + typeof(__pkvm_init_switch_pgd) *fn; int ret; BUG_ON(kvm_check_pvm_sysreg_table()); @@ -363,7 +363,7 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, /* Jump in the idmap page to switch to the new page-tables */ params = this_cpu_ptr(&kvm_init_params); fn = (typeof(fn))__hyp_pa(__pkvm_init_switch_pgd); - fn(__hyp_pa(params), __pkvm_init_finalise); + fn(params->pgd_pa, params->stack_hyp_va, __pkvm_init_finalise); unreachable(); } -- cgit v1.2.3 From 6e3b773ed6bc5e783fa314b75071f022324f94a8 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:32 +0100 Subject: KVM: arm64: nVHE: Simplify invalid_host_el2_vect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The invalid_host_el2_vect macro is used by EL2{t,h} handlers in nVHE *host* context, which should never run with a guest context loaded. Therefore, remove the superfluous vCPU context check and branch unconditionally to hyp_panic. Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-4-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/host.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 135cfb294ee5..3d610fc51f4d 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -197,12 +197,6 @@ SYM_FUNC_END(__host_hvc) sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp - /* If a guest is loaded, panic out of it. */ - stp x0, x1, [sp, #-16]! - get_loaded_vcpu x0, x1 - cbnz x0, __guest_exit_panic - add sp, sp, #16 - /* * The panic may not be clean if the exception is taken before the host * context has been saved by __host_exit or after the hyp context has -- cgit v1.2.3 From 4ab3f9dd561b428460038a9bb041e92db6197f18 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:33 +0100 Subject: KVM: arm64: nVHE: gen-hyprel: Skip R_AARCH64_ABS32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ignore R_AARCH64_ABS32 relocations, instead of panicking, when emitting the relocation table of the hypervisor. The toolchain might produce them when generating function calls with kCFI to represent the 32-bit type ID which can then be resolved across compilation units at link time. These are NOT actual 32-bit addresses and are therefore not needed in the final (runtime) relocation table (which is unlikely to use 32-bit absolute addresses for arm64 anyway). 
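The change amounts to one more arm in the relocation-type switch that recognises the entry but emits nothing. A toy sketch of that filter (the relocation numbers are the ELF ones quoted in the patch, everything else is invented):

#include <stdio.h>

#define TOY_R_AARCH64_ABS64     257
#define TOY_R_AARCH64_ABS32     258
#define TOY_R_AARCH64_PREL64    260

static void toy_emit_reloc(int type)
{
    switch (type) {
    case TOY_R_AARCH64_ABS64:
        printf("emit 64-bit absolute relocation\n");
        break;
    case TOY_R_AARCH64_ABS32:
        /* kCFI type hash: nothing to relocate at runtime */
        break;
    case TOY_R_AARCH64_PREL64:
        printf("emit position-relative relocation\n");
        break;
    default:
        printf("fatal: unsupported relocation %d\n", type);
        break;
    }
}

int main(void)
{
    toy_emit_reloc(TOY_R_AARCH64_ABS32);    /* silently skipped */
    toy_emit_reloc(TOY_R_AARCH64_ABS64);
    return 0;
}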
Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-5-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/gen-hyprel.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c index 6bc88a756cb7..b63f4e1c1033 100644 --- a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c +++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c @@ -50,6 +50,9 @@ #ifndef R_AARCH64_ABS64 #define R_AARCH64_ABS64 257 #endif +#ifndef R_AARCH64_ABS32 +#define R_AARCH64_ABS32 258 +#endif #ifndef R_AARCH64_PREL64 #define R_AARCH64_PREL64 260 #endif @@ -383,6 +386,9 @@ static void emit_rela_section(Elf64_Shdr *sh_rela) case R_AARCH64_ABS64: emit_rela_abs64(rela, sh_orig_name); break; + /* Allow 32-bit absolute relocation, for kCFI type hashes. */ + case R_AARCH64_ABS32: + break; /* Allow position-relative data relocations. */ case R_AARCH64_PREL64: case R_AARCH64_PREL32: -- cgit v1.2.3 From 3c6eb64876937e38672fba63f11634b9ef3013e1 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:34 +0100 Subject: KVM: arm64: VHE: Mark __hyp_call_panic __noreturn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that the sole purpose of __hyp_call_panic() is to call panic(), a __noreturn function, give it the __noreturn attribute, removing the need for its caller to use unreachable(). Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-6-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 8fbb6a2e0559..ee1e1c5847e7 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -388,7 +388,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) return ret; } -static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) +static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par) { struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; @@ -413,7 +413,6 @@ void __noreturn hyp_panic(void) u64 par = read_sysreg_par(); __hyp_call_panic(spsr, elr, par); - unreachable(); } asmlinkage void kvm_unexpected_el2_exception(void) -- cgit v1.2.3 From 7a928b32f1de67760e39d22d00fef99dca69fbd9 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:35 +0100 Subject: arm64: Introduce esr_brk_comment, esr_is_cfi_brk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it is already used in two places, move esr_comment() to a header for re-use, with a clearer name. Introduce esr_is_cfi_brk() to detect kCFI BRK syndromes, currently used by early_brk64() but soon to also be used by hypervisor code. 
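Both helpers are simple mask-and-compare operations on the ESR. A standalone sketch of the idea (the masks and immediates below are placeholders, not the architectural encodings, and the real esr_is_cfi_brk() additionally checks that the exception class is BRK64):

#include <stdbool.h>
#include <stdio.h>

#define TOY_COMMENT_MASK        0xffffUL
#define TOY_CFI_IMM_BASE        0x8000UL
#define TOY_CFI_IMM_MASK        0x001fUL

static unsigned long toy_brk_comment(unsigned long esr)
{
    return esr & TOY_COMMENT_MASK;
}

static bool toy_is_cfi_brk(unsigned long esr)
{
    return (toy_brk_comment(esr) & ~TOY_CFI_IMM_MASK) == TOY_CFI_IMM_BASE;
}

int main(void)
{
    printf("%d\n", toy_is_cfi_brk(0x8003UL));   /* 1: inside the CFI range */
    printf("%d\n", toy_is_cfi_brk(0x0800UL));   /* 0: some other BRK */
    return 0;
}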
Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-7-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 11 +++++++++++ arch/arm64/kernel/debug-monitors.c | 4 +--- arch/arm64/kernel/traps.c | 8 +++----- arch/arm64/kvm/handle_exit.c | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 7abf09df7033..77569d207ecf 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -379,6 +379,11 @@ #ifndef __ASSEMBLY__ #include +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + static inline bool esr_is_data_abort(unsigned long esr) { const unsigned long ec = ESR_ELx_EC(esr); @@ -386,6 +391,12 @@ static inline bool esr_is_data_abort(unsigned long esr) return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; } +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + static inline bool esr_fsc_is_translation_fault(unsigned long esr) { /* Translation fault, level -1 */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 64f2ecbdfe5c..024a7b245056 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -312,9 +312,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned long esr) * entirely not preemptible, and we can use rcu list safely here. */ list_for_each_entry_rcu(hook, list, node) { - unsigned long comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; - - if ((comment & ~hook->mask) == hook->imm) + if ((esr_brk_comment(esr) & ~hook->mask) == hook->imm) fn = hook->fn; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 215e6d7f2df8..9e22683aa921 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1105,8 +1105,6 @@ static struct break_hook ubsan_break_hook = { }; #endif -#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK) - /* * Initial handler for AArch64 BRK exceptions * This handler only used until debug_traps_init(). 
@@ -1115,15 +1113,15 @@ int __init early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs) { #ifdef CONFIG_CFI_CLANG - if ((esr_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE) + if (esr_is_cfi_brk(esr)) return cfi_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_KASAN_SW_TAGS - if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) + if ((esr_brk_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif #ifdef CONFIG_UBSAN_TRAP - if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM) + if ((esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM) return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED; #endif return bug_handler(regs, esr) != DBG_HOOK_HANDLED; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b037f0a0e27e..d41447193e13 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -423,7 +423,7 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) { kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && - (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) { + esr_brk_comment(esr) == BUG_BRK_IMM) { const char *file = NULL; unsigned int line = 0; -- cgit v1.2.3 From 8f3873a39529101213fa1109d499239d57185551 Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:36 +0100 Subject: KVM: arm64: Introduce print_nvhe_hyp_panic helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to display a panic banner soon to also be used for kCFI failures, to ensure that we remain consistent. Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-8-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/handle_exit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index d41447193e13..b3d6657a259d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -411,6 +411,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } +static void print_nvhe_hyp_panic(const char *name, u64 panic_addr) +{ + kvm_err("nVHE hyp %s at: [<%016llx>] %pB!\n", name, panic_addr, + (void *)(panic_addr + kaslr_offset())); +} + void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, @@ -439,11 +445,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr, - (void *)(panic_addr + kaslr_offset())); + print_nvhe_hyp_panic("BUG", panic_addr); } else { - kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr, - (void *)(panic_addr + kaslr_offset())); + print_nvhe_hyp_panic("panic", panic_addr); } /* Dump the nVHE hypervisor backtrace */ -- cgit v1.2.3 From eca4ba5b6dff9b6ec03c9607ac297076f037fcfc Mon Sep 17 00:00:00 2001 From: Pierre-Clément Tosi Date: Mon, 10 Jun 2024 07:32:37 +0100 Subject: KVM: arm64: nVHE: Support CONFIG_CFI_CLANG at EL2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compiler implements kCFI by adding type information (u32) above every function that 
might be indirectly called and, whenever a function pointer is called, injects a read-and-compare of that u32 against the value corresponding to the expected type. In case of a mismatch, a BRK instruction gets executed. When the hypervisor triggers such an exception in nVHE, it panics and triggers and exception return to EL1. Therefore, teach nvhe_hyp_panic_handler() to detect kCFI errors from the ESR and report them. If necessary, remind the user that EL2 kCFI is not affected by CONFIG_CFI_PERMISSIVE. Pass $(CC_FLAGS_CFI) to the compiler when building the nVHE hyp code. Use SYM_TYPED_FUNC_START() for __pkvm_init_switch_pgd, as nVHE can't call it directly and must use a PA function pointer from C (because it is part of the idmap page), which would trigger a kCFI failure if the type ID wasn't present. Signed-off-by: Pierre-Clément Tosi Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240610063244.2828978-9-ptosi@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/handle_exit.c | 10 ++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 6 +++--- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 6 +++++- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b3d6657a259d..69b08ac7322d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -417,6 +417,14 @@ static void print_nvhe_hyp_panic(const char *name, u64 panic_addr) (void *)(panic_addr + kaslr_offset())); } +static void kvm_nvhe_report_cfi_failure(u64 panic_addr) +{ + print_nvhe_hyp_panic("CFI failure", panic_addr); + + if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) + kvm_err(" (CONFIG_CFI_PERMISSIVE ignored for hyp failures)\n"); +} + void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, @@ -446,6 +454,8 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else print_nvhe_hyp_panic("BUG", panic_addr); + } else if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) { + kvm_nvhe_report_cfi_failure(panic_addr); } else { print_nvhe_hyp_panic("panic", panic_addr); } diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 50fa0ffb6b7e..782b34b004be 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -89,9 +89,9 @@ quiet_cmd_hyprel = HYPREL $@ quiet_cmd_hypcopy = HYPCOPY $@ cmd_hypcopy = $(OBJCOPY) --prefix-symbols=__kvm_nvhe_ $< $@ -# Remove ftrace, Shadow Call Stack, and CFI CFLAGS. -# This is equivalent to the 'notrace', '__noscs', and '__nocfi' annotations. -KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) +# Remove ftrace and Shadow Call Stack CFLAGS. +# This is equivalent to the 'notrace' and '__noscs' annotations. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) # Starting from 13.0.0 llvm emits SHT_REL section '.llvm.call-graph-profile' # when profile optimization is applied. gen-hyprel does not support SHT_REL and # causes a build failure. Remove profile optimization flags. 
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 3a2836a52e85..07120b37da35 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -5,6 +5,7 @@ */ #include +#include #include #include @@ -268,8 +269,11 @@ SYM_CODE_END(__kvm_handle_stub_hvc) /* * void __pkvm_init_switch_pgd(phys_addr_t pgd, unsigned long sp, * void (*fn)(void)); + * + * SYM_TYPED_FUNC_START() allows C to call this ID-mapped function indirectly + * using a physical pointer without triggering a kCFI failure. */ -SYM_FUNC_START(__pkvm_init_switch_pgd) +SYM_TYPED_FUNC_START(__pkvm_init_switch_pgd) /* Turn the MMU off */ pre_disable_mmu_workaround mrs x3, sctlr_el2 -- cgit v1.2.3 From d2b2ecba8ddb55dd7c8f9741b4863670850c49de Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Thu, 20 Jun 2024 16:46:38 +0000 Subject: KVM: arm64: nv: Forward FP/ASIMD traps to guest hypervisor Give precedence to the guest hypervisor's trap configuration when routing an FP/ASIMD trap taken to EL2. Take advantage of the infrastructure for translating CPTR_EL2 into the VHE (i.e. EL1) format and base the trap decision solely on the VHE view of the register. The in-memory value of CPTR_EL2 will always be up to date for the guest hypervisor (more on that later), so just read it directly from memory. Bury all of this behind a macro keyed off of the CPTR bitfield in anticipation of supporting other traps (e.g. SVE). [maz: account for HCR_EL2.E2H when testing for TFP/FPEN, with all the hard work actually being done by Chase Conklin] [ oliver: translate nVHE->VHE format for testing traps; macro for reuse in other CPTR_EL2.xEN fields ] Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 43 +++++++++++++++++++++++++++++++++ arch/arm64/kvm/handle_exit.c | 16 +++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 3 +++ 3 files changed, 58 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 21650e7924d4..29fdeb5b3c96 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -11,6 +11,7 @@ #ifndef __ARM64_KVM_EMULATE_H__ #define __ARM64_KVM_EMULATE_H__ +#include #include #include @@ -660,4 +661,46 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) kvm_write_cptr_el2(val); } + +/* + * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE + * format if E2H isn't set. + */ +static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu) +{ + u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2); + + if (!vcpu_el2_e2h_is_set(vcpu)) + cptr = translate_cptr_el2_to_cpacr_el1(cptr); + + return cptr; +} + +static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu, + unsigned int xen) +{ + switch (xen) { + case 0b00: + case 0b10: + return true; + case 0b01: + return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu); + case 0b11: + default: + return false; + } +} + +#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \ + (!vcpu_has_nv(vcpu) ? 
false : \ + ____cptr_xen_trap_enabled(vcpu, \ + SYS_FIELD_GET(CPACR_ELx, xen, \ + vcpu_sanitised_cptr_el2(vcpu)))) + +static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN); +} + + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b037f0a0e27e..59fe9b10a87a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -94,11 +94,19 @@ static int handle_smc(struct kvm_vcpu *vcpu) } /* - * Guest access to FP/ASIMD registers are routed to this handler only - * when the system doesn't support FP/ASIMD. + * This handles the cases where the system does not support FP/ASIMD or when + * we are running nested virtualization and the guest hypervisor is trapping + * FP/ASIMD accesses by its guest guest. + * + * All other handling of guest vs. host FP/ASIMD register state is handled in + * fixup_guest_exit(). */ -static int handle_no_fpsimd(struct kvm_vcpu *vcpu) +static int kvm_handle_fpasimd(struct kvm_vcpu *vcpu) { + if (guest_hyp_fpsimd_traps_enabled(vcpu)) + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + + /* This is the case when the system doesn't support FP/ASIMD. */ kvm_inject_undefined(vcpu); return 1; } @@ -304,7 +312,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug, [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug, [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug, - [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_handle_fpasimd, [ESR_ELx_EC_PAC] = kvm_handle_ptrauth, }; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 0c4de44534b7..b398f2320e94 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -354,6 +354,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Only handle traps the vCPU can support here: */ switch (esr_ec) { case ESR_ELx_EC_FP_ASIMD: + /* Forward traps to the guest hypervisor as required */ + if (guest_hyp_fpsimd_traps_enabled(vcpu)) + return false; break; case ESR_ELx_EC_SVE: if (!sve_guest) -- cgit v1.2.3 From 399debfc97493130167663336a2c3d0d16c2da79 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:39 +0000 Subject: KVM: arm64: nv: Forward SVE traps to guest hypervisor Similar to FPSIMD traps, don't load SVE state if the guest hypervisor has SVE traps enabled and forward the trap instead. Note that ZCR_EL2 will require some special handling, as it takes a sysreg trap to EL2 when HCR_EL2.NV = 1. 
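As with FPSIMD, the forwarding decision boils down to how the guest hypervisor programmed the ZEN bits of CPTR_EL2, viewed in the CPACR_EL1 (VHE) format. A stand-alone C model of that check, mirroring ____cptr_xen_trap_enabled() from the previous patch, is shown below; it is an illustrative sketch only, not kernel code, and the tge/at_vel2 parameters stand in for vcpu_el2_tge_is_set()/vcpu_is_el2():

#include <stdbool.h>
#include <stdio.h>

/* Model of the 2-bit CPACR_ELx.FPEN/ZEN trap decision. */
static bool cptr_xen_trap_enabled(unsigned int xen, bool tge, bool at_vel2)
{
	switch (xen & 3) {
	case 0:			/* 0b00 */
	case 2:			/* 0b10 */
		return true;	/* accesses below (v)EL2 trap */
	case 1:			/* 0b01: only EL0 accesses trap */
		return tge && !at_vel2;
	default:		/* 0b11 */
		return false;	/* no trapping */
	}
}

int main(void)
{
	printf("ZEN=0b01, TGE=0: trap=%d\n", cptr_xen_trap_enabled(1, false, false));
	printf("ZEN=0b00, TGE=0: trap=%d\n", cptr_xen_trap_enabled(0, false, false));
	return 0;
}

When this evaluates true for ZEN, the SVE trap is forwarded to the guest hypervisor instead of being handled by loading the L2's SVE state.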
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 4 ++++ arch/arm64/kvm/handle_exit.c | 3 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ 3 files changed, 9 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 29fdeb5b3c96..befef5d0daae 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -702,5 +702,9 @@ static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu) return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN); } +static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu) +{ + return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN); +} #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 59fe9b10a87a..e4f74699f360 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -217,6 +217,9 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) */ static int handle_sve(struct kvm_vcpu *vcpu) { + if (guest_hyp_sve_traps_enabled(vcpu)) + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu)); + kvm_inject_undefined(vcpu); return 1; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b398f2320e94..be1b22b9c324 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -361,6 +361,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) case ESR_ELx_EC_SVE: if (!sve_guest) return false; + if (guest_hyp_sve_traps_enabled(vcpu)) + return false; break; default: return false; -- cgit v1.2.3 From b3d29a8230998b36afecf494b199211d26052785 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:40 +0000 Subject: KVM: arm64: nv: Handle ZCR_EL2 traps Unlike other SVE-related registers, ZCR_EL2 takes a sysreg trap to EL2 when HCR_EL2.NV = 1. KVM still needs to honor the guest hypervisor's trap configuration, which expects an SVE trap (i.e. ESR_EL2.EC = 0x19) when CPTR traps are enabled for the vCPU's current context. Otherwise, if the guest hypervisor has traps disabled, emulate the access by mapping the requested VL into ZCR_EL1. 
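The mapping itself is only a vector-quantum clamp. A stand-alone sketch of the arithmetic performed by the emulation follows (illustrative only; the helper names are local to this sketch, and the kernel does the equivalent in access_zcr_el2() below):

#include <stdio.h>

#define SVE_VQ_BYTES	16	/* one vector quadword = 128 bits */

/* ZCR_ELx.LEN encodes the vector length as (LEN + 1) quadwords. */
static unsigned int zcr_len_to_vl_bytes(unsigned int len)
{
	return (len + 1) * SVE_VQ_BYTES;
}

/* Clamp a guest-requested LEN to the vCPU's maximum number of quadwords. */
static unsigned int clamp_guest_zcr_len(unsigned int guest_len, unsigned int max_vq)
{
	unsigned int vq = guest_len + 1;

	if (vq > max_vq)
		vq = max_vq;
	return vq - 1;
}

int main(void)
{
	/* e.g. the guest asks for LEN=7 (VL=128 bytes) but the vCPU max is 4 VQs */
	unsigned int len = clamp_guest_zcr_len(7, 4);

	printf("effective LEN=%u, VL=%u bytes\n", len, zcr_len_to_vl_bytes(len));
	return 0;
}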
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-4-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/sys_regs.c | 38 ++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index befef5d0daae..c5fa66f5fd82 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -56,6 +56,14 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu); int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2); int kvm_inject_nested_irq(struct kvm_vcpu *vcpu); +static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu) +{ + u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) | + ESR_ELx_IL; + + kvm_inject_nested_sync(vcpu, esr); +} + #if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__) static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 36b8e97bf49e..0ff1ed1341fc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -423,6 +423,7 @@ enum vcpu_sysreg { MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */ CPTR_EL2, /* Architectural Feature Trap Register (EL2) */ HACR_EL2, /* Hypervisor Auxiliary Control Register */ + ZCR_EL2, /* SVE Control Register (EL2) */ TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */ TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */ TCR_EL2, /* Translation Control Register (EL2) */ @@ -991,6 +992,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break; default: return false; } @@ -1036,6 +1038,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break; default: return false; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22b45a15d068..80aa99a012ae 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -121,6 +121,7 @@ static bool get_el2_to_el1_mapping(unsigned int reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); default: return false; } @@ -2199,6 +2200,40 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return __vcpu_sys_reg(vcpu, r->reg) = val; } +static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + unsigned int r; + + r = el2_visibility(vcpu, rd); + if (r) + return r; + + return sve_visibility(vcpu, rd); +} + +static bool access_zcr_el2(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + unsigned int vq; + + if (guest_hyp_sve_traps_enabled(vcpu)) { + kvm_inject_nested_sve_trap(vcpu); + return true; + } + + if (!p->is_write) { + p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); 
+ return true; + } + + vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; + vq = min(vq, vcpu_sve_max_vq(vcpu)); + vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); + return true; +} + /* * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -2688,6 +2723,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(HFGITR_EL2, reset_val, 0), EL2_REG_VNCR(HACR_EL2, reset_val, 0), + { SYS_DESC(SYS_ZCR_EL2), .access = access_zcr_el2, .reset = reset_val, + .visibility = sve_el2_visibility, .reg = ZCR_EL2 }, + EL2_REG_VNCR(HCRX_EL2, reset_val, 0), EL2_REG(TTBR0_EL2, access_rw, reset_val, 0), -- cgit v1.2.3 From 069da3ffdadfe108729fc9aafa3930da77711812 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:41 +0000 Subject: KVM: arm64: nv: Load guest hyp's ZCR into EL1 state Load the guest hypervisor's ZCR_EL2 into the corresponding EL1 register when restoring SVE state, as ZCR_EL2 affects the VL in the hypervisor context. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-5-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/hyp/include/hyp/switch.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0ff1ed1341fc..9118716d05fc 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -868,6 +868,9 @@ struct kvm_vcpu_arch { #define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl) +#define vcpu_sve_zcr_elx(vcpu) \ + (unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1) + #define vcpu_sve_state_size(vcpu) ({ \ size_t __size_ret; \ unsigned int __vcpu_vq; \ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index be1b22b9c324..16809e74c7f9 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -318,7 +318,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) __sve_restore_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); - write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); + + write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR); } static inline void __hyp_sve_save_host(void) -- cgit v1.2.3 From b7e5c9426429aa64cacc2d804417e0e5f79b8b60 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:42 +0000 Subject: KVM: arm64: nv: Save guest's ZCR_EL2 when in hyp context When running a guest hypervisor, ZCR_EL2 is an alias for the counterpart EL1 state. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-6-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/fpsimd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 521b32868d0d..0815ff0347f5 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -178,7 +178,13 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (guest_owns_fp_regs()) { if (vcpu_has_sve(vcpu)) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + u64 zcr = read_sysreg_el1(SYS_ZCR); + + /* + * If the vCPU is in the hyp context then ZCR_EL1 is + * loaded with its vEL2 counterpart. 
+ */ + __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr; /* * Restore the VL that was saved when bound to the CPU, -- cgit v1.2.3 From 9092aca9fe9aa986b573355affdd190710a906c0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:43 +0000 Subject: KVM: arm64: nv: Use guest hypervisor's max VL when running nested guest The max VL for nested guests is additionally constrained by the max VL selected by the guest hypervisor. Use that instead of KVM's max VL when running a nested guest. Note that the guest hypervisor's ZCR_EL2 is sanitised against the VM's max VL at the time of access, so there's no additional handling required at the time of use. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-7-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/switch.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 16809e74c7f9..ab70e6e6bb0c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -314,11 +314,23 @@ static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) { + /* + * The vCPU's saved SVE state layout always matches the max VL of the + * vCPU. Start off with the max VL so we can load the SVE state. + */ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); __sve_restore_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); + /* + * The effective VL for a VM could differ from the max VL when running a + * nested guest, as the guest hypervisor could select a smaller VL. Slap + * that into hardware before wrapping up. + */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + sve_cond_update_zcr_vq(__vcpu_sys_reg(vcpu, ZCR_EL2), SYS_ZCR_EL2); + write_sysreg_el1(__vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)), SYS_ZCR); } -- cgit v1.2.3 From 2e3cf82063a00ea0629e03e223c7c6ba58718f12 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:44 +0000 Subject: KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state It is possible that the guest hypervisor has selected a smaller VL than the maximum for its nested guest. As such, ZCR_EL2 may be configured for a different VL when exiting a nested guest. Set ZCR_EL2 (via the EL1 alias) to the maximum VL for the VM before saving SVE state as the SVE save area is dimensioned by the max VL. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-8-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/fpsimd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 0815ff0347f5..c53e5b14038d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -195,11 +195,14 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) * Note that this means that at guest exit ZCR_EL1 is * not necessarily the same as on guest entry. * - * Restoring the VL isn't needed in VHE mode since - * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same - * role when doing the save from EL2. + * ZCR_EL2 holds the guest hypervisor's VL when running + * a nested guest, which could be smaller than the + * max for the vCPU. Similar to above, we first need to + * switch to a VL consistent with the layout of the + * vCPU's SVE state. 
KVM support for NV implies VHE, so + * using the ZCR_EL1 alias is safe. */ - if (!has_vhe()) + if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL1); } -- cgit v1.2.3 From 1785f020b1124c37f59f3d92b7d45ba1d707ee91 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:45 +0000 Subject: KVM: arm64: Spin off helper for programming CPTR traps A subsequent change to KVM will add preliminary support for merging a guest hypervisor's CPTR traps with that of KVM. Prepare by spinning off a new helper for managing CPTR traps. Avoid reading CPACR_EL1 for the baseline trap config, and start off with the most restrictive set of traps that is subsequently relaxed. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-9-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 48 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 8fbb6a2e0559..667ed4f558af 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -65,6 +65,29 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE); } +static void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPACR_ELx_TTA | CPTR_EL2_TAM; + + if (guest_owns_fp_regs()) { + val |= CPACR_ELx_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_ELx_ZEN; + } else { + __activate_traps_fpsimd32(vcpu); + } + + write_sysreg(val, cpacr_el1); +} + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -91,30 +114,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) } } - val = read_sysreg(cpacr_el1); - val |= CPACR_ELx_TTA; - val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN); - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - - val |= CPTR_EL2_TAM; - - if (guest_owns_fp_regs()) { - if (vcpu_has_sve(vcpu)) - val |= CPACR_ELx_ZEN; - } else { - val &= ~CPACR_ELx_FPEN; - __activate_traps_fpsimd32(vcpu); - } - - write_sysreg(val, cpacr_el1); + __activate_cptr_traps(vcpu); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); } -- cgit v1.2.3 From 493da2b1c49ac86c0eb0dde9e42b79333272d1f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Jun 2024 16:46:46 +0000 Subject: KVM: arm64: nv: Handle CPACR_EL1 traps Handle CPACR_EL1 accesses when running a VHE guest. In order to limit the cost of the emulation, implement it as a shallow exit.
In the other cases: - this is an nVHE L1 which will write to memory, and we don't trap - this is an L2 guest: * the L1 has CPTR_EL2.TCPAC==0, and the L2 has direct register access * the L1 has CPTR_EL2.TCPAC==1, and the L2 will trap, but the handling is deferred to the general handling for forwarding Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-10-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 667ed4f558af..3d51789e7d77 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -266,10 +266,40 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) __fpsimd_save_state(*host_data_ptr(fpsimd_state)); } +static bool kvm_hyp_handle_cpacr_el1(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u64 esr = kvm_vcpu_get_esr(vcpu); + int rt; + + if (!is_hyp_ctxt(vcpu) || esr_sys64_to_sysreg(esr) != SYS_CPACR_EL1) + return false; + + rt = kvm_vcpu_sys_get_rt(vcpu); + + if ((esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ) { + vcpu_set_reg(vcpu, rt, __vcpu_sys_reg(vcpu, CPTR_EL2)); + } else { + vcpu_write_sys_reg(vcpu, vcpu_get_reg(vcpu, rt), CPTR_EL2); + __activate_cptr_traps(vcpu); + } + + __kvm_skip_instr(vcpu); + + return true; +} + +static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code)) + return true; + + return kvm_hyp_handle_sysreg(vcpu, exit_code); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, - [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg_vhe, [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, -- cgit v1.2.3 From 0cfc85b8f5cf3b77463d61542191c75ba0cc3a5f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:47 +0000 Subject: KVM: arm64: nv: Load guest FP state for ZCR_EL2 trap Round out the ZCR_EL2 gymnastics by loading SVE state in the fast path when the guest hypervisor tries to access SVE state.
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-11-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index ab70e6e6bb0c..aa768d97ddd6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -371,6 +371,10 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (guest_hyp_fpsimd_traps_enabled(vcpu)) return false; break; + case ESR_ELx_EC_SYS64: + if (WARN_ON_ONCE(!is_hyp_ctxt(vcpu))) + return false; + fallthrough; case ESR_ELx_EC_SVE: if (!sve_guest) return false; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 3d51789e7d77..f4ce892edcd6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -288,11 +288,38 @@ static bool kvm_hyp_handle_cpacr_el1(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } +static bool kvm_hyp_handle_zcr_el2(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + + if (!vcpu_has_nv(vcpu)) + return false; + + if (sysreg != SYS_ZCR_EL2) + return false; + + if (guest_owns_fp_regs()) + return false; + + /* + * ZCR_EL2 traps are handled in the slow path, with the expectation + * that the guest's FP context has already been loaded onto the CPU. + * + * Load the guest's FP context and unconditionally forward to the + * slow path for handling (i.e. return false). + */ + kvm_hyp_handle_fpsimd(vcpu, exit_code); + return false; +} + static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code) { if (kvm_hyp_handle_cpacr_el1(vcpu, exit_code)) return true; + if (kvm_hyp_handle_zcr_el2(vcpu, exit_code)) + return true; + return kvm_hyp_handle_sysreg(vcpu, exit_code); } -- cgit v1.2.3 From 5326303bb7d9da79d94d0e347a6e212eaae8801d Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:48 +0000 Subject: KVM: arm64: nv: Honor guest hypervisor's FP/SVE traps in CPTR_EL2 Start folding the guest hypervisor's FP/SVE traps into the value programmed in hardware. Note that as of writing this is dead code, since KVM does a full put() / load() for every nested exception boundary which saves + flushes the FP/SVE state. However, this will become useful when we can keep the guest's FP/SVE state alive across a nested exception boundary and the host no longer needs to conservatively program traps. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-12-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index f4ce892edcd6..fa6c27b6ad99 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -67,6 +67,8 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { + u64 cptr; + /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to * CPTR_EL2. 
In general, CPACR_EL1 has the same layout as CPTR_EL2, @@ -85,6 +87,35 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. + */ + if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. + */ + if (!(SYS_FIELD_GET(CPACR_ELx, FPEN, cptr) & BIT(0))) + val &= ~CPACR_ELx_FPEN; + if (!(SYS_FIELD_GET(CPACR_ELx, ZEN, cptr) & BIT(0))) + val &= ~CPACR_ELx_ZEN; + +write: write_sysreg(val, cpacr_el1); } -- cgit v1.2.3 From e19d533126accf342d34019f4bc92b8796b125bc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Jun 2024 16:46:50 +0000 Subject: KVM: arm64: nv: Add trap description for CPTR_EL2 Add trap description for CPTR_EL2.{TCPAC,TAM,E0POE,TTA}. TTA is a bit annoying as it changes location depending on E2H. This forces us to add yet another "complex" trap condition. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-14-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/emulate-nested.c | 91 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 54090967a335..96b837fe5156 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -79,6 +79,10 @@ enum cgt_group_id { CGT_MDCR_E2TB, CGT_MDCR_TDCC, + CGT_CPACR_E0POE, + CGT_CPTR_TAM, + CGT_CPTR_TCPAC, + /* * Anything after this point is a combination of coarse trap * controls, which must all be evaluated to decide what to do. @@ -106,6 +110,8 @@ enum cgt_group_id { CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__, CGT_CNTHCTL_EL1PTEN, + CGT_CPTR_TTA, + /* Must be last */ __NR_CGT_GROUP_IDS__ }; @@ -345,6 +351,24 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = MDCR_EL2_TDCC, .behaviour = BEHAVE_FORWARD_ANY, }, + [CGT_CPACR_E0POE] = { + .index = CPTR_EL2, + .value = CPACR_ELx_E0POE, + .mask = CPACR_ELx_E0POE, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_CPTR_TAM] = { + .index = CPTR_EL2, + .value = CPTR_EL2_TAM, + .mask = CPTR_EL2_TAM, + .behaviour = BEHAVE_FORWARD_ANY, + }, + [CGT_CPTR_TCPAC] = { + .index = CPTR_EL2, + .value = CPTR_EL2_TCPAC, + .mask = CPTR_EL2_TCPAC, + .behaviour = BEHAVE_FORWARD_ANY, + }, }; #define MCB(id, ...) 
\ @@ -410,12 +434,26 @@ static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu) return BEHAVE_FORWARD_ANY; } +static enum trap_behaviour check_cptr_tta(struct kvm_vcpu *vcpu) +{ + u64 val = __vcpu_sys_reg(vcpu, CPTR_EL2); + + if (!vcpu_el2_e2h_is_set(vcpu)) + val = translate_cptr_el2_to_cpacr_el1(val); + + if (val & CPACR_ELx_TTA) + return BEHAVE_FORWARD_ANY; + + return BEHAVE_HANDLE_LOCALLY; +} + #define CCC(id, fn) \ [id - __COMPLEX_CONDITIONS__] = fn static const complex_condition_check ccc[] = { CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten), CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten), + CCC(CGT_CPTR_TTA, check_cptr_tta), }; /* @@ -1000,6 +1038,59 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_TRBPTR_EL1, CGT_MDCR_E2TB), SR_TRAP(SYS_TRBSR_EL1, CGT_MDCR_E2TB), SR_TRAP(SYS_TRBTRG_EL1, CGT_MDCR_E2TB), + SR_TRAP(SYS_CPACR_EL1, CGT_CPTR_TCPAC), + SR_TRAP(SYS_AMUSERENR_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCFGR_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCGCR_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCNTENCLR0_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCNTENCLR1_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCNTENSET0_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCNTENSET1_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMCR_EL0, CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR0_EL0(0), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR0_EL0(1), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR0_EL0(2), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR0_EL0(3), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(0), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(1), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(2), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(3), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(4), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(5), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(6), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(7), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(8), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(9), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(10), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(11), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(12), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(13), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(14), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVCNTR1_EL0(15), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER0_EL0(0), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER0_EL0(1), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER0_EL0(2), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER0_EL0(3), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(0), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(1), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(2), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(3), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(4), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(5), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(6), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(7), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(8), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(9), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(10), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(11), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(12), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(13), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(14), CGT_CPTR_TAM), + SR_TRAP(SYS_AMEVTYPER1_EL0(15), CGT_CPTR_TAM), + SR_TRAP(SYS_POR_EL0, CGT_CPACR_E0POE), + /* op0=2, op1=1, and CRn<0b1000 */ + SR_RANGE_TRAP(sys_reg(2, 1, 0, 0, 0), + sys_reg(2, 1, 7, 15, 7), CGT_CPTR_TTA), SR_TRAP(SYS_CNTP_TVAL_EL0, CGT_CNTHCTL_EL1PTEN), SR_TRAP(SYS_CNTP_CVAL_EL0, CGT_CNTHCTL_EL1PTEN), SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN), -- cgit v1.2.3 From cd931bd6093cb7da7b9787f04b21bca58c494537 
Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Jun 2024 16:46:51 +0000 Subject: KVM: arm64: nv: Add additional trap setup for CPTR_EL2 We need to teach KVM a couple of new tricks. CPTR_EL2 and its VHE accessor CPACR_EL1 need to be handled specially: - CPACR_EL1 is trapped on VHE so that we can track the TCPAC and TTA bits - CPTR_EL2.{TCPAC,E0POE} are propagated from L1 to L2 Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-15-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/vhe/switch.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index fa6c27b6ad99..f68018297114 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -87,11 +87,23 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + /* * Layer the guest hypervisor's trap configuration on top of our own if * we're in a nested context. */ - if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu)) + if (is_hyp_ctxt(vcpu)) goto write; cptr = vcpu_sanitised_cptr_el2(vcpu); @@ -115,6 +127,11 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (!(SYS_FIELD_GET(CPACR_ELx, ZEN, cptr) & BIT(0))) val &= ~CPACR_ELx_ZEN; + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_ELx_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + write: write_sysreg(val, cpacr_el1); } -- cgit v1.2.3 From f1ee914fb62683cd72ac49cc68a1d92f12ec65c6 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 20 Jun 2024 16:46:52 +0000 Subject: KVM: arm64: Allow the use of SVE+NV Allow SVE and NV to mix now that everything is in place to handle it correctly. 
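From the userspace side, combining the two is a matter of requesting both features at vCPU init time. A minimal sketch using the public KVM ioctls (error handling and capability checks omitted; vm_fd/vcpu_fd and the hypothetical helper name are assumptions, with the fds coming from KVM_CREATE_VM/KVM_CREATE_VCPU):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int init_nv_sve_vcpu(int vm_fd, int vcpu_fd)
{
	struct kvm_vcpu_init init;
	int feature = KVM_ARM_VCPU_SVE;

	/* Start from the preferred target, then opt in to EL2 and SVE. */
	if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init))
		return -1;

	init.features[0] |= (1u << KVM_ARM_VCPU_HAS_EL2) |
			    (1u << KVM_ARM_VCPU_SVE);

	if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init))
		return -1;

	/* SVE registers are only usable once the vector lengths are finalized. */
	return ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &feature);
}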
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20240620164653.1130714-16-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/arm.c | 5 ----- arch/arm64/kvm/nested.c | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 59716789fe0f..bfad0a7f5d12 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1419,11 +1419,6 @@ static int kvm_vcpu_init_check_features(struct kvm_vcpu *vcpu, test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features)) return -EINVAL; - /* Disallow NV+SVE for the time being */ - if (test_bit(KVM_ARM_VCPU_HAS_EL2, &features) && - test_bit(KVM_ARM_VCPU_SVE, &features)) - return -EINVAL; - if (!test_bit(KVM_ARM_VCPU_EL1_32BIT, &features)) return 0; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index bae8536cbf00..2b9fcbb0fe6c 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -41,13 +41,12 @@ static u64 limit_nv_id_reg(u32 id, u64 val) break; case SYS_ID_AA64PFR0_EL1: - /* No AMU, MPAM, S-EL2, RAS or SVE */ + /* No AMU, MPAM, S-EL2, or RAS */ val &= ~(GENMASK_ULL(55, 52) | NV_FTR(PFR0, AMU) | NV_FTR(PFR0, MPAM) | NV_FTR(PFR0, SEL2) | NV_FTR(PFR0, RAS) | - NV_FTR(PFR0, SVE) | NV_FTR(PFR0, EL3) | NV_FTR(PFR0, EL2) | NV_FTR(PFR0, EL1)); -- cgit v1.2.3 From 33d85a93c6c3c0c1fc2d168ee5a9ae604c439fa7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 21 Jun 2024 22:40:44 +0000 Subject: KVM: arm64: nv: Unfudge ID_AA64PFR0_EL1 masking Marc reports that L1 VMs aren't booting with the NV series applied to today's kvmarm/next. After bisecting the issue, it appears that 44241f34fac9 ("KVM: arm64: nv: Use accessors for modifying ID registers") is to blame. Poking around at the issue a bit further, it'd appear that the value for ID_AA64PFR0_EL1 is complete garbage, as 'val' still contains the value we set ID_AA64ISAR1_EL1 to. Fix the read-modify-write pattern to actually use ID_AA64PFR0_EL1 as the starting point. Excuse me as I return to my shame cube. Reported-by: Marc Zyngier Fixes: 44241f34fac9 ("KVM: arm64: nv: Use accessors for modifying ID registers") Acked-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240621224044.2465901-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 44085c13e673..794ed18c0537 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -40,7 +40,7 @@ static void limit_nv_id_regs(struct kvm *kvm) kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val); /* No AMU, MPAM, S-EL2, RAS or SVE */ - kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1); + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1); val &= ~(GENMASK_ULL(55, 52) | NV_FTR(PFR0, AMU) | NV_FTR(PFR0, MPAM) | -- cgit v1.2.3 From 9b58e665d6b25ff687380d14009d7cffe7f70df7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Jun 2024 14:00:37 +0100 Subject: KVM: arm64: Correctly honor the presence of FEAT_TCRX We currently blindly enable TCR2_EL1 use in a guest, irrespective of the feature set. This is obviously wrong, and we should actually honor the guest configuration and handle the possible trap resulting from the guest being buggy. 
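The gate itself is just an ID register field check. A rough stand-alone illustration follows (assuming FEAT_TCRX is reported in ID_AA64MMFR3_EL1[3:0], with 0 meaning not implemented; the kernel uses its kvm_has_feat() machinery instead):

#include <stdbool.h>
#include <stdint.h>

static bool vm_has_feat_tcrx(uint64_t id_aa64mmfr3_el1)
{
	/* A zero TCRX field means TCR2_EL1 is not implemented for this VM. */
	return (id_aa64mmfr3_el1 & 0xf) != 0;
}

int main(void)
{
	/* e.g. a sanitised ID_AA64MMFR3_EL1 of 0 means no FEAT_TCRX */
	return vm_has_feat_tcrx(0) ? 1 : 0;
}

When this is false for the VM's sanitised view of the register, a guest access to TCR2_EL1 gets an UNDEF injected rather than being emulated, as the sys_regs.c hunk below does.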
Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20240625130042.259175-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kvm/sys_regs.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b2adc2c6c82a..e6682a3ace5a 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,7 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 22b45a15d068..71996d36f375 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -383,6 +383,12 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, bool was_enabled = vcpu_has_cache_enabled(vcpu); u64 val, mask, shift; + if (reg_to_encoding(r) == SYS_TCR2_EL1 && + !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + BUG_ON(!p->is_write); get_access_mask(r, &mask, &shift); @@ -4060,6 +4066,9 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu) if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); + + if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) + vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En; } if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags)) -- cgit v1.2.3 From a3ee9ce88ba3adc0a9bcb77dd40eca6aff3cef28 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Jun 2024 14:00:38 +0100 Subject: KVM: arm64: Get rid of HCRX_GUEST_FLAGS HCRX_GUEST_FLAGS gives random KVM hackers the impression that they can stuff bits in this macro and unconditionally enable features in the guest. In general, this is wrong (we have been there with FEAT_MOPS, and again with FEAT_TCRX). Document that HCRX_EL2.SMPME is an exception rather than the rule, and get rid of HCRX_GUEST_FLAGS. Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20240625130042.259175-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 1 - arch/arm64/kvm/sys_regs.c | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index e6682a3ace5a..d81cc746e0eb 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,6 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) -#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 71996d36f375..8e22232c4b0f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4062,7 +4062,13 @@ void kvm_init_sysreg(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 |= HCR_TTLBOS; if (cpus_have_final_cap(ARM64_HAS_HCX)) { - vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS; + /* + * In general, all HCRX_EL2 bits are gated by a feature. 
+ * The only reason we can set SMPME without checking any + * feature is that its effects are not directly observable + * from the guest. + */ + vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME; if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP)) vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2); -- cgit v1.2.3 From 1b04fd40275e09c2062e125593e7d2b0b9f87b0a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Jun 2024 14:00:39 +0100 Subject: KVM: arm64: Make TCR2_EL1 save/restore dependent on the VM features As for other registers, save/restore of TCR2_EL1 should be gated on the feature being actually present. In the case of a nVHE hypervisor, it is perfectly fine to leave the host value in the register, as HCRX_EL2.TCREn==0 imposes that TCR2_EL1 is treated as 0. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240625130042.259175-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 4be6a7fa0070..ea2aeeff61db 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -55,6 +55,17 @@ static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt) return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP); } +static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu; + + if (!cpus_have_final_cap(ARM64_HAS_TCR2)) + return false; + + vcpu = ctxt_to_vcpu(ctxt); + return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP); +} + static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); @@ -62,7 +73,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); - if (cpus_have_final_cap(ARM64_HAS_TCR2)) + if (ctxt_has_tcrx(ctxt)) ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2); ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); @@ -138,7 +149,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); - if (cpus_have_final_cap(ARM64_HAS_TCR2)) + if (ctxt_has_tcrx(ctxt)) write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2); write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); -- cgit v1.2.3 From 663abf04ee4d750229e8f47881d31a5204259ceb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Jun 2024 14:00:41 +0100 Subject: KVM: arm64: Make PIR{,E0}_EL1 save/restore conditional on FEAT_TCRX As per the architecture, if FEAT_S1PIE is implemented, then FEAT_TCRX must be implemented as well. Take advantage of this to avoid checking for S1PIE when TCRX isn't implemented. 
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240625130042.259175-6-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index ea2aeeff61db..4c0fdabaf8ae 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -73,8 +73,14 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); - if (ctxt_has_tcrx(ctxt)) + if (ctxt_has_tcrx(ctxt)) { ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2); + + if (ctxt_has_s1pie(ctxt)) { + ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); + ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); + } + } ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); @@ -84,10 +90,6 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); - if (ctxt_has_s1pie(ctxt)) { - ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); - ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); - } ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); @@ -149,8 +151,14 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); - if (ctxt_has_tcrx(ctxt)) + if (ctxt_has_tcrx(ctxt)) { write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2); + + if (ctxt_has_s1pie(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); + } + } write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); @@ -160,10 +168,6 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); - if (ctxt_has_s1pie(ctxt)) { - write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); - write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); - } write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); -- cgit v1.2.3 From 91e9cc70b77516e766fd8b532c3a20aba37369d1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 25 Jun 2024 14:00:42 +0100 Subject: KVM: arm64: Honor trap routing for TCR2_EL1 TCR2_EL1 handling is missing the handling of its trap configuration: - HCRX_EL2.TCR2En must be handled in conjunction with HCR_EL2.{TVM,TRVM} - HFG{R,W}TR_EL2.TCR_EL1 does apply to TCR2_EL1 as well Without these two controls being implemented, it is impossible to correctly route TCR2_EL1 
traps. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240625130042.259175-7-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/emulate-nested.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 54090967a335..2fa2d5fc37d6 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -79,6 +79,8 @@ enum cgt_group_id { CGT_MDCR_E2TB, CGT_MDCR_TDCC, + CGT_HCRX_TCR2En, + /* * Anything after this point is a combination of coarse trap * controls, which must all be evaluated to decide what to do. @@ -89,6 +91,7 @@ enum cgt_group_id { CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB_TTLBOS, CGT_HCR_TVM_TRVM, + CGT_HCR_TVM_TRVM_HCRX_TCR2En, CGT_HCR_TPU_TICAB, CGT_HCR_TPU_TOCU, CGT_HCR_NV1_nNV2_ENSCXT, @@ -345,6 +348,12 @@ static const struct trap_bits coarse_trap_bits[] = { .mask = MDCR_EL2_TDCC, .behaviour = BEHAVE_FORWARD_ANY, }, + [CGT_HCRX_TCR2En] = { + .index = HCRX_EL2, + .value = 0, + .mask = HCRX_EL2_TCR2En, + .behaviour = BEHAVE_FORWARD_ANY, + }, }; #define MCB(id, ...) \ @@ -359,6 +368,8 @@ static const enum cgt_group_id *coarse_control_combo[] = { MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS), MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS), MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM), + MCB(CGT_HCR_TVM_TRVM_HCRX_TCR2En, + CGT_HCR_TVM, CGT_HCR_TRVM, CGT_HCRX_TCR2En), MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB), MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU), MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT), @@ -622,6 +633,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM), SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM), + SR_TRAP(SYS_TCR2_EL1, CGT_HCR_TVM_TRVM_HCRX_TCR2En), SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ), SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ), SR_TRAP(SYS_DC_GZVA, CGT_HCR_TDZ), @@ -1071,6 +1083,7 @@ static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { SR_FGT(SYS_TPIDRRO_EL0, HFGxTR, TPIDRRO_EL0, 1), SR_FGT(SYS_TPIDR_EL1, HFGxTR, TPIDR_EL1, 1), SR_FGT(SYS_TCR_EL1, HFGxTR, TCR_EL1, 1), + SR_FGT(SYS_TCR2_EL1, HFGxTR, TCR_EL1, 1), SR_FGT(SYS_SCXTNUM_EL0, HFGxTR, SCXTNUM_EL0, 1), SR_FGT(SYS_SCXTNUM_EL1, HFGxTR, SCXTNUM_EL1, 1), SR_FGT(SYS_SCTLR_EL1, HFGxTR, SCTLR_EL1, 1), -- cgit v1.2.3 From 3cfde36df7aba306e31209ec882a9e015f2d5963 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Jul 2024 16:47:43 +0100 Subject: KVM: arm64: nv: Truely enable nXS TLBI operations Although we now have support for nXS-flavoured TLBI instructions, we still don't expose the feature to the guest thanks to a mixture of misleading comment and use of a bunch of magic values. Fix the comment and correctly express the masking of LS64, which is enough to expose nXS to the world. Not that anyone cares... 
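To spell out the masking problem (field positions stated here as assumptions about the ID_AA64ISAR1_EL1 layout: LS64 in [63:60], XS in [59:56]): clearing GENMASK(63, 56) wipes both fields, so XS (the nXS TLBI feature) was being hidden along with LS64, while masking only the LS64 field leaves XS visible. A throwaway sketch of the two masks:

#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

int main(void)
{
	uint64_t old_mask = GENMASK_ULL(63, 56);	/* LS64 + XS: too wide */
	uint64_t new_mask = GENMASK_ULL(63, 60);	/* LS64 only */

	printf("old mask 0x%016llx, new mask 0x%016llx\n",
	       (unsigned long long)old_mask, (unsigned long long)new_mask);
	return 0;
}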
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240703154743.824824-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 451926cb6c5d..4b51d7a59c2d 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -810,8 +810,8 @@ static u64 limit_nv_id_reg(u32 id, u64 val) break; case SYS_ID_AA64ISAR1_EL1: - /* Support everything but Spec Invalidation */ - val &= ~(GENMASK_ULL(63, 56) | + /* Support everything but Spec Invalidation and LS64 */ + val &= ~(NV_FTR(ISAR1, LS64) | NV_FTR(ISAR1, SPECRES)); break; -- cgit v1.2.3 From cb52b5c8b81bfbc34df13537d82cd1849725d6c7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 8 Jul 2024 19:50:51 +0000 Subject: Revert "KVM: arm64: nv: Fix RESx behaviour of disabled FGTs with negative polarity" This reverts commit eb9d53d4a949c6d6d7c9f130e537f6b5687fedf9. As Marc pointed out on the list [*], this patch is wrong, and those who find themselves in the SOB chain should have their heads checked. Annoyingly, the architecture has some FGT trap bits that are negative (i.e. 0 implies trap), and there was some confusion how KVM handles this for nested guests. However, it is clear now that KVM honors the RES0-ness of FGT traps already, meaning traps for features never exposed to the guest hypervisor get handled at L0. As they should. Link: https://lore.kernel.org/kvmarm/86bk3c3uss.wl-maz@kernel.org/T/#mb9abb3dd79f6a4544a91cb35676bd637c3a5e836 Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 913018e4cdae..d23618af8291 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -328,21 +328,21 @@ int kvm_init_nv_sysregs(struct kvm *kvm) HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 | HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1); if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA)) - res1 |= HFGxTR_EL2_nACCDATA_EL1; + res0 |= HFGxTR_EL2_nACCDATA_EL1; if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) - res1 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1); + res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP)) - res1 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); + res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP)) - res1 |= HFGxTR_EL2_nRCWMASK_EL1; + res0 |= HFGxTR_EL2_nRCWMASK_EL1; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) - res1 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); + res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP)) - res1 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); + res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1); if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - res1 |= HFGxTR_EL2_nS2POR_EL1; + res0 |= HFGxTR_EL2_nS2POR_EL1; if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP)) - res1 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1); + res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1); set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1); set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1); @@ -378,10 +378,10 @@ int kvm_init_nv_sysregs(struct kvm *kvm) HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 | 
HDFGRTR_EL2_TRBTRG_EL1); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) - res1 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL | + res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL | HDFGRTR_EL2_nBRBDATA); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2)) - res1 |= HDFGRTR_EL2_nPMSNEVFR_EL1; + res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1; set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1); /* Reuse the bits from the read-side and add the write-specific stuff */ @@ -417,9 +417,9 @@ int kvm_init_nv_sysregs(struct kvm *kvm) res0 |= (HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX | HFGITR_EL2_CPPRCTX); if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP)) - res1 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL); + res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL); if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP)) - res1 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 | + res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 | HFGITR_EL2_nGCSEPP); if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX)) res0 |= HFGITR_EL2_COSPRCTX; -- cgit v1.2.3
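The polarity issue at the heart of the original patch and this revert can be modelled in a few lines. The sketch below is illustrative only, not the kernel's FGT handling: bits prefixed with 'n' are negative, i.e. the trap fires when the bit is clear, so forcing such a bit to 0 for a hidden feature keeps the trap armed and handled by the host (L0) itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Positive trap bit: set means trap.  Negative ("nFOO") bit: clear means trap. */
static bool fgt_bit_traps(uint64_t reg, uint64_t bit, bool negative)
{
	bool set = reg & bit;

	return negative ? !set : set;
}

int main(void)
{
	/* A negative-polarity bit treated as RES0 keeps the trap enabled... */
	printf("nFOO forced to 0: trap=%d\n", fgt_bit_traps(0, 1ULL << 0, true));
	/* ...whereas setting it (RES1 treatment) would disable the trap. */
	printf("nFOO forced to 1: trap=%d\n", fgt_bit_traps(1, 1ULL << 0, true));
	return 0;
}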