diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 22:31:43 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 22:31:43 +0100 |
commit | 3e10585335b7967326ca7b4118cada0d2d00a2ab (patch) | |
tree | e1655bc4f093f7de3a54dc3b2d83a54159aca10b /arch/arm64 | |
parent | Merge tag 'hyperv-next-signed-20210216' of git://git.kernel.org/pub/scm/linux... (diff) | |
parent | Merge tag 'kvmarm-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/kvma... (diff) | |
download | linux-3e10585335b7967326ca7b4118cada0d2d00a2ab.tar.xz linux-3e10585335b7967326ca7b4118cada0d2d00a2ab.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"x86:
- Support for userspace to emulate Xen hypercalls
- Raise the maximum number of user memslots
- Scalability improvements for the new MMU.
Instead of the complex "fast page fault" logic that is used in
mmu.c, tdp_mmu.c uses an rwlock so that page faults are concurrent,
but the code that can run against page faults is limited. Right now
only page faults take the lock for reading; in the future this will
be extended to some cases of page table destruction. I hope to
switch the default MMU around 5.12-rc3 (some testing was delayed
due to Chinese New Year).
- Cleanups for MAXPHYADDR checks
- Use static calls for vendor-specific callbacks
- On AMD, use VMLOAD/VMSAVE to save and restore host state
- Stop using deprecated jump label APIs
- Workaround for AMD erratum that made nested virtualization
unreliable
- Support for LBR emulation in the guest
- Support for communicating bus lock vmexits to userspace
- Add support for SEV attestation command
- Miscellaneous cleanups
PPC:
- Support for second data watchpoint on POWER10
- Remove some complex workarounds for buggy early versions of POWER9
- Guest entry/exit fixes
ARM64:
- Make the nVHE EL2 object relocatable
- Cleanups for concurrent translation faults hitting the same page
- Support for the standard TRNG hypervisor call
- A bunch of small PMU/Debug fixes
- Simplification of the early init hypercall handling
Non-KVM changes (with acks):
- Detection of contended rwlocks (implemented only for qrwlocks,
because KVM only needs it for x86)
- Allow __DISABLE_EXPORTS from assembly code
- Provide a saner follow_pfn replacements for modules"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (192 commits)
KVM: x86/xen: Explicitly pad struct compat_vcpu_info to 64 bytes
KVM: selftests: Don't bother mapping GVA for Xen shinfo test
KVM: selftests: Fix hex vs. decimal snafu in Xen test
KVM: selftests: Fix size of memslots created by Xen tests
KVM: selftests: Ignore recently added Xen tests' build output
KVM: selftests: Add missing header file needed by xAPIC IPI tests
KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c
KVM: SVM: Make symbol 'svm_gp_erratum_intercept' static
locking/arch: Move qrwlock.h include after qspinlock.h
KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests
KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries
KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2
KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path
KVM: PPC: remove unneeded semicolon
KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB
KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest
KVM: PPC: Book3S HV: Fix radix guest SLB side channel
KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support
KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR
KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR
...
Diffstat (limited to 'arch/arm64')
31 files changed, 829 insertions, 224 deletions
diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h index daa1a1da539e..737ded6b6d0d 100644 --- a/arch/arm64/include/asm/hyp_image.h +++ b/arch/arm64/include/asm/hyp_image.h @@ -7,6 +7,9 @@ #ifndef __ARM64_HYP_IMAGE_H__ #define __ARM64_HYP_IMAGE_H__ +#define __HYP_CONCAT(a, b) a ## b +#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b) + /* * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, * to separate it from the kernel proper. @@ -21,9 +24,31 @@ */ #define HYP_SECTION_NAME(NAME) .hyp##NAME +/* Symbol defined at the beginning of each hyp section. */ +#define HYP_SECTION_SYMBOL_NAME(NAME) \ + HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME)) + +/* + * Helper to generate linker script statements starting a hyp section. + * + * A symbol with a well-known name is defined at the first byte. This + * is used as a base for hyp relocations (see gen-hyprel.c). It must + * be defined inside the section so the linker of `vmlinux` cannot + * separate it from the section data. + */ +#define BEGIN_HYP_SECTION(NAME) \ + HYP_SECTION_NAME(NAME) : { \ + HYP_SECTION_SYMBOL_NAME(NAME) = .; + +/* Helper to generate linker script statements ending a hyp section. */ +#define END_HYP_SECTION \ + } + /* Defines an ELF hyp section from input section @NAME and its subsections. */ -#define HYP_SECTION(NAME) \ - HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) } +#define HYP_SECTION(NAME) \ + BEGIN_HYP_SECTION(NAME) \ + *(NAME NAME##.*) \ + END_HYP_SECTION /* * Defines a linker script alias of a kernel-proper symbol referenced by diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7ccf770c53d9..22d933e9b59e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -199,26 +199,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -/* - * Obtain the PC-relative address of a kernel symbol - * s: symbol - * - * The goal of this macro is to return a symbol's address based on a - * PC-relative computation, as opposed to a loading the VA from a - * constant pool or something similar. This works well for HYP, as an - * absolute VA is guaranteed to be wrong. Only use this if trying to - * obtain the address of a symbol (i.e. not something you obtained by - * following a pointer). - */ -#define hyp_symbol_addr(s) \ - ({ \ - typeof(s) *addr; \ - asm("adrp %0, %1\n" \ - "add %0, %0, :lo12:%1\n" \ - : "=r" (addr) : "S" (&s)); \ - addr; \ - }) - #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ " .align 3\n" \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8fcfab0c2567..3d10e6527f7d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 512 #define KVM_HALT_POLL_NS_DEFAULT 500000 #include <kvm/arm_vgic.h> @@ -771,4 +770,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) +int kvm_trng_call(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e52d82aeadca..90873851f677 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -73,49 +73,39 @@ alternative_cb_end .endm /* - * Convert a kernel image address to a PA - * reg: kernel address to be converted in place + * Convert a hypervisor VA to a PA + * reg: hypervisor address to be converted in place * tmp: temporary register - * - * The actual code generation takes place in kvm_get_kimage_voffset, and - * the instructions below are only there to reserve the space and - * perform the register allocation (kvm_get_kimage_voffset uses the - * specific registers encoded in the instructions). */ -.macro kimg_pa reg, tmp -alternative_cb kvm_get_kimage_voffset - movz \tmp, #0 - movk \tmp, #0, lsl #16 - movk \tmp, #0, lsl #32 - movk \tmp, #0, lsl #48 -alternative_cb_end - - /* reg = __pa(reg) */ - sub \reg, \reg, \tmp +.macro hyp_pa reg, tmp + ldr_l \tmp, hyp_physvirt_offset + add \reg, \reg, \tmp .endm /* - * Convert a kernel image address to a hyp VA - * reg: kernel address to be converted in place + * Convert a hypervisor VA to a kernel image address + * reg: hypervisor address to be converted in place * tmp: temporary register * * The actual code generation takes place in kvm_get_kimage_voffset, and * the instructions below are only there to reserve the space and - * perform the register allocation (kvm_update_kimg_phys_offset uses the + * perform the register allocation (kvm_get_kimage_voffset uses the * specific registers encoded in the instructions). */ -.macro kimg_hyp_va reg, tmp -alternative_cb kvm_update_kimg_phys_offset +.macro hyp_kimg_va reg, tmp + /* Convert hyp VA -> PA. */ + hyp_pa \reg, \tmp + + /* Load kimage_voffset. */ +alternative_cb kvm_get_kimage_voffset movz \tmp, #0 movk \tmp, #0, lsl #16 movk \tmp, #0, lsl #32 movk \tmp, #0, lsl #48 alternative_cb_end - sub \reg, \reg, \tmp - mov_q \tmp, PAGE_OFFSET - orr \reg, \reg, \tmp - kern_hyp_va \reg + /* Convert PA -> kimg VA. */ + add \reg, \reg, \tmp .endm #else @@ -129,6 +119,7 @@ alternative_cb_end void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); +void kvm_apply_hyp_relocations(void); static __always_inline unsigned long __kern_hyp_va(unsigned long v) { @@ -144,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) -static __always_inline unsigned long __kimg_hyp_va(unsigned long v) -{ - unsigned long offset; - - asm volatile(ALTERNATIVE_CB("movz %0, #0\n" - "movk %0, #0, lsl #16\n" - "movk %0, #0, lsl #32\n" - "movk %0, #0, lsl #48\n", - kvm_update_kimg_phys_offset) - : "=r" (offset)); - - return __kern_hyp_va((v - offset) | PAGE_OFFSET); -} - -#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v)))) - -#define kimg_fn_ptr(x) (typeof(x) **)(x) - /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 52ab38db04c7..8886d43cfb11 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -157,6 +157,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * If device attributes are not explicitly requested in @prot, then the * mapping will be normal, cacheable. * + * Note that the update of a valid leaf PTE in this function will be aborted, + * if it's trying to recreate the exact same mapping or only change the access + * permissions. Instead, the vCPU will exit one more time from guest if still + * needed and then go through the path of relaxing permissions. + * * Note that this function will both coalesce existing table entries and split * existing block mappings, relying on page-faults to fault back areas outside * of the new mapping lazily. diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 8ff579361731..2f36b16a5b5d 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,7 +11,8 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; -extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[]; +extern char __hyp_rodata_start[], __hyp_rodata_end[]; +extern char __hyp_reloc_begin[], __hyp_reloc_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 9083d6992603..0525c0b089ed 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -5,8 +5,8 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H -#include <asm/qrwlock.h> #include <asm/qspinlock.h> +#include <asm/qrwlock.h> /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 767bb2d47be9..f9fbbb4734e8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -853,7 +853,10 @@ #define ID_DFR0_PERFMON_SHIFT 24 +#define ID_DFR0_PERFMON_8_0 0x3 #define ID_DFR0_PERFMON_8_1 0x4 +#define ID_DFR0_PERFMON_8_4 0x5 +#define ID_DFR0_PERFMON_8_5 0x6 #define ID_ISAR4_SWP_FRAC_SHIFT 28 #define ID_ISAR4_PSR_M_SHIFT 24 diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index f676243abac6..23f1a557bd9f 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,7 +64,6 @@ __efistub__ctype = _ctype; /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); -KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); /* Global kernel state accessed by nVHE hyp code. */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ad00f99ee9b0..357590beaabb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -434,8 +434,10 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) + if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) { kvm_compute_layout(); + kvm_apply_hyp_relocations(); + } } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 68f76a96c60b..7eea7888bb02 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -31,10 +31,11 @@ jiffies = jiffies_64; __stop___kvm_ex_table = .; #define HYPERVISOR_DATA_SECTIONS \ - HYP_SECTION_NAME(.data..ro_after_init) : { \ - __hyp_data_ro_after_init_start = .; \ + HYP_SECTION_NAME(.rodata) : { \ + __hyp_rodata_start = .; \ *(HYP_SECTION_NAME(.data..ro_after_init)) \ - __hyp_data_ro_after_init_end = .; \ + *(HYP_SECTION_NAME(.rodata)) \ + __hyp_rodata_end = .; \ } #define HYPERVISOR_PERCPU_SECTION \ @@ -42,10 +43,19 @@ jiffies = jiffies_64; HYP_SECTION_NAME(.data..percpu) : { \ *(HYP_SECTION_NAME(.data..percpu)) \ } + +#define HYPERVISOR_RELOC_SECTION \ + .hyp.reloc : ALIGN(4) { \ + __hyp_reloc_begin = .; \ + *(.hyp.reloc) \ + __hyp_reloc_end = .; \ + } + #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE #define HYPERVISOR_DATA_SECTIONS #define HYPERVISOR_PERCPU_SECTION +#define HYPERVISOR_RELOC_SECTION #endif #define HYPERVISOR_TEXT \ @@ -216,6 +226,8 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) HYPERVISOR_PERCPU_SECTION + HYPERVISOR_RELOC_SECTION + .rela.dyn : ALIGN(8) { *(.rela .rela*) } diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 13b017284bf9..589921392cb1 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - arch_timer.o \ + arch_timer.o trng.o\ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6804c5673312..fc4c95dd2d26 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1750,11 +1750,10 @@ static int init_hyp_mode(void) goto out_err; } - err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start), - kvm_ksym_ref(__hyp_data_ro_after_init_end), - PAGE_HYP_RO); + err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start), + kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO); if (err) { - kvm_err("Cannot map .hyp.data..ro_after_init section\n"); + kvm_err("Cannot map .hyp.rodata section\n"); goto out_err; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 84473574c2e7..54f4860cd87c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void) struct exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); - entry = hyp_symbol_addr(__start___kvm_ex_table); - end = hyp_symbol_addr(__stop___kvm_ex_table); + entry = &__start___kvm_ex_table; + end = &__stop___kvm_ex_table; while (entry < end) { addr = (unsigned long)&entry->insn + entry->insn; diff --git a/arch/arm64/kvm/hyp/nvhe/.gitignore b/arch/arm64/kvm/hyp/nvhe/.gitignore index 695d73d0249e..5b6c43cc96f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/.gitignore +++ b/arch/arm64/kvm/hyp/nvhe/.gitignore @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only +gen-hyprel hyp.lds +hyp-reloc.S diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 1f1e351c5fe2..a6707df4f6c0 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -3,8 +3,11 @@ # Makefile for Kernel-based Virtual Machine module, HYP/nVHE part # -asflags-y := -D__KVM_NVHE_HYPERVISOR__ -ccflags-y := -D__KVM_NVHE_HYPERVISOR__ +asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS +ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS + +hostprogs := gen-hyprel +HOST_EXTRACFLAGS += -I$(objtree)/include obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o @@ -19,7 +22,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y)) obj-y := kvm_nvhe.o -extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds +extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o # 1) Compile all source files to `.nvhe.o` object files. The file extension # avoids file name clashes for files shared with VHE. @@ -42,11 +45,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE $(call if_changed,ld) -# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'. +# 4) Generate list of hyp code/data positions that need to be relocated at +# runtime. Because the hypervisor is part of the kernel binary, relocations +# produce a kernel VA. We enumerate relocations targeting hyp at build time +# and convert the kernel VAs at those positions to hyp VAs. +$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel + $(call if_changed,hyprel) + +# 5) Compile hyp-reloc.S and link it into the existing partially linked object. +# The object file now contains a section with pointers to hyp positions that +# will contain kernel VAs at runtime. These pointers have relocations on them +# so that they get updated as the hyp object is linked into `vmlinux`. +LDFLAGS_kvm_nvhe.rel.o := -r +$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE + $(call if_changed,ld) + +# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'. # Prefixes names of ELF symbols with '__kvm_nvhe_'. -$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE +$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE $(call if_changed,hypcopy) +# The HYPREL command calls `gen-hyprel` to generate an assembly file with +# a list of relocations targeting hyp code/data. +quiet_cmd_hyprel = HYPREL $@ + cmd_hyprel = $(obj)/gen-hyprel $< > $@ + # The HYPCOPY command uses `objcopy` to prefix all ELF symbol names # to avoid clashes with VHE code/data. quiet_cmd_hypcopy = HYPCOPY $@ diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c new file mode 100644 index 000000000000..ead02c6a7628 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil <dbrazdil@google.com> + * + * Generates relocation information used by the kernel to convert + * absolute addresses in hyp data from kernel VAs to hyp VAs. + * + * This is necessary because hyp code is linked into the same binary + * as the kernel but executes under different memory mappings. + * If the compiler used absolute addressing, those addresses need to + * be converted before they are used by hyp code. + * + * The input of this program is the relocatable ELF object containing + * all hyp code/data, not yet linked into vmlinux. Hyp section names + * should have been prefixed with `.hyp` at this point. + * + * The output (printed to stdout) is an assembly file containing + * an array of 32-bit integers and static relocations that instruct + * the linker of `vmlinux` to populate the array entries with offsets + * to positions in the kernel binary containing VAs used by hyp code. + * + * Note that dynamic relocations could be used for the same purpose. + * However, those are only generated if CONFIG_RELOCATABLE=y. + */ + +#include <elf.h> +#include <endian.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <generated/autoconf.h> + +#define HYP_SECTION_PREFIX ".hyp" +#define HYP_RELOC_SECTION ".hyp.reloc" +#define HYP_SECTION_SYMBOL_PREFIX "__hyp_section_" + +/* + * AArch64 relocation type constants. + * Included in case these are not defined in the host toolchain. + */ +#ifndef R_AARCH64_ABS64 +#define R_AARCH64_ABS64 257 +#endif +#ifndef R_AARCH64_LD_PREL_LO19 +#define R_AARCH64_LD_PREL_LO19 273 +#endif +#ifndef R_AARCH64_ADR_PREL_LO21 +#define R_AARCH64_ADR_PREL_LO21 274 +#endif +#ifndef R_AARCH64_ADR_PREL_PG_HI21 +#define R_AARCH64_ADR_PREL_PG_HI21 275 +#endif +#ifndef R_AARCH64_ADR_PREL_PG_HI21_NC +#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 +#endif +#ifndef R_AARCH64_ADD_ABS_LO12_NC +#define R_AARCH64_ADD_ABS_LO12_NC 277 +#endif +#ifndef R_AARCH64_LDST8_ABS_LO12_NC +#define R_AARCH64_LDST8_ABS_LO12_NC 278 +#endif +#ifndef R_AARCH64_TSTBR14 +#define R_AARCH64_TSTBR14 279 +#endif +#ifndef R_AARCH64_CONDBR19 +#define R_AARCH64_CONDBR19 280 +#endif +#ifndef R_AARCH64_JUMP26 +#define R_AARCH64_JUMP26 282 +#endif +#ifndef R_AARCH64_CALL26 +#define R_AARCH64_CALL26 283 +#endif +#ifndef R_AARCH64_LDST16_ABS_LO12_NC +#define R_AARCH64_LDST16_ABS_LO12_NC 284 +#endif +#ifndef R_AARCH64_LDST32_ABS_LO12_NC +#define R_AARCH64_LDST32_ABS_LO12_NC 285 +#endif +#ifndef R_AARCH64_LDST64_ABS_LO12_NC +#define R_AARCH64_LDST64_ABS_LO12_NC 286 +#endif +#ifndef R_AARCH64_MOVW_PREL_G0 +#define R_AARCH64_MOVW_PREL_G0 287 +#endif +#ifndef R_AARCH64_MOVW_PREL_G0_NC +#define R_AARCH64_MOVW_PREL_G0_NC 288 +#endif +#ifndef R_AARCH64_MOVW_PREL_G1 +#define R_AARCH64_MOVW_PREL_G1 289 +#endif +#ifndef R_AARCH64_MOVW_PREL_G1_NC +#define R_AARCH64_MOVW_PREL_G1_NC 290 +#endif +#ifndef R_AARCH64_MOVW_PREL_G2 +#define R_AARCH64_MOVW_PREL_G2 291 +#endif +#ifndef R_AARCH64_MOVW_PREL_G2_NC +#define R_AARCH64_MOVW_PREL_G2_NC 292 +#endif +#ifndef R_AARCH64_MOVW_PREL_G3 +#define R_AARCH64_MOVW_PREL_G3 293 +#endif +#ifndef R_AARCH64_LDST128_ABS_LO12_NC +#define R_AARCH64_LDST128_ABS_LO12_NC 299 +#endif + +/* Global state of the processed ELF. */ +static struct { + const char *path; + char *begin; + size_t size; + Elf64_Ehdr *ehdr; + Elf64_Shdr *sh_table; + const char *sh_string; +} elf; + +#if defined(CONFIG_CPU_LITTLE_ENDIAN) + +#define elf16toh(x) le16toh(x) +#define elf32toh(x) le32toh(x) +#define elf64toh(x) le64toh(x) + +#define ELFENDIAN ELFDATA2LSB + +#elif defined(CONFIG_CPU_BIG_ENDIAN) + +#define elf16toh(x) be16toh(x) +#define elf32toh(x) be32toh(x) +#define elf64toh(x) be64toh(x) + +#define ELFENDIAN ELFDATA2MSB + +#else + +#error PDP-endian sadly unsupported... + +#endif + +#define fatal_error(fmt, ...) \ + ({ \ + fprintf(stderr, "error: %s: " fmt "\n", \ + elf.path, ## __VA_ARGS__); \ + exit(EXIT_FAILURE); \ + __builtin_unreachable(); \ + }) + +#define fatal_perror(msg) \ + ({ \ + fprintf(stderr, "error: %s: " msg ": %s\n", \ + elf.path, strerror(errno)); \ + exit(EXIT_FAILURE); \ + __builtin_unreachable(); \ + }) + +#define assert_op(lhs, rhs, fmt, op) \ + ({ \ + typeof(lhs) _lhs = (lhs); \ + typeof(rhs) _rhs = (rhs); \ + \ + if (!(_lhs op _rhs)) { \ + fatal_error("assertion " #lhs " " #op " " #rhs \ + " failed (lhs=" fmt ", rhs=" fmt \ + ", line=%d)", _lhs, _rhs, __LINE__); \ + } \ + }) + +#define assert_eq(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, ==) +#define assert_ne(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, !=) +#define assert_lt(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, <) +#define assert_ge(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, >=) + +/* + * Return a pointer of a given type at a given offset from + * the beginning of the ELF file. + */ +#define elf_ptr(type, off) ((type *)(elf.begin + (off))) + +/* Iterate over all sections in the ELF. */ +#define for_each_section(var) \ + for (var = elf.sh_table; var < elf.sh_table + elf16toh(elf.ehdr->e_shnum); ++var) + +/* Iterate over all Elf64_Rela relocations in a given section. */ +#define for_each_rela(shdr, var) \ + for (var = elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset)); \ + var < elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset) + elf64toh(shdr->sh_size)); var++) + +/* True if a string starts with a given prefix. */ +static inline bool starts_with(const char *str, const char *prefix) +{ + return memcmp(str, prefix, strlen(prefix)) == 0; +} + +/* Returns a string containing the name of a given section. */ +static inline const char *section_name(Elf64_Shdr *shdr) +{ + return elf.sh_string + elf32toh(shdr->sh_name); +} + +/* Returns a pointer to the first byte of section data. */ +static inline const char *section_begin(Elf64_Shdr *shdr) +{ + return elf_ptr(char, elf64toh(shdr->sh_offset)); +} + +/* Find a section by its offset from the beginning of the file. */ +static inline Elf64_Shdr *section_by_off(Elf64_Off off) +{ + assert_ne(off, 0UL, "%lu"); + return elf_ptr(Elf64_Shdr, off); +} + +/* Find a section by its index. */ +static inline Elf64_Shdr *section_by_idx(uint16_t idx) +{ + assert_ne(idx, SHN_UNDEF, "%u"); + return &elf.sh_table[idx]; +} + +/* + * Memory-map the given ELF file, perform sanity checks, and + * populate global state. + */ +static void init_elf(const char *path) +{ + int fd, ret; + struct stat stat; + + /* Store path in the global struct for error printing. */ + elf.path = path; + + /* Open the ELF file. */ + fd = open(path, O_RDONLY); + if (fd < 0) + fatal_perror("Could not open ELF file"); + + /* Get status of ELF file to obtain its size. */ + ret = fstat(fd, &stat); + if (ret < 0) { + close(fd); + fatal_perror("Could not get status of ELF file"); + } + + /* mmap() the entire ELF file read-only at an arbitrary address. */ + elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (elf.begin == MAP_FAILED) { + close(fd); + fatal_perror("Could not mmap ELF file"); + } + + /* mmap() was successful, close the FD. */ + close(fd); + + /* Get pointer to the ELF header. */ + assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu"); + elf.ehdr = elf_ptr(Elf64_Ehdr, 0); + + /* Check the ELF magic. */ + assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x"); + assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x"); + assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x"); + assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x"); + + /* Sanity check that this is an ELF64 relocatable object for AArch64. */ + assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u"); + assert_eq(elf.ehdr->e_ident[EI_DATA], ELFENDIAN, "%u"); + assert_eq(elf16toh(elf.ehdr->e_type), ET_REL, "%u"); + assert_eq(elf16toh(elf.ehdr->e_machine), EM_AARCH64, "%u"); + + /* Populate fields of the global struct. */ + elf.sh_table = section_by_off(elf64toh(elf.ehdr->e_shoff)); + elf.sh_string = section_begin(section_by_idx(elf16toh(elf.ehdr->e_shstrndx))); +} + +/* Print the prologue of the output ASM file. */ +static void emit_prologue(void) +{ + printf(".data\n" + ".pushsection " HYP_RELOC_SECTION ", \"a\"\n"); +} + +/* Print ASM statements needed as a prologue to a processed hyp section. */ +static void emit_section_prologue(const char *sh_orig_name) +{ + /* Declare the hyp section symbol. */ + printf(".global %s%s\n", HYP_SECTION_SYMBOL_PREFIX, sh_orig_name); +} + +/* + * Print ASM statements to create a hyp relocation entry for a given + * R_AARCH64_ABS64 relocation. + * + * The linker of vmlinux will populate the position given by `rela` with + * an absolute 64-bit kernel VA. If the kernel is relocatable, it will + * also generate a dynamic relocation entry so that the kernel can shift + * the address at runtime for KASLR. + * + * Emit a 32-bit offset from the current address to the position given + * by `rela`. This way the kernel can iterate over all kernel VAs used + * by hyp at runtime and convert them to hyp VAs. However, that offset + * will not be known until linking of `vmlinux`, so emit a PREL32 + * relocation referencing a symbol that the hyp linker script put at + * the beginning of the relocated section + the offset from `rela`. + */ +static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name) +{ + /* Offset of this reloc from the beginning of HYP_RELOC_SECTION. */ + static size_t reloc_offset; + + /* Create storage for the 32-bit offset. */ + printf(".word 0\n"); + + /* + * Create a PREL32 relocation which instructs the linker of `vmlinux` + * to insert offset to position <base> + <offset>, where <base> is + * a symbol at the beginning of the relocated section, and <offset> + * is `rela->r_offset`. + */ + printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n", + reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name, + elf64toh(rela->r_offset)); + + reloc_offset += 4; +} + +/* Print the epilogue of the output ASM file. */ +static void emit_epilogue(void) +{ + printf(".popsection\n"); +} + +/* + * Iterate over all RELA relocations in a given section and emit + * hyp relocation data for all absolute addresses in hyp code/data. + * + * Static relocations that generate PC-relative-addressing are ignored. + * Failure is reported for unexpected relocation types. + */ +static void emit_rela_section(Elf64_Shdr *sh_rela) +{ + Elf64_Shdr *sh_orig = &elf.sh_table[elf32toh(sh_rela->sh_info)]; + const char *sh_orig_name = section_name(sh_orig); + Elf64_Rela *rela; + + /* Skip all non-hyp sections. */ + if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX)) + return; + + emit_section_prologue(sh_orig_name); + + for_each_rela(sh_rela, rela) { + uint32_t type = (uint32_t)elf64toh(rela->r_info); + + /* Check that rela points inside the relocated section. */ + assert_lt(elf64toh(rela->r_offset), elf64toh(sh_orig->sh_size), "0x%lx"); + + switch (type) { + /* + * Data relocations to generate absolute addressing. + * Emit a hyp relocation. + */ + case R_AARCH64_ABS64: + emit_rela_abs64(rela, sh_orig_name); + break; + /* Allow relocations to generate PC-relative addressing. */ + case R_AARCH64_LD_PREL_LO19: + case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_ADR_PREL_PG_HI21: + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADD_ABS_LO12_NC: + case R_AARCH64_LDST8_ABS_LO12_NC: + case R_AARCH64_LDST16_ABS_LO12_NC: + case R_AARCH64_LDST32_ABS_LO12_NC: + case R_AARCH64_LDST64_ABS_LO12_NC: + case R_AARCH64_LDST128_ABS_LO12_NC: + break; + /* Allow relative relocations for control-flow instructions. */ + case R_AARCH64_TSTBR14: + case R_AARCH64_CONDBR19: + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + break; + /* Allow group relocations to create PC-relative offset inline. */ + case R_AARCH64_MOVW_PREL_G0: + case R_AARCH64_MOVW_PREL_G0_NC: + case R_AARCH64_MOVW_PREL_G1: + case R_AARCH64_MOVW_PREL_G1_NC: + case R_AARCH64_MOVW_PREL_G2: + case R_AARCH64_MOVW_PREL_G2_NC: + case R_AARCH64_MOVW_PREL_G3: + break; + default: + fatal_error("Unexpected RELA type %u", type); + } + } +} + +/* Iterate over all sections and emit hyp relocation data for RELA sections. */ +static void emit_all_relocs(void) +{ + Elf64_Shdr *shdr; + + for_each_section(shdr) { + switch (elf32toh(shdr->sh_type)) { + case SHT_REL: + fatal_error("Unexpected SHT_REL section \"%s\"", + section_name(shdr)); + case SHT_RELA: + emit_rela_section(shdr); + break; + } + } +} + +int main(int argc, const char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]); + return EXIT_FAILURE; + } + + init_elf(argv[1]); + + emit_prologue(); + emit_all_relocs(); + emit_epilogue(); + + return EXIT_SUCCESS; +} diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index a820dfdc9c25..6585a7cbbc56 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter) * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) - /* Load the format arguments into x1-7 */ - mov x6, x3 - get_vcpu_ptr x7, x3 - - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - /* Prepare and exit to the host's panic funciton. */ mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, lr ldr lr, =panic + hyp_kimg_va lr, x6 msr elr_el2, lr - /* - * Set the panic format string and enter the host, conditionally - * restoring the host context. - */ + /* Set the panic format string. Use the, now free, LR as scratch. */ + ldr lr, =__hyp_panic_string + hyp_kimg_va lr, x6 + + /* Load the format arguments into x1-7. */ + mov x6, x3 + get_vcpu_ptr x7, x3 + mrs x3, esr_el2 + mrs x4, far_el2 + mrs x5, hpfar_el2 + + /* Enter the host, conditionally restoring the host context. */ cmp x0, xzr - ldr x0, =__hyp_panic_string + mov x0, lr b.eq __host_enter_without_restoring b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) @@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic) * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc - kimg_pa x5, x6 + hyp_pa x5, x6 br x5 .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 882fd40d068e..c631e29fb001 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -18,7 +18,7 @@ #include <asm/virt.h> .text - .pushsection .hyp.idmap.text, "ax" + .pushsection .idmap.text, "ax" .align 11 @@ -57,17 +57,10 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - // We only actively check bits [24:31], and everything - // else has to be zero, which we check at build time. -#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) -#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value -#endif - - ror x0, x0, #24 - eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) - ror x0, x0, #4 - eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) - cbz x0, 1f + mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) + cmp x0, x3 + b.eq 1f + mov x0, #SMCCC_RET_NOT_SUPPORTED eret @@ -141,7 +134,6 @@ alternative_else_nop_endif /* Set the host vector */ ldr x0, =__kvm_hyp_host_vector - kimg_hyp_va x0, x1 msr vbar_el2, x0 ret @@ -200,7 +192,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) /* Leave idmap. */ mov x0, x29 ldr x1, =kvm_host_psci_cpu_entry - kimg_hyp_va x1, x2 br x1 SYM_CODE_END(__kvm_hyp_init_cpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a906f9e2ff34..f012f8665ecc 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -108,9 +108,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) typedef void (*hcall_t)(struct kvm_cpu_context *); -#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x) +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x -static const hcall_t *host_hcall[] = { +static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), @@ -130,7 +130,6 @@ static const hcall_t *host_hcall[] = { static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); - const hcall_t *kfn; hcall_t hfn; id -= KVM_HOST_SMCCC_ID(0); @@ -138,13 +137,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) if (unlikely(id >= ARRAY_SIZE(host_hcall))) goto inval; - kfn = host_hcall[id]; - if (unlikely(!kfn)) + hfn = host_hcall[id]; + if (unlikely(!hfn)) goto inval; cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; - - hfn = kimg_fn_hyp_va(kfn); hfn(host_ctxt); return; diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 2997aa156d8e..879559057dee 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu) if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base)) hyp_panic(); - cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base); + cpu_base_array = (unsigned long *)&kvm_arm_hyp_percpu_base; this_cpu_base = kern_hyp_va(cpu_base_array[cpu]); - elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start); + elf_base = (unsigned long)&__per_cpu_start; return this_cpu_base - elf_base; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index 1206d0d754d5..cd119d82d8e3 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -12,14 +12,17 @@ #include <asm/memory.h> SECTIONS { + HYP_SECTION(.idmap.text) HYP_SECTION(.text) + HYP_SECTION(.data..ro_after_init) + HYP_SECTION(.rodata) + /* * .hyp..data..percpu needs to be page aligned to maintain the same * alignment for when linking into vmlinux. */ . = ALIGN(PAGE_SIZE); - HYP_SECTION_NAME(.data..percpu) : { + BEGIN_HYP_SECTION(.data..percpu) PERCPU_INPUT(L1_CACHE_BYTES) - } - HYP_SECTION(.data..ro_after_init) + END_HYP_SECTION } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 8e7128cb7667..63de71c0481e 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -128,8 +128,8 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) if (cpu_id == INVALID_CPU_ID) return PSCI_RET_INVALID_PARAMS; - boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id); - init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id); + boot_args = per_cpu_ptr(&cpu_on_args, cpu_id); + init_params = per_cpu_ptr(&kvm_init_params, cpu_id); /* Check if the target CPU is already being booted. */ if (!try_acquire_boot_args(boot_args)) @@ -140,7 +140,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) wmb(); ret = psci_call(func_id, mpidr, - __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)), + __hyp_pa(&kvm_hyp_cpu_entry), __hyp_pa(init_params)); /* If successful, the lock will be released by the target CPU. */ @@ -159,8 +159,8 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) struct psci_boot_args *boot_args; struct kvm_nvhe_init_params *init_params; - boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); - init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + boot_args = this_cpu_ptr(&suspend_args); + init_params = this_cpu_ptr(&kvm_init_params); /* * No need to acquire a lock before writing to boot_args because a core @@ -174,7 +174,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) * point if it is a deep sleep state. */ return psci_call(func_id, power_state, - __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(&kvm_hyp_cpu_resume), __hyp_pa(init_params)); } @@ -186,8 +186,8 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) struct psci_boot_args *boot_args; struct kvm_nvhe_init_params *init_params; - boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); - init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + boot_args = this_cpu_ptr(&suspend_args); + init_params = this_cpu_ptr(&kvm_init_params); /* * No need to acquire a lock before writing to boot_args because a core @@ -198,7 +198,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) /* Will only return on error. */ return psci_call(func_id, - __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(&kvm_hyp_cpu_resume), __hyp_pa(init_params), 0); } @@ -207,12 +207,12 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; if (is_cpu_on) - boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + boot_args = this_cpu_ptr(&cpu_on_args); else - boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + boot_args = this_cpu_ptr(&suspend_args); cpu_reg(host_ctxt, 0) = boot_args->r0; write_sysreg_el2(boot_args->pc, SYS_ELR); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index bdf8e55ed308..4d177ce1d536 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -45,6 +45,10 @@ #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) +#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_HI_S2_XN) + struct kvm_pgtable_walk_data { struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; @@ -170,10 +174,9 @@ static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp) smp_store_release(ptep, pte); } -static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr, - u32 level) +static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) { - kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa); + kvm_pte_t pte = kvm_phys_to_pte(pa); u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE : KVM_PTE_TYPE_BLOCK; @@ -181,12 +184,7 @@ static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr, pte |= FIELD_PREP(KVM_PTE_TYPE, type); pte |= KVM_PTE_VALID; - /* Tolerate KVM recreating the exact same mapping. */ - if (kvm_pte_valid(old)) - return old == pte; - - smp_store_release(ptep, pte); - return true; + return pte; } static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, @@ -341,12 +339,17 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot, static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { + kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; - WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)); + /* Tolerate KVM recreating the exact same mapping */ + new = kvm_init_valid_leaf_pte(phys, data->attr, level); + if (old != new && !WARN_ON(kvm_pte_valid(old))) + smp_store_release(ptep, new); + data->phys += granule; return true; } @@ -461,34 +464,41 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot, return 0; } -static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - struct stage2_map_data *data) +static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + struct stage2_map_data *data) { + kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct page *page = virt_to_page(ptep); if (!kvm_block_mapping_supported(addr, end, phys, level)) - return false; - - /* - * If the PTE was already valid, drop the refcount on the table - * early, as it will be bumped-up again in stage2_map_walk_leaf(). - * This ensures that the refcount stays constant across a valid to - * valid PTE update. - */ - if (kvm_pte_valid(*ptep)) - put_page(virt_to_page(ptep)); - - if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)) - goto out; + return -E2BIG; + + new = kvm_init_valid_leaf_pte(phys, data->attr, level); + if (kvm_pte_valid(old)) { + /* + * Skip updating the PTE if we are trying to recreate the exact + * same mapping or only change the access permissions. Instead, + * the vCPU will exit one more time from guest if still needed + * and then go through the path of relaxing permissions. + */ + if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS))) + return -EAGAIN; + + /* + * There's an existing different valid leaf entry, so perform + * break-before-make. + */ + kvm_set_invalid_pte(ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); + put_page(page); + } - /* There's an existing valid leaf entry, so perform break-before-make */ - kvm_set_invalid_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level); - kvm_set_valid_leaf_pte(ptep, phys, data->attr, level); -out: + smp_store_release(ptep, new); + get_page(page); data->phys += granule; - return true; + return 0; } static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, @@ -516,6 +526,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { + int ret; kvm_pte_t *childp, pte = *ptep; struct page *page = virt_to_page(ptep); @@ -526,8 +537,9 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return 0; } - if (stage2_map_walker_try_leaf(addr, end, level, ptep, data)) - goto out_get_page; + ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data); + if (ret != -E2BIG) + return ret; if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; @@ -551,9 +563,8 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, } kvm_set_table_pte(ptep, childp); - -out_get_page: get_page(page); + return 0; } diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 8f0585640241..87a54375bd6e 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) } rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va; + addr = kvm_vgic_global_state.vcpu_hyp_va; addr += fault_ipa - vgic->vgic_cpu_base; if (kvm_vcpu_dabt_iswrite(vcpu)) { diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 25ea4ecb6449..ead21b98b620 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -71,6 +71,12 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) if (gpa != GPA_INVALID) val = gpa; break; + case ARM_SMCCC_TRNG_VERSION: + case ARM_SMCCC_TRNG_FEATURES: + case ARM_SMCCC_TRNG_GET_UUID: + case ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + return kvm_trng_call(vcpu); default: return kvm_psci_call(vcpu); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7d2257cc5438..77cb2d28f2a4 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -879,11 +879,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (vma_pagesize == PAGE_SIZE && !force_pte) vma_pagesize = transparent_hugepage_adjust(memslot, hva, &pfn, &fault_ipa); - if (writable) { + if (writable) prot |= KVM_PGTABLE_PROT_W; - kvm_set_pfn_dirty(pfn); - mark_page_dirty(kvm, gfn); - } if (fault_status != FSC_PERM && !device) clean_dcache_guest_page(pfn, vma_pagesize); @@ -911,11 +908,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, memcache); } + /* Mark the page dirty only if the fault is handled successfully */ + if (writable && !ret) { + kvm_set_pfn_dirty(pfn); + mark_page_dirty(kvm, gfn); + } + out_unlock: spin_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); - return ret; + return ret != -EAGAIN ? ret : 0; } /* Resolve the access fault by making the page young again. */ diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 247422ac78a9..e9ec08b0b070 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -23,11 +23,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); static u32 kvm_pmu_event_mask(struct kvm *kvm) { switch (kvm->arch.pmuver) { - case 1: /* ARMv8.0 */ + case ID_AA64DFR0_PMUVER_8_0: return GENMASK(9, 0); - case 4: /* ARMv8.1 */ - case 5: /* ARMv8.4 */ - case 6: /* ARMv8.5 */ + case ID_AA64DFR0_PMUVER_8_1: + case ID_AA64DFR0_PMUVER_8_4: + case ID_AA64DFR0_PMUVER_8_5: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver); @@ -795,6 +795,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) base = 0; } else { val = read_sysreg(pmceid1_el0); + /* + * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled + * as RAZ + */ + if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4) + val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32); base = 32; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7c4f79532406..4f2f1e3145de 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -9,6 +9,7 @@ * Christoffer Dall <c.dall@virtualopensystems.com> */ +#include <linux/bitfield.h> #include <linux/bsearch.h> #include <linux/kvm_host.h> #include <linux/mm.h> @@ -700,14 +701,18 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 pmceid; + u64 pmceid, mask, shift; BUG_ON(p->is_write); if (pmu_access_el0_disabled(vcpu)) return false; + get_access_mask(r, &mask, &shift); + pmceid = kvm_pmu_get_pmceid(vcpu, (p->Op2 & 1)); + pmceid &= mask; + pmceid >>= shift; p->regval = pmceid; @@ -1021,6 +1026,8 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT)) + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) @@ -1028,36 +1035,41 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, u32 id = reg_to_encoding(r); u64 val = raz ? 0 : read_sanitised_ftr_reg(id); - if (id == SYS_ID_AA64PFR0_EL1) { + switch (id) { + case SYS_ID_AA64PFR0_EL1: if (!vcpu_has_sve(vcpu)) - val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); - val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); - val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); - val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); - val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT); - val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT); - } else if (id == SYS_ID_AA64PFR1_EL1) { - val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); - } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { - val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) | - (0xfUL << ID_AA64ISAR1_API_SHIFT) | - (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | - (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); - } else if (id == SYS_ID_AA64DFR0_EL1) { - u64 cap = 0; - - /* Limit guests to PMUv3 for ARMv8.1 */ - if (kvm_vcpu_has_pmu(vcpu)) - cap = ID_AA64DFR0_PMUVER_8_1; - + val &= ~FEATURE(ID_AA64PFR0_SVE); + val &= ~FEATURE(ID_AA64PFR0_AMU); + val &= ~FEATURE(ID_AA64PFR0_CSV2); + val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); + val &= ~FEATURE(ID_AA64PFR0_CSV3); + val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + break; + case SYS_ID_AA64PFR1_EL1: + val &= ~FEATURE(ID_AA64PFR1_MTE); + break; + case SYS_ID_AA64ISAR1_EL1: + if (!vcpu_has_ptrauth(vcpu)) + val &= ~(FEATURE(ID_AA64ISAR1_APA) | + FEATURE(ID_AA64ISAR1_API) | + FEATURE(ID_AA64ISAR1_GPA) | + FEATURE(ID_AA64ISAR1_GPI)); + break; + case SYS_ID_AA64DFR0_EL1: + /* Limit debug to ARMv8.0 */ + val &= ~FEATURE(ID_AA64DFR0_DEBUGVER); + val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6); + /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_PMUVER_SHIFT, - cap); - } else if (id == SYS_ID_DFR0_EL1) { - /* Limit guests to PMUv3 for ARMv8.1 */ + ID_AA64DFR0_PMUVER_SHIFT, + kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0); + break; + case SYS_ID_DFR0_EL1: + /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_PERFMON_SHIFT, - ID_DFR0_PERFMON_8_1); + ID_DFR0_PERFMON_SHIFT, + kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0); + break; } return val; @@ -1493,6 +1505,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { .access = access_pminten, .reg = PMINTENSET_EL1 }, { PMU_SYS_REG(SYS_PMINTENCLR_EL1), .access = access_pminten, .reg = PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1720,7 +1733,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, }; -static bool trap_dbgidr(struct kvm_vcpu *vcpu, +static bool trap_dbgdidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { @@ -1734,7 +1747,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) - | (6 << 16) | (el3 << 14) | (el3 << 12)); + | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12)); return true; } } @@ -1767,8 +1780,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, * guest. Revisit this one day, would this principle change. */ static const struct sys_reg_desc cp14_regs[] = { - /* DBGIDR */ - { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr }, + /* DBGDIDR */ + { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgdidr }, /* DBGDTRRXext */ { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi }, @@ -1918,8 +1931,8 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs }, { Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc }, { Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr }, - { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid }, - { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid }, + { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid }, + { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid }, { Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper }, { Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr }, @@ -1927,6 +1940,10 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, + { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid }, + { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid }, + /* PMMIR */ + { Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi }, /* PRRR/MAIR0 */ { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, diff --git a/arch/arm64/kvm/trng.c b/arch/arm64/kvm/trng.c new file mode 100644 index 000000000000..99bdd7103c9c --- /dev/null +++ b/arch/arm64/kvm/trng.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +#include <kvm/arm_hypercalls.h> + +#define ARM_SMCCC_TRNG_VERSION_1_0 0x10000UL + +/* Those values are deliberately separate from the generic SMCCC definitions. */ +#define TRNG_SUCCESS 0UL +#define TRNG_NOT_SUPPORTED ((unsigned long)-1) +#define TRNG_INVALID_PARAMETER ((unsigned long)-2) +#define TRNG_NO_ENTROPY ((unsigned long)-3) + +#define TRNG_MAX_BITS64 192 + +static const uuid_t arm_smc_trng_uuid __aligned(4) = UUID_INIT( + 0x0d21e000, 0x4384, 0x11eb, 0x80, 0x70, 0x52, 0x44, 0x55, 0x4e, 0x5a, 0x4c); + +static int kvm_trng_do_rnd(struct kvm_vcpu *vcpu, int size) +{ + DECLARE_BITMAP(bits, TRNG_MAX_BITS64); + u32 num_bits = smccc_get_arg1(vcpu); + int i; + + if (num_bits > 3 * size) { + smccc_set_retval(vcpu, TRNG_INVALID_PARAMETER, 0, 0, 0); + return 1; + } + + /* get as many bits as we need to fulfil the request */ + for (i = 0; i < DIV_ROUND_UP(num_bits, BITS_PER_LONG); i++) + bits[i] = get_random_long(); + + bitmap_clear(bits, num_bits, TRNG_MAX_BITS64 - num_bits); + + if (size == 32) + smccc_set_retval(vcpu, TRNG_SUCCESS, lower_32_bits(bits[1]), + upper_32_bits(bits[0]), lower_32_bits(bits[0])); + else + smccc_set_retval(vcpu, TRNG_SUCCESS, bits[2], bits[1], bits[0]); + + memzero_explicit(bits, sizeof(bits)); + return 1; +} + +int kvm_trng_call(struct kvm_vcpu *vcpu) +{ + const __le32 *u = (__le32 *)arm_smc_trng_uuid.b; + u32 func_id = smccc_get_function(vcpu); + unsigned long val = TRNG_NOT_SUPPORTED; + int size = 64; + + switch (func_id) { + case ARM_SMCCC_TRNG_VERSION: + val = ARM_SMCCC_TRNG_VERSION_1_0; + break; + case ARM_SMCCC_TRNG_FEATURES: + switch (smccc_get_arg1(vcpu)) { + case ARM_SMCCC_TRNG_VERSION: + case ARM_SMCCC_TRNG_FEATURES: + case ARM_SMCCC_TRNG_GET_UUID: + case ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + val = TRNG_SUCCESS; + } + break; + case ARM_SMCCC_TRNG_GET_UUID: + smccc_set_retval(vcpu, le32_to_cpu(u[0]), le32_to_cpu(u[1]), + le32_to_cpu(u[2]), le32_to_cpu(u[3])); + return 1; + case ARM_SMCCC_TRNG_RND32: + size = 32; + fallthrough; + case ARM_SMCCC_TRNG_RND64: + return kvm_trng_do_rnd(vcpu, size); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 70fcd6a12fe1..978301392d67 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -81,6 +81,34 @@ __init void kvm_compute_layout(void) init_hyp_physvirt_offset(); } +/* + * The .hyp.reloc ELF section contains a list of kimg positions that + * contains kimg VAs but will be accessed only in hyp execution context. + * Convert them to hyp VAs. See gen-hyprel.c for more details. + */ +__init void kvm_apply_hyp_relocations(void) +{ + int32_t *rel; + int32_t *begin = (int32_t *)__hyp_reloc_begin; + int32_t *end = (int32_t *)__hyp_reloc_end; + + for (rel = begin; rel < end; ++rel) { + uintptr_t *ptr, kimg_va; + + /* + * Each entry contains a 32-bit relative offset from itself + * to a kimg VA position. + */ + ptr = (uintptr_t *)lm_alias((char *)rel + *rel); + + /* Read the kimg VA value at the relocation address. */ + kimg_va = *ptr; + + /* Convert to hyp VA and store back to the relocation address. */ + *ptr = __early_kern_hyp_va((uintptr_t)lm_alias(kimg_va)); + } +} + static u32 compute_instruction(int n, u32 rd, u32 rn) { u32 insn = AARCH64_BREAK_FAULT; @@ -255,12 +283,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst *updptr++ = cpu_to_le32(insn); } -void kvm_update_kimg_phys_offset(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, int nr_inst) -{ - generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); -} - void kvm_get_kimage_voffset(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { |